webscout 1.2.3__tar.gz → 1.2.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. See the package's registry page for more details.

Files changed (39)
  1. {webscout-1.2.3 → webscout-1.2.5}/PKG-INFO +21 -1
  2. {webscout-1.2.3 → webscout-1.2.5}/README.md +19 -0
  3. {webscout-1.2.3 → webscout-1.2.5}/setup.py +2 -1
  4. {webscout-1.2.3 → webscout-1.2.5}/webscout/AI.py +204 -0
  5. {webscout-1.2.3 → webscout-1.2.5}/webscout/AIutel.py +1 -1
  6. {webscout-1.2.3 → webscout-1.2.5}/webscout/DWEBS.py +197 -197
  7. {webscout-1.2.3 → webscout-1.2.5}/webscout/__init__.py +3 -1
  8. webscout-1.2.5/webscout/version.py +2 -0
  9. {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/PKG-INFO +21 -1
  10. {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/requires.txt +1 -0
  11. webscout-1.2.3/webscout/version.py +0 -2
  12. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/__init__.py +0 -0
  13. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/documents/__init__.py +0 -0
  14. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/documents/query_results_extractor.py +0 -0
  15. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/documents/webpage_content_extractor.py +0 -0
  16. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/__init__.py +0 -0
  17. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/filepath_converter.py +0 -0
  18. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/google_searcher.py +0 -0
  19. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/network_configs.py +0 -0
  20. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/networks/webpage_fetcher.py +0 -0
  21. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/utilsdw/__init__.py +0 -0
  22. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/utilsdw/enver.py +0 -0
  23. {webscout-1.2.3 → webscout-1.2.5}/DeepWEBS/utilsdw/logger.py +0 -0
  24. {webscout-1.2.3 → webscout-1.2.5}/LICENSE.md +0 -0
  25. {webscout-1.2.3 → webscout-1.2.5}/setup.cfg +0 -0
  26. {webscout-1.2.3 → webscout-1.2.5}/webscout/AIbase.py +0 -0
  27. {webscout-1.2.3 → webscout-1.2.5}/webscout/HelpingAI.py +0 -0
  28. {webscout-1.2.3 → webscout-1.2.5}/webscout/LLM.py +0 -0
  29. {webscout-1.2.3 → webscout-1.2.5}/webscout/__main__.py +0 -0
  30. {webscout-1.2.3 → webscout-1.2.5}/webscout/cli.py +0 -0
  31. {webscout-1.2.3 → webscout-1.2.5}/webscout/exceptions.py +0 -0
  32. {webscout-1.2.3 → webscout-1.2.5}/webscout/models.py +0 -0
  33. {webscout-1.2.3 → webscout-1.2.5}/webscout/utils.py +0 -0
  34. {webscout-1.2.3 → webscout-1.2.5}/webscout/webscout_search.py +0 -0
  35. {webscout-1.2.3 → webscout-1.2.5}/webscout/webscout_search_async.py +0 -0
  36. {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/SOURCES.txt +0 -0
  37. {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/dependency_links.txt +0 -0
  38. {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/entry_points.txt +0 -0
  39. {webscout-1.2.3 → webscout-1.2.5}/webscout.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: webscout
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
5
5
  Author: OEvortex
6
6
  Author-email: helpingai5@gmail.com
@@ -45,6 +45,7 @@ Requires-Dist: sse_starlette
45
45
  Requires-Dist: termcolor
46
46
  Requires-Dist: tiktoken
47
47
  Requires-Dist: tldextract
48
+ Requires-Dist: orjson
48
49
  Provides-Extra: dev
49
50
  Requires-Dist: ruff>=0.1.6; extra == "dev"
50
51
  Requires-Dist: pytest>=7.4.2; extra == "dev"
@@ -92,6 +93,7 @@ Also containes AI models that you can use
92
93
  - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
93
94
  - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
94
95
  - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
96
+ - [9. `KOBOLDIA` -](#9-koboldia--)
95
97
  - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
96
98
  - [`LLM`](#llm)
97
99
 
@@ -604,6 +606,24 @@ opengpt = OPENGPT(is_conversation=True, max_tokens=8000, timeout=30)
604
606
  prompt = "tell me about india"
605
607
  response_str = opengpt.chat(prompt)
606
608
  print(response_str)
609
+ ```
610
+ ### 9. `KOBOLDIA` -
611
+ ```python
612
+ from webscout.AI import KOBOLDAI
613
+
614
+ # Instantiate the KOBOLDAI class with default parameters
615
+ koboldai = KOBOLDAI()
616
+
617
+ # Define a prompt to send to the AI
618
+ prompt = "What is the capital of France?"
619
+
620
+ # Use the 'ask' method to get a response from the AI
621
+ response = koboldai.ask(prompt)
622
+
623
+ # Extract and print the message from the response
624
+ message = koboldai.get_message(response)
625
+ print(message)
626
+
607
627
  ```
608
628
 
609
629
  ## usage of special .LLM file from webscout (webscout.LLM)
@@ -41,6 +41,7 @@ Also containes AI models that you can use
41
41
  - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
42
42
  - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
43
43
  - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
44
+ - [9. `KOBOLDIA` -](#9-koboldia--)
44
45
  - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
45
46
  - [`LLM`](#llm)
46
47
 
@@ -553,6 +554,24 @@ opengpt = OPENGPT(is_conversation=True, max_tokens=8000, timeout=30)
553
554
  prompt = "tell me about india"
554
555
  response_str = opengpt.chat(prompt)
555
556
  print(response_str)
557
+ ```
558
+ ### 9. `KOBOLDIA` -
559
+ ```python
560
+ from webscout.AI import KOBOLDAI
561
+
562
+ # Instantiate the KOBOLDAI class with default parameters
563
+ koboldai = KOBOLDAI()
564
+
565
+ # Define a prompt to send to the AI
566
+ prompt = "What is the capital of France?"
567
+
568
+ # Use the 'ask' method to get a response from the AI
569
+ response = koboldai.ask(prompt)
570
+
571
+ # Extract and print the message from the response
572
+ message = koboldai.get_message(response)
573
+ print(message)
574
+
556
575
  ```
557
576
 
558
577
  ## usage of special .LLM file from webscout (webscout.LLM)
@@ -9,7 +9,7 @@ with open("README.md", encoding="utf-8") as f:
9
9
 
10
10
  setup(
11
11
  name="webscout",
12
- version="1.2.3", # Use the version variable from the version.py file
12
+ version="1.2.5",
13
13
  description="Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models",
14
14
  long_description=README,
15
15
  long_description_content_type="text/markdown",
@@ -53,6 +53,7 @@ setup(
53
53
  "termcolor",
54
54
  "tiktoken",
55
55
  "tldextract",
56
+ "orjson",
56
57
  ],
57
58
  entry_points={
58
59
  "console_scripts": [
@@ -21,10 +21,200 @@ import yaml
21
21
  from webscout.AIutel import Optimizers
22
22
  from webscout.AIutel import Conversation
23
23
  from webscout.AIutel import AwesomePrompts
24
+ from webscout.AIbase import Provider
24
25
  from Helpingai_T2 import Perplexity
25
26
  from typing import Any
26
27
  import logging
27
28
  #------------------------------------------------------OpenGPT-----------------------------------------------------------
29
+ class KOBOLDAI(Provider):
30
+ def __init__(
31
+ self,
32
+ is_conversation: bool = True,
33
+ max_tokens: int = 600,
34
+ temperature: float = 1,
35
+ top_p: float = 1,
36
+ timeout: int = 30,
37
+ intro: str = None,
38
+ filepath: str = None,
39
+ update_file: bool = True,
40
+ proxies: dict = {},
41
+ history_offset: int = 10250,
42
+ act: str = None,
43
+ ):
44
+ """Instantiate TGPT
45
+
46
+ Args:
47
+ is_conversation (str, optional): Flag for chatting conversationally. Defaults to True.
48
+ max_tokens (int, optional): Maximum number of tokens to be generated upon completion. Defaults to 600.
49
+ temperature (float, optional): Charge of the generated text's randomness. Defaults to 0.2.
50
+ top_p (float, optional): Sampling threshold during inference time. Defaults to 0.999.
51
+ timeout (int, optional): Http requesting timeout. Defaults to 30
52
+ intro (str, optional): Conversation introductory prompt. Defaults to `Conversation.intro`.
53
+ filepath (str, optional): Path to file containing conversation history. Defaults to None.
54
+ update_file (bool, optional): Add new prompts and responses to the file. Defaults to True.
55
+ proxies (dict, optional) : Http reqiuest proxies (socks). Defaults to {}.
56
+ history_offset (int, optional): Limit conversation history to this number of last texts. Defaults to 10250.
57
+ act (str|int, optional): Awesome prompt key or index. (Used as intro). Defaults to None.
58
+ """
59
+ self.session = requests.Session()
60
+ self.is_conversation = is_conversation
61
+ self.max_tokens_to_sample = max_tokens
62
+ self.temperature = temperature
63
+ self.top_p = top_p
64
+ self.chat_endpoint = (
65
+ "https://koboldai-koboldcpp-tiefighter.hf.space/api/extra/generate/stream"
66
+ )
67
+ self.stream_chunk_size = 64
68
+ self.timeout = timeout
69
+ self.last_response = {}
70
+ self.headers = {
71
+ "Content-Type": "application/json",
72
+ "Accept": "application/json",
73
+ }
74
+
75
+ self.__available_optimizers = (
76
+ method
77
+ for method in dir(Optimizers)
78
+ if callable(getattr(Optimizers, method)) and not method.startswith("__")
79
+ )
80
+ self.session.headers.update(self.headers)
81
+ Conversation.intro = (
82
+ AwesomePrompts().get_act(
83
+ act, raise_not_found=True, default=None, case_insensitive=True
84
+ )
85
+ if act
86
+ else intro or Conversation.intro
87
+ )
88
+ self.conversation = Conversation(
89
+ is_conversation, self.max_tokens_to_sample, filepath, update_file
90
+ )
91
+ self.conversation.history_offset = history_offset
92
+ self.session.proxies = proxies
93
+
94
+ def ask(
95
+ self,
96
+ prompt: str,
97
+ stream: bool = False,
98
+ raw: bool = False,
99
+ optimizer: str = None,
100
+ conversationally: bool = False,
101
+ ) -> dict:
102
+ """Chat with AI
103
+
104
+ Args:
105
+ prompt (str): Prompt to be send.
106
+ stream (bool, optional): Flag for streaming response. Defaults to False.
107
+ raw (bool, optional): Stream back raw response as received. Defaults to False.
108
+ optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
109
+ conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
110
+ Returns:
111
+ dict : {}
112
+ ```json
113
+ {
114
+ "token" : "How may I assist you today?"
115
+ }
116
+ ```
117
+ """
118
+ conversation_prompt = self.conversation.gen_complete_prompt(prompt)
119
+ if optimizer:
120
+ if optimizer in self.__available_optimizers:
121
+ conversation_prompt = getattr(Optimizers, optimizer)(
122
+ conversation_prompt if conversationally else prompt
123
+ )
124
+ else:
125
+ raise Exception(
126
+ f"Optimizer is not one of {self.__available_optimizers}"
127
+ )
128
+
129
+ self.session.headers.update(self.headers)
130
+ payload = {
131
+ "prompt": conversation_prompt,
132
+ "temperature": self.temperature,
133
+ "top_p": self.top_p,
134
+ }
135
+
136
+ def for_stream():
137
+ response = self.session.post(
138
+ self.chat_endpoint, json=payload, stream=True, timeout=self.timeout
139
+ )
140
+ if not response.ok:
141
+ raise Exception(
142
+ f"Failed to generate response - ({response.status_code}, {response.reason}) - {response.text}"
143
+ )
144
+
145
+ message_load = ""
146
+ for value in response.iter_lines(
147
+ decode_unicode=True,
148
+ delimiter="" if raw else "event: message\ndata:",
149
+ chunk_size=self.stream_chunk_size,
150
+ ):
151
+ try:
152
+ resp = json.loads(value)
153
+ message_load += self.get_message(resp)
154
+ resp["token"] = message_load
155
+ self.last_response.update(resp)
156
+ yield value if raw else resp
157
+ except json.decoder.JSONDecodeError:
158
+ pass
159
+ self.conversation.update_chat_history(
160
+ prompt, self.get_message(self.last_response)
161
+ )
162
+
163
+ def for_non_stream():
164
+ # let's make use of stream
165
+ for _ in for_stream():
166
+ pass
167
+ return self.last_response
168
+
169
+ return for_stream() if stream else for_non_stream()
170
+
171
+ def chat(
172
+ self,
173
+ prompt: str,
174
+ stream: bool = False,
175
+ optimizer: str = None,
176
+ conversationally: bool = False,
177
+ ) -> str:
178
+ """Generate response `str`
179
+ Args:
180
+ prompt (str): Prompt to be send.
181
+ stream (bool, optional): Flag for streaming response. Defaults to False.
182
+ optimizer (str, optional): Prompt optimizer name - `[code, shell_command]`. Defaults to None.
183
+ conversationally (bool, optional): Chat conversationally when using optimizer. Defaults to False.
184
+ Returns:
185
+ str: Response generated
186
+ """
187
+
188
+ def for_stream():
189
+ for response in self.ask(
190
+ prompt, True, optimizer=optimizer, conversationally=conversationally
191
+ ):
192
+ yield self.get_message(response)
193
+
194
+ def for_non_stream():
195
+ return self.get_message(
196
+ self.ask(
197
+ prompt,
198
+ False,
199
+ optimizer=optimizer,
200
+ conversationally=conversationally,
201
+ )
202
+ )
203
+
204
+ return for_stream() if stream else for_non_stream()
205
+
206
+ def get_message(self, response: dict) -> str:
207
+ """Retrieves message only from response
208
+
209
+ Args:
210
+ response (dict): Response generated by `self.ask`
211
+
212
+ Returns:
213
+ str: Message extracted
214
+ """
215
+ assert isinstance(response, dict), "Response should be of dict data-type only"
216
+ return response.get("token")
217
+ #------------------------------------------------------OpenGPT-----------------------------------------------------------
28
218
  class OPENGPT:
29
219
  def __init__(
30
220
  self,
@@ -1237,5 +1427,19 @@ def opengpt(prompt, stream):
1237
1427
  else:
1238
1428
  response_str = opengpt.chat(prompt)
1239
1429
  print(response_str)
1430
+
1431
+ @cli.command()
1432
+ @click.option('--prompt', prompt='Enter your prompt', help='The prompt to send.')
1433
+ @click.option('--stream', is_flag=True, help='Flag for streaming response.')
1434
+ @click.option('--raw', is_flag=True, help='Stream back raw response as received.')
1435
+ @click.option('--optimizer', type=str, help='Prompt optimizer name.')
1436
+ @click.option('--conversationally', is_flag=True, help='Chat conversationally when using optimizer.')
1437
+ def koboldai_cli(prompt, stream, raw, optimizer, conversationally):
1438
+ """Chat with KOBOLDAI using the provided prompt."""
1439
+ koboldai_instance = KOBOLDAI() # Initialize a KOBOLDAI instance
1440
+ response = koboldai_instance.ask(prompt, stream, raw, optimizer, conversationally)
1441
+ processed_response = koboldai_instance.get_message(response) # Process the response
1442
+ print(processed_response)
1443
+
1240
1444
  if __name__ == '__main__':
1241
1445
  cli()
@@ -11,7 +11,7 @@ import click
11
11
  from rich.markdown import Markdown
12
12
  from rich.console import Console
13
13
 
14
- appdir = appdirs.AppDirs("pytgpt", "Smartwa")
14
+ appdir = appdirs.AppDirs("AIWEBS", "vortex")
15
15
 
16
16
  default_path = appdir.user_cache_dir
17
17
 
@@ -1,197 +1,197 @@
1
-
2
- from pydantic import BaseModel, Field
3
- from typing import Union
4
-
5
- from DeepWEBS.utilsdw.logger import logger
6
- from DeepWEBS.networks.google_searcher import GoogleSearcher
7
- from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher
8
- from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor
9
- from DeepWEBS.documents.webpage_content_extractor import BatchWebpageContentExtractor
10
- from DeepWEBS.utilsdw.logger import logger
11
- import argparse
12
-
13
- class DeepWEBS:
14
- def __init__(self):
15
- pass
16
-
17
- class DeepSearch(BaseModel):
18
- queries: list = Field(
19
- default=[""],
20
- description="(list[str]) Queries to search",
21
- )
22
- result_num: int = Field(
23
- default=10,
24
- description="(int) Number of search results",
25
- )
26
- safe: bool = Field(
27
- default=False,
28
- description="(bool) Enable SafeSearch",
29
- )
30
- types: list = Field(
31
- default=["web"],
32
- description="(list[str]) Types of search results: `web`, `image`, `videos`, `news`",
33
- )
34
- extract_webpage: bool = Field(
35
- default=False,
36
- description="(bool) Enable extracting main text contents from webpage, will add `text` filed in each `query_result` dict",
37
- )
38
- overwrite_query_html: bool = Field(
39
- default=False,
40
- description="(bool) Overwrite HTML file of query results",
41
- )
42
- overwrite_webpage_html: bool = Field(
43
- default=False,
44
- description="(bool) Overwrite HTML files of webpages from query results",
45
- )
46
-
47
- def queries_to_search_results(self, item: DeepSearch):
48
- google_searcher = GoogleSearcher()
49
- queries_search_results = []
50
- for query in item.queries:
51
- query_results_extractor = QueryResultsExtractor()
52
- if not query.strip():
53
- continue
54
- try:
55
- query_html_path = google_searcher.search(
56
- query=query,
57
- result_num=item.result_num,
58
- safe=item.safe,
59
- overwrite=item.overwrite_query_html,
60
- )
61
- except Exception as e:
62
- logger.error(f"Failed to search for query '{query}': {e}")
63
- continue
64
-
65
- try:
66
- query_search_results = query_results_extractor.extract(query_html_path)
67
- except Exception as e:
68
- logger.error(f"Failed to extract search results for query '{query}': {e}")
69
- continue
70
-
71
- queries_search_results.append(query_search_results)
72
- logger.note(queries_search_results)
73
-
74
- if item.extract_webpage:
75
- queries_search_results = self.extract_webpages(
76
- queries_search_results,
77
- overwrite_webpage_html=item.overwrite_webpage_html,
78
- )
79
- return queries_search_results
80
-
81
- def extract_webpages(self, queries_search_results, overwrite_webpage_html=False):
82
- for query_idx, query_search_results in enumerate(queries_search_results):
83
- try:
84
- # Fetch webpages with urls
85
- batch_webpage_fetcher = BatchWebpageFetcher()
86
- urls = [
87
- query_result["url"]
88
- for query_result in query_search_results["query_results"]
89
- ]
90
- url_and_html_path_list = batch_webpage_fetcher.fetch(
91
- urls,
92
- overwrite=overwrite_webpage_html,
93
- output_parent=query_search_results["query"],
94
- )
95
- except Exception as e:
96
- logger.error(f"Failed to fetch webpages for query '{query_search_results['query']}': {e}")
97
- continue
98
-
99
- # Extract webpage contents from htmls
100
- html_paths = [
101
- str(url_and_html_path["html_path"])
102
- for url_and_html_path in url_and_html_path_list
103
- ]
104
- batch_webpage_content_extractor = BatchWebpageContentExtractor()
105
- try:
106
- html_path_and_extracted_content_list = (
107
- batch_webpage_content_extractor.extract(html_paths)
108
- )
109
- except Exception as e:
110
- logger.error(f"Failed to extract webpage contents for query '{query_search_results['query']}': {e}")
111
- continue
112
-
113
- # Build the map of url to extracted_content
114
- html_path_to_url_dict = {
115
- str(url_and_html_path["html_path"]): url_and_html_path["url"]
116
- for url_and_html_path in url_and_html_path_list
117
- }
118
- url_to_extracted_content_dict = {
119
- html_path_to_url_dict[
120
- html_path_and_extracted_content["html_path"]
121
- ]: html_path_and_extracted_content["extracted_content"]
122
- for html_path_and_extracted_content in html_path_and_extracted_content_list
123
- }
124
-
125
- # Write extracted contents (as 'text' field) to query_search_results
126
- for query_result_idx, query_result in enumerate(
127
- query_search_results["query_results"]
128
- ):
129
- url = query_result["url"]
130
- extracted_content = url_to_extracted_content_dict.get(url, "")
131
- queries_search_results[query_idx]["query_results"][query_result_idx][
132
- "text"
133
- ] = extracted_content
134
-
135
- return queries_search_results
136
-
137
-
138
- class ArgParser(argparse.ArgumentParser):
139
- def __init__(self, *args, **kwargs):
140
- super(ArgParser, self).__init__(*args, **kwargs)
141
-
142
- self.add_argument(
143
- "-q",
144
- "--queries",
145
- type=str,
146
- nargs="+",
147
- required=True,
148
- help="Queries to search",
149
- )
150
- self.add_argument(
151
- "-n",
152
- "--result_num",
153
- type=int,
154
- default=10,
155
- help="Number of search results",
156
- )
157
- self.add_argument(
158
- "-s",
159
- "--safe",
160
- default=False,
161
- action="store_true",
162
- help="Enable SafeSearch",
163
- )
164
- self.add_argument(
165
- "-t",
166
- "--types",
167
- type=str,
168
- nargs="+",
169
- default=["web"],
170
- choices=["web", "image", "videos", "news"],
171
- help="Types of search results",
172
- )
173
- self.add_argument(
174
- "-e",
175
- "--extract_webpage",
176
- default=False,
177
- action="store_true",
178
- help="Enable extracting main text contents from webpage",
179
- )
180
- self.add_argument(
181
- "-o",
182
- "--overwrite_query_html",
183
- default=False,
184
- action="store_true",
185
- help="Overwrite HTML file of query results",
186
- )
187
- self.add_argument(
188
- "-w",
189
- "--overwrite_webpage_html",
190
- default=False,
191
- action="store_true",
192
- help="Overwrite HTML files of webpages from query results",
193
- )
194
-
195
- self.args = self.parse_args()
196
-
197
-
1
+
2
+ from pydantic import BaseModel, Field
3
+ from typing import Union
4
+
5
+ from DeepWEBS.utilsdw.logger import logger
6
+ from DeepWEBS.networks.google_searcher import GoogleSearcher
7
+ from DeepWEBS.networks.webpage_fetcher import BatchWebpageFetcher
8
+ from DeepWEBS.documents.query_results_extractor import QueryResultsExtractor
9
+ from DeepWEBS.documents.webpage_content_extractor import BatchWebpageContentExtractor
10
+ from DeepWEBS.utilsdw.logger import logger
11
+ import argparse
12
+
13
+ class DeepWEBS:
14
+ def __init__(self):
15
+ pass
16
+
17
+ class DeepSearch(BaseModel):
18
+ queries: list = Field(
19
+ default=[""],
20
+ description="(list[str]) Queries to search",
21
+ )
22
+ result_num: int = Field(
23
+ default=10,
24
+ description="(int) Number of search results",
25
+ )
26
+ safe: bool = Field(
27
+ default=False,
28
+ description="(bool) Enable SafeSearch",
29
+ )
30
+ types: list = Field(
31
+ default=["web"],
32
+ description="(list[str]) Types of search results: `web`, `image`, `videos`, `news`",
33
+ )
34
+ extract_webpage: bool = Field(
35
+ default=False,
36
+ description="(bool) Enable extracting main text contents from webpage, will add `text` filed in each `query_result` dict",
37
+ )
38
+ overwrite_query_html: bool = Field(
39
+ default=False,
40
+ description="(bool) Overwrite HTML file of query results",
41
+ )
42
+ overwrite_webpage_html: bool = Field(
43
+ default=False,
44
+ description="(bool) Overwrite HTML files of webpages from query results",
45
+ )
46
+
47
+ def queries_to_search_results(self, item: DeepSearch):
48
+ google_searcher = GoogleSearcher()
49
+ queries_search_results = []
50
+ for query in item.queries:
51
+ query_results_extractor = QueryResultsExtractor()
52
+ if not query.strip():
53
+ continue
54
+ try:
55
+ query_html_path = google_searcher.search(
56
+ query=query,
57
+ result_num=item.result_num,
58
+ safe=item.safe,
59
+ overwrite=item.overwrite_query_html,
60
+ )
61
+ except Exception as e:
62
+ logger.error(f"Failed to search for query '{query}': {e}")
63
+ continue
64
+
65
+ try:
66
+ query_search_results = query_results_extractor.extract(query_html_path)
67
+ except Exception as e:
68
+ logger.error(f"Failed to extract search results for query '{query}': {e}")
69
+ continue
70
+
71
+ queries_search_results.append(query_search_results)
72
+ logger.note(queries_search_results)
73
+
74
+ if item.extract_webpage:
75
+ queries_search_results = self.extract_webpages(
76
+ queries_search_results,
77
+ overwrite_webpage_html=item.overwrite_webpage_html,
78
+ )
79
+ return queries_search_results
80
+
81
+ def extract_webpages(self, queries_search_results, overwrite_webpage_html=False):
82
+ for query_idx, query_search_results in enumerate(queries_search_results):
83
+ try:
84
+ # Fetch webpages with urls
85
+ batch_webpage_fetcher = BatchWebpageFetcher()
86
+ urls = [
87
+ query_result["url"]
88
+ for query_result in query_search_results["query_results"]
89
+ ]
90
+ url_and_html_path_list = batch_webpage_fetcher.fetch(
91
+ urls,
92
+ overwrite=overwrite_webpage_html,
93
+ output_parent=query_search_results["query"],
94
+ )
95
+ except Exception as e:
96
+ logger.error(f"Failed to fetch webpages for query '{query_search_results['query']}': {e}")
97
+ continue
98
+
99
+ # Extract webpage contents from htmls
100
+ html_paths = [
101
+ str(url_and_html_path["html_path"])
102
+ for url_and_html_path in url_and_html_path_list
103
+ ]
104
+ batch_webpage_content_extractor = BatchWebpageContentExtractor()
105
+ try:
106
+ html_path_and_extracted_content_list = (
107
+ batch_webpage_content_extractor.extract(html_paths)
108
+ )
109
+ except Exception as e:
110
+ logger.error(f"Failed to extract webpage contents for query '{query_search_results['query']}': {e}")
111
+ continue
112
+
113
+ # Build the map of url to extracted_content
114
+ html_path_to_url_dict = {
115
+ str(url_and_html_path["html_path"]): url_and_html_path["url"]
116
+ for url_and_html_path in url_and_html_path_list
117
+ }
118
+ url_to_extracted_content_dict = {
119
+ html_path_to_url_dict[
120
+ html_path_and_extracted_content["html_path"]
121
+ ]: html_path_and_extracted_content["extracted_content"]
122
+ for html_path_and_extracted_content in html_path_and_extracted_content_list
123
+ }
124
+
125
+ # Write extracted contents (as 'text' field) to query_search_results
126
+ for query_result_idx, query_result in enumerate(
127
+ query_search_results["query_results"]
128
+ ):
129
+ url = query_result["url"]
130
+ extracted_content = url_to_extracted_content_dict.get(url, "")
131
+ queries_search_results[query_idx]["query_results"][query_result_idx][
132
+ "text"
133
+ ] = extracted_content
134
+
135
+ return queries_search_results
136
+
137
+
138
+ class ArgParser(argparse.ArgumentParser):
139
+ def __init__(self, *args, **kwargs):
140
+ super(ArgParser, self).__init__(*args, **kwargs)
141
+
142
+ self.add_argument(
143
+ "-q",
144
+ "--queries",
145
+ type=str,
146
+ nargs="+",
147
+ required=True,
148
+ help="Queries to search",
149
+ )
150
+ self.add_argument(
151
+ "-n",
152
+ "--result_num",
153
+ type=int,
154
+ default=10,
155
+ help="Number of search results",
156
+ )
157
+ self.add_argument(
158
+ "-s",
159
+ "--safe",
160
+ default=False,
161
+ action="store_true",
162
+ help="Enable SafeSearch",
163
+ )
164
+ self.add_argument(
165
+ "-t",
166
+ "--types",
167
+ type=str,
168
+ nargs="+",
169
+ default=["web"],
170
+ choices=["web", "image", "videos", "news"],
171
+ help="Types of search results",
172
+ )
173
+ self.add_argument(
174
+ "-e",
175
+ "--extract_webpage",
176
+ default=False,
177
+ action="store_true",
178
+ help="Enable extracting main text contents from webpage",
179
+ )
180
+ self.add_argument(
181
+ "-o",
182
+ "--overwrite_query_html",
183
+ default=False,
184
+ action="store_true",
185
+ help="Overwrite HTML file of query results",
186
+ )
187
+ self.add_argument(
188
+ "-w",
189
+ "--overwrite_webpage_html",
190
+ default=False,
191
+ action="store_true",
192
+ help="Overwrite HTML files of webpages from query results",
193
+ )
194
+
195
+ self.args = self.parse_args()
196
+
197
+
@@ -1,7 +1,7 @@
1
1
  """Webscout.
2
2
 
3
3
  Search for words, documents, images, videos, news, maps and text translation
4
- using the DuckDuckGo.com search engine.
4
+ using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
5
5
  """
6
6
 
7
7
  import logging
@@ -9,6 +9,8 @@ from .webscout_search import WEBS
9
9
  from .webscout_search_async import AsyncWEBS
10
10
  from .version import __version__
11
11
  from .DWEBS import DeepWEBS
12
+ from .AIutel import appdir
13
+
12
14
  __all__ = ["WEBS", "AsyncWEBS", "__version__", "cli"]
13
15
 
14
16
  logging.getLogger("webscout").addHandler(logging.NullHandler())
@@ -0,0 +1,2 @@
1
+ __version__ = "1.2.5"
2
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: webscout
3
- Version: 1.2.3
3
+ Version: 1.2.5
4
4
  Summary: Search for words, documents, images, videos, news, maps and text translation using the Google, DuckDuckGo.com, yep.com, phind.com, you.com, etc Also containes AI models
5
5
  Author: OEvortex
6
6
  Author-email: helpingai5@gmail.com
@@ -45,6 +45,7 @@ Requires-Dist: sse_starlette
45
45
  Requires-Dist: termcolor
46
46
  Requires-Dist: tiktoken
47
47
  Requires-Dist: tldextract
48
+ Requires-Dist: orjson
48
49
  Provides-Extra: dev
49
50
  Requires-Dist: ruff>=0.1.6; extra == "dev"
50
51
  Requires-Dist: pytest>=7.4.2; extra == "dev"
@@ -92,6 +93,7 @@ Also containes AI models that you can use
92
93
  - [6. `BlackBox` - Search/chat With BlackBox](#6-blackbox---searchchat-with-blackbox)
93
94
  - [7. `PERPLEXITY` - Search With PERPLEXITY](#7-perplexity---search-with-perplexity)
94
95
  - [8. `OpenGPT` - chat With OPENGPT](#8-opengpt---chat-with-opengpt)
96
+ - [9. `KOBOLDIA` -](#9-koboldia--)
95
97
  - [usage of special .LLM file from webscout (webscout.LLM)](#usage-of-special-llm-file-from-webscout-webscoutllm)
96
98
  - [`LLM`](#llm)
97
99
 
@@ -604,6 +606,24 @@ opengpt = OPENGPT(is_conversation=True, max_tokens=8000, timeout=30)
604
606
  prompt = "tell me about india"
605
607
  response_str = opengpt.chat(prompt)
606
608
  print(response_str)
609
+ ```
610
+ ### 9. `KOBOLDIA` -
611
+ ```python
612
+ from webscout.AI import KOBOLDAI
613
+
614
+ # Instantiate the KOBOLDAI class with default parameters
615
+ koboldai = KOBOLDAI()
616
+
617
+ # Define a prompt to send to the AI
618
+ prompt = "What is the capital of France?"
619
+
620
+ # Use the 'ask' method to get a response from the AI
621
+ response = koboldai.ask(prompt)
622
+
623
+ # Extract and print the message from the response
624
+ message = koboldai.get_message(response)
625
+ print(message)
626
+
607
627
  ```
608
628
 
609
629
  ## usage of special .LLM file from webscout (webscout.LLM)
@@ -19,6 +19,7 @@ sse_starlette
19
19
  termcolor
20
20
  tiktoken
21
21
  tldextract
22
+ orjson
22
23
 
23
24
  [dev]
24
25
  ruff>=0.1.6
@@ -1,2 +0,0 @@
1
- __version__ = "1.2.3"
2
-
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes