webscout 4.3__tar.gz → 4.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- {webscout-4.3/webscout.egg-info → webscout-4.4}/PKG-INFO +7 -1
- {webscout-4.3 → webscout-4.4}/setup.py +8 -2
- {webscout-4.3 → webscout-4.4}/webscout/AIauto.py +5 -7
- {webscout-4.3 → webscout-4.4}/webscout/Extra/autollama.py +20 -9
- {webscout-4.3 → webscout-4.4}/webscout/Extra/gguf.py +5 -19
- webscout-4.4/webscout/Extra/weather.py +67 -0
- {webscout-4.3 → webscout-4.4}/webscout/Extra/weather_ascii.py +5 -0
- webscout-4.4/webscout/GoogleS.py +342 -0
- {webscout-4.3 → webscout-4.4}/webscout/__init__.py +6 -3
- {webscout-4.3 → webscout-4.4}/webscout/cli.py +22 -21
- {webscout-4.3 → webscout-4.4}/webscout/utils.py +13 -3
- {webscout-4.3 → webscout-4.4}/webscout/webai.py +3 -0
- {webscout-4.3 → webscout-4.4}/webscout/webscout_search.py +34 -26
- {webscout-4.3 → webscout-4.4/webscout.egg-info}/PKG-INFO +7 -1
- {webscout-4.3 → webscout-4.4}/webscout.egg-info/SOURCES.txt +1 -0
- {webscout-4.3 → webscout-4.4}/webscout.egg-info/requires.txt +6 -0
- webscout-4.3/webscout/Extra/weather.py +0 -49
- {webscout-4.3 → webscout-4.4}/LICENSE.md +0 -0
- {webscout-4.3 → webscout-4.4}/README.md +0 -0
- {webscout-4.3 → webscout-4.4}/setup.cfg +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/AIbase.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/AIutel.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/DWEBS.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Extra/__init__.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/LLM.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/__init__.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/_version.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/formats.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/model.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/rawdog.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/samplers.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/thread.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Local/utils.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/BasedGPT.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Berlin4h.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Blackboxai.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/ChatGPTUK.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Cohere.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Deepinfra.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Deepseek.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/FreeGemini.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Gemini.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Geminiflash.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Geminipro.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Groq.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Koboldai.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Leo.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Llama.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/OLLAMA.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/OpenGPT.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Openai.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Perplexity.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Phind.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Poe.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Reka.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/ThinkAnyAI.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/VTLchat.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Xjai.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Yepchat.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/Youchat.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/Provider/__init__.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/YTdownloader.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/__main__.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/async_providers.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/exceptions.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/g4f.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/models.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/tempid.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/transcriber.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/version.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/voice.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/webscout_search_async.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout/websx_search.py +0 -0
- {webscout-4.3 → webscout-4.4}/webscout.egg-info/dependency_links.txt +0 -0
- {webscout-4.3 → webscout-4.4}/webscout.egg-info/entry_points.txt +0 -0
- {webscout-4.3 → webscout-4.4}/webscout.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: webscout
|
|
3
|
-
Version: 4.3
|
|
3
|
+
Version: 4.4
|
|
4
4
|
Summary: Search for anything using Google, DuckDuckGo, brave, qwant, phind.com, Contains AI models, can transcribe yt videos, temporary email and phone number generation, has TTS support, webai (terminal gpt and open interpreter) and offline LLMs and more
|
|
5
5
|
Author: OEvortex
|
|
6
6
|
Author-email: helpingai5@gmail.com
|
|
@@ -34,6 +34,7 @@ Requires-Dist: tqdm
|
|
|
34
34
|
Requires-Dist: webdriver-manager
|
|
35
35
|
Requires-Dist: halo>=0.0.31
|
|
36
36
|
Requires-Dist: g4f>=0.2.2.3
|
|
37
|
+
Requires-Dist: g4f[webdriver]
|
|
37
38
|
Requires-Dist: rich
|
|
38
39
|
Requires-Dist: python-dotenv
|
|
39
40
|
Requires-Dist: beautifulsoup4
|
|
@@ -55,6 +56,11 @@ Requires-Dist: playsound
|
|
|
55
56
|
Requires-Dist: poe_api_wrapper
|
|
56
57
|
Requires-Dist: pyreqwest_impersonate
|
|
57
58
|
Requires-Dist: ballyregan
|
|
59
|
+
Requires-Dist: nodriver
|
|
60
|
+
Requires-Dist: PyExecJS
|
|
61
|
+
Requires-Dist: ollama
|
|
62
|
+
Requires-Dist: pyfiglet
|
|
63
|
+
Requires-Dist: yaspin
|
|
58
64
|
Provides-Extra: dev
|
|
59
65
|
Requires-Dist: ruff>=0.1.6; extra == "dev"
|
|
60
66
|
Requires-Dist: pytest>=7.4.2; extra == "dev"
|
|
@@ -5,7 +5,7 @@ with open("README.md", encoding="utf-8") as f:
|
|
|
5
5
|
|
|
6
6
|
setup(
|
|
7
7
|
name="webscout",
|
|
8
|
-
version="4.3",
|
|
8
|
+
version="4.4",
|
|
9
9
|
description="Search for anything using Google, DuckDuckGo, brave, qwant, phind.com, Contains AI models, can transcribe yt videos, temporary email and phone number generation, has TTS support, webai (terminal gpt and open interpreter) and offline LLMs and more",
|
|
10
10
|
long_description=README,
|
|
11
11
|
long_description_content_type="text/markdown",
|
|
@@ -38,6 +38,7 @@ setup(
|
|
|
38
38
|
"webdriver-manager",
|
|
39
39
|
"halo>=0.0.31",
|
|
40
40
|
"g4f>=0.2.2.3",
|
|
41
|
+
"g4f[webdriver]",
|
|
41
42
|
"rich",
|
|
42
43
|
"python-dotenv",
|
|
43
44
|
"beautifulsoup4",
|
|
@@ -58,7 +59,12 @@ setup(
|
|
|
58
59
|
"playsound",
|
|
59
60
|
"poe_api_wrapper",
|
|
60
61
|
"pyreqwest_impersonate",
|
|
61
|
-
"ballyregan"
|
|
62
|
+
"ballyregan",
|
|
63
|
+
"nodriver",
|
|
64
|
+
"PyExecJS",
|
|
65
|
+
"ollama",
|
|
66
|
+
"pyfiglet",
|
|
67
|
+
"yaspin",
|
|
62
68
|
],
|
|
63
69
|
entry_points={
|
|
64
70
|
"console_scripts": [
|
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
from webscout.AIbase import Provider, AsyncProvider
|
|
2
2
|
from webscout.Provider.ThinkAnyAI import ThinkAnyAI
|
|
3
3
|
from webscout.Provider.Xjai import Xjai
|
|
4
|
-
from webscout.Provider.Llama import
|
|
5
|
-
from webscout.Provider.Llama import AsyncLLAMA2
|
|
4
|
+
from webscout.Provider.Llama import LLAMA
|
|
6
5
|
from webscout.Provider.Leo import LEO
|
|
7
6
|
from webscout.Provider.Leo import AsyncLEO
|
|
8
7
|
from webscout.Provider.Koboldai import KOBOLDAI
|
|
@@ -44,7 +43,7 @@ provider_map: dict[
|
|
|
44
43
|
Union[
|
|
45
44
|
ThinkAnyAI,
|
|
46
45
|
Xjai,
|
|
47
|
-
|
|
46
|
+
LLAMA,
|
|
48
47
|
LEO,
|
|
49
48
|
KOBOLDAI,
|
|
50
49
|
OPENGPT,
|
|
@@ -69,7 +68,7 @@ provider_map: dict[
|
|
|
69
68
|
] = {
|
|
70
69
|
"ThinkAnyAI": ThinkAnyAI,
|
|
71
70
|
"Xjai": Xjai,
|
|
72
|
-
"LLAMA2":
|
|
71
|
+
"LLAMA2": LLAMA,
|
|
73
72
|
"LEO": LEO,
|
|
74
73
|
"KOBOLDAI": KOBOLDAI,
|
|
75
74
|
"OPENGPT": OPENGPT,
|
|
@@ -124,7 +123,7 @@ class AUTO(Provider):
|
|
|
124
123
|
self.provider: Union[
|
|
125
124
|
ThinkAnyAI,
|
|
126
125
|
Xjai,
|
|
127
|
-
|
|
126
|
+
LLAMA,
|
|
128
127
|
LEO,
|
|
129
128
|
KOBOLDAI,
|
|
130
129
|
OPENGPT,
|
|
@@ -353,13 +352,12 @@ class AsyncAUTO(AsyncProvider):
|
|
|
353
352
|
AsyncOPENGPT,
|
|
354
353
|
AsyncKOBOLDAI,
|
|
355
354
|
AsyncPhindSearch,
|
|
356
|
-
AsyncLLAMA2,
|
|
357
355
|
AsyncBLACKBOXAI,
|
|
358
356
|
AsyncGPT4FREE,
|
|
359
357
|
AsyncLEO,
|
|
360
358
|
ThinkAnyAI,
|
|
361
359
|
Xjai,
|
|
362
|
-
|
|
360
|
+
LLAMA,
|
|
363
361
|
LEO,
|
|
364
362
|
KOBOLDAI,
|
|
365
363
|
OPENGPT,
|
|
@@ -1,6 +1,14 @@
|
|
|
1
1
|
import subprocess
|
|
2
2
|
import argparse
|
|
3
3
|
import os
|
|
4
|
+
from rich.console import Console
|
|
5
|
+
from rich.panel import Panel
|
|
6
|
+
from rich.progress import track
|
|
7
|
+
from yaspin import yaspin
|
|
8
|
+
from pyfiglet import figlet_format
|
|
9
|
+
import time
|
|
10
|
+
|
|
11
|
+
console = Console()
|
|
4
12
|
|
|
5
13
|
def autollama(model_path, gguf_file):
|
|
6
14
|
"""Manages models with Ollama using the autollama.sh script.
|
|
@@ -9,6 +17,7 @@ def autollama(model_path, gguf_file):
|
|
|
9
17
|
model_path (str): The path to the Hugging Face model.
|
|
10
18
|
gguf_file (str): The name of the GGUF file.
|
|
11
19
|
"""
|
|
20
|
+
console.print(f"[bold green]{figlet_format('Autollama')}[/]\n", justify="center")
|
|
12
21
|
|
|
13
22
|
# Check if autollama.sh exists in the current working directory
|
|
14
23
|
script_path = os.path.join(os.getcwd(), "autollama.sh")
|
|
@@ -172,25 +181,27 @@ echo "Use Ollama run $MODEL_NAME"
|
|
|
172
181
|
# Execute the command
|
|
173
182
|
process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
|
|
174
183
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
print(line, end='')
|
|
178
|
-
|
|
179
|
-
for line in process.stderr:
|
|
180
|
-
print(line, end='')
|
|
184
|
+
for line in iter(process.stdout.readline, ''):
|
|
185
|
+
console.print(Panel(line.strip(), title="Autollama Output", expand=False))
|
|
181
186
|
|
|
187
|
+
for line in iter(process.stderr.readline, ''):
|
|
188
|
+
console.print(Panel(line.strip(), title="Autollama Errors (if any)", expand=False))
|
|
189
|
+
|
|
182
190
|
process.wait()
|
|
191
|
+
console.print("[green]Model is ready![/]")
|
|
183
192
|
|
|
184
193
|
def main():
|
|
185
194
|
parser = argparse.ArgumentParser(description='Automatically create and run an Ollama model in Ollama')
|
|
186
|
-
parser.add_argument('-m', '--model_path', required=True, help='Set the
|
|
195
|
+
parser.add_argument('-m', '--model_path', required=True, help='Set the huggingface model id to the Hugging Face model')
|
|
187
196
|
parser.add_argument('-g', '--gguf_file', required=True, help='Set the GGUF file name')
|
|
188
197
|
args = parser.parse_args()
|
|
189
198
|
|
|
190
199
|
try:
|
|
191
|
-
|
|
200
|
+
with yaspin(text="Processing...") as spinner:
|
|
201
|
+
autollama(args.model_path, args.gguf_file)
|
|
202
|
+
spinner.ok("Done!")
|
|
192
203
|
except Exception as e:
|
|
193
|
-
print(f"Error: {e}")
|
|
204
|
+
console.print(f"[red]Error: {e}[/]")
|
|
194
205
|
exit(1)
|
|
195
206
|
|
|
196
207
|
if __name__ == "__main__":
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
# webscout/Extra/gguf.py
|
|
2
2
|
import subprocess
|
|
3
|
-
import argparse
|
|
4
3
|
import os
|
|
4
|
+
from pyfiglet import figlet_format
|
|
5
|
+
from rich.console import Console
|
|
6
|
+
|
|
7
|
+
console = Console()
|
|
5
8
|
|
|
6
9
|
def convert(model_id, username=None, token=None, quantization_methods="q4_k_m,q5_k_m"):
|
|
7
10
|
"""Converts and quantizes a Hugging Face model to GGUF format.
|
|
@@ -17,6 +20,7 @@ def convert(model_id, username=None, token=None, quantization_methods="q4_k_m,q5
|
|
|
17
20
|
ValueError: If an invalid quantization method is provided.
|
|
18
21
|
"""
|
|
19
22
|
|
|
23
|
+
console.print(f"[bold green]{figlet_format('GGUF Converter')}[/]\n", justify="center")
|
|
20
24
|
# List of valid quantization methods
|
|
21
25
|
valid_methods = [
|
|
22
26
|
"q2_k", "q3_k_l", "q3_k_m", "q3_k_s",
|
|
@@ -220,21 +224,3 @@ echo "Script completed."
|
|
|
220
224
|
process.wait()
|
|
221
225
|
|
|
222
226
|
|
|
223
|
-
def main():
|
|
224
|
-
parser = argparse.ArgumentParser(description='Convert and quantize model using gguf.sh')
|
|
225
|
-
parser.add_argument('-m', '--model_id', required=True, help='Set the HF model ID (e.g., "google/flan-t5-xl")')
|
|
226
|
-
parser.add_argument('-u', '--username', help='Set your Hugging Face username (required for uploads)')
|
|
227
|
-
parser.add_argument('-t', '--token', help='Set your Hugging Face API token (required for uploads)')
|
|
228
|
-
parser.add_argument('-q', '--quantization_methods', default="q4_k_m,q5_k_m",
|
|
229
|
-
help='Comma-separated quantization methods (default: q4_k_m,q5_k_m). Valid methods: q2_k, q3_k_l, q3_k_m, q3_k_s, q4_0, q4_1, q4_k_m, q4_k_s, q5_0, q5_1, q5_k_m, q5_k_s, q6_k, q8_0')
|
|
230
|
-
|
|
231
|
-
args = parser.parse_args()
|
|
232
|
-
|
|
233
|
-
try:
|
|
234
|
-
convert(args.model_id, args.username, args.token, args.quantization_methods)
|
|
235
|
-
except ValueError as e:
|
|
236
|
-
print(e)
|
|
237
|
-
exit(1)
|
|
238
|
-
|
|
239
|
-
if __name__ == "__main__":
|
|
240
|
-
main()
|
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import requests
|
|
2
|
+
from rich.console import Console
|
|
3
|
+
from rich.table import Table
|
|
4
|
+
from yaspin import yaspin
|
|
5
|
+
from pyfiglet import figlet_format
|
|
6
|
+
|
|
7
|
+
console = Console()
|
|
8
|
+
|
|
9
|
+
def get(location, timeout=10):
    """Fetch weather data for the given location from wttr.in.

    Args:
        location (str): The location for which to fetch weather data.
        timeout (float): Seconds to wait for the server before giving up.
            Defaults to 10.

    Returns:
        dict: The decoded JSON weather data if the request is successful
        (HTTP 200), otherwise a string indicating the error.

    Raises:
        requests.RequestException: If the request itself cannot be
            completed (connection failure, timeout, ...).
    """
    # Local import keeps the block self-contained.
    from urllib.parse import quote

    # Percent-encode the location so characters such as "?", "#" or spaces
    # cannot change the request path or inject query parameters.
    encoded_location = quote(str(location), safe=",+@~:")
    url = f"https://wttr.in/{encoded_location}?format=j1"

    with yaspin(text="Fetching weather data...") as spinner:
        # Without a timeout a stalled server would block the caller forever.
        response = requests.get(url, timeout=timeout)
        # Only report success when the server actually answered with 200.
        if response.status_code == 200:
            spinner.ok("✅ ")
        else:
            spinner.fail("❌ ")

    if response.status_code == 200:
        return response.json()
    return f"Error: Unable to fetch weather data. Status code: {response.status_code}"
|
|
29
|
+
|
|
30
|
+
def print_weather(weather_data):
    """Print the weather data in a user-friendly format.

    Args:
        weather_data (dict or str): The weather data returned from get(),
            or an error message.
    """
    if isinstance(weather_data, str):
        # get() already prefixes its messages with "Error: "; strip it so the
        # output does not read "Error: Error: ...".
        message = weather_data
        prefix = "Error: "
        if message.startswith(prefix):
            message = message[len(prefix):]
        console.print(f"[bold red]Error:[/] {message}")
        return

    current = weather_data['current_condition'][0]
    location_name = weather_data['nearest_area'][0]['areaName'][0]['value']

    console.print(f"[bold blue]\n{figlet_format('Weather Report')}[/]\n", justify="center")
    console.print(f"[bold green]Weather in {location_name}:[/]\n")

    # Current conditions: a header-less two-column table.
    table = Table(show_header=False, show_lines=True)
    table.add_row("Temperature:", f"{current['temp_C']}°C / {current['temp_F']}°F")
    table.add_row("Condition:", current['weatherDesc'][0]['value'])
    table.add_row("Humidity:", f"{current['humidity']}%")
    table.add_row("Wind:", f"{current['windspeedKmph']} km/h, {current['winddir16Point']}")
    console.print(table)

    console.print("\n[bold green]Forecast:[/]")
    table = Table(show_header=True, header_style="bold cyan")
    table.add_column("Date", style="dim", width=12)
    table.add_column("Temperature Range")
    table.add_column("Description")

    for day in weather_data['weather']:
        hourly = day['hourly']
        # Entry 4 is used as the day's representative description
        # (presumably the midday slot -- confirm against wttr.in's j1 format).
        # Clamp the index so a shorter list does not raise IndexError.
        if hourly:
            desc = hourly[min(4, len(hourly) - 1)]['weatherDesc'][0]['value']
        else:
            desc = "N/A"
        table.add_row(day['date'], f"{day['mintempC']}°C to {day['maxtempC']}°C", desc)
    console.print(table)
|
|
67
|
+
|
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
import requests
|
|
2
|
+
from rich.console import Console
|
|
3
|
+
from pyfiglet import figlet_format
|
|
2
4
|
|
|
5
|
+
console = Console()
|
|
3
6
|
def get(location):
|
|
4
7
|
"""Fetches ASCII art weather data for the given location.
|
|
5
8
|
Args:
|
|
@@ -9,6 +12,7 @@ def get(location):
|
|
|
9
12
|
str: ASCII art weather report if the request is successful,
|
|
10
13
|
otherwise an error message.
|
|
11
14
|
"""
|
|
15
|
+
console.print(f"[bold green]{figlet_format('Weather')}[/]\n", justify="center")
|
|
12
16
|
url = f"https://wttr.in/{location}"
|
|
13
17
|
response = requests.get(url, headers={'User-Agent': 'curl'})
|
|
14
18
|
|
|
@@ -16,3 +20,4 @@ def get(location):
|
|
|
16
20
|
return "\n".join(response.text.splitlines()[:-1])
|
|
17
21
|
else:
|
|
18
22
|
return f"Error: Unable to fetch weather data. Status code: {response.status_code}"
|
|
23
|
+
|
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import random
|
|
3
|
+
import sys
|
|
4
|
+
import time
|
|
5
|
+
import ssl
|
|
6
|
+
|
|
7
|
+
if sys.version_info[0] > 2:
|
|
8
|
+
from http.cookiejar import LWPCookieJar
|
|
9
|
+
from urllib.request import Request, urlopen
|
|
10
|
+
from urllib.parse import quote_plus, urlparse, parse_qs
|
|
11
|
+
else:
|
|
12
|
+
from cookielib import LWPCookieJar
|
|
13
|
+
from urllib import quote_plus
|
|
14
|
+
from urllib2 import Request, urlopen
|
|
15
|
+
from urlparse import urlparse, parse_qs
|
|
16
|
+
|
|
17
|
+
try:
|
|
18
|
+
from bs4 import BeautifulSoup
|
|
19
|
+
is_bs4 = True
|
|
20
|
+
except ImportError:
|
|
21
|
+
from BeautifulSoup import BeautifulSoup # type: ignore
|
|
22
|
+
is_bs4 = False
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
|
|
26
|
+
# Main search function.
|
|
27
|
+
'search',
|
|
28
|
+
|
|
29
|
+
# Shortcut for "get lucky" search.
|
|
30
|
+
'lucky',
|
|
31
|
+
|
|
32
|
+
# Miscellaneous utility functions.
|
|
33
|
+
'get_random_user_agent', 'get_tbs',
|
|
34
|
+
]
|
|
35
|
+
|
|
36
|
+
# URL templates to make Google searches.
|
|
37
|
+
url_home = "https://www.google.%(tld)s/"
url_search = (
    "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&"
    "btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&"
    "cr=%(country)s"
)
url_next_page = (
    "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&"
    "start=%(start)d&tbs=%(tbs)s&safe=%(safe)s&"
    "cr=%(country)s"
)
url_search_num = (
    "https://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&"
    "num=%(num)d&btnG=Google+Search&tbs=%(tbs)s&safe=%(safe)s&"
    "cr=%(country)s"
)
url_next_page_num = (
    "https://www.google.%(tld)s/search?hl=%(lang)s&"
    "q=%(query)s&num=%(num)d&start=%(start)d&tbs=%(tbs)s&"
    "safe=%(safe)s&cr=%(country)s"
)
# GET parameters the templates above already use; search() rejects
# extra_params that collide with any of them.
url_parameters = ('hl', 'q', 'num', 'btnG', 'start', 'tbs', 'safe', 'cr')

# Cookie jar, kept in the folder named by $HOME, then $USERHOME, and finally
# the current directory when neither variable is set.
# Failing to load an existing jar is ignored on purpose.
home_folder = os.getenv('HOME') or os.getenv('USERHOME') or '.'
cookie_jar = LWPCookieJar(os.path.join(home_folder, '.google-cookie'))
try:
    cookie_jar.load()
except Exception:
    pass
|
|
65
|
+
|
|
66
|
+
# Default user agent, unless instructed by the user to change it.
|
|
67
|
+
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)'

# Build the pool of user agents from a file shipped next to this module.
# The gzip-compressed list is tried first, then the plain-text one; if
# neither yields at least one entry, the pool is just the default USER_AGENT.
try:
    install_folder = os.path.abspath(os.path.split(__file__)[0])
    try:
        import gzip
        user_agents_file = os.path.join(install_folder, 'user_agents.txt.gz')
        with gzip.open(user_agents_file, 'rb') as fp:
            # Decode so entries are text, like those of the plain-text
            # fallback, instead of bytes objects.
            user_agents_list = [raw.decode('utf-8').strip() for raw in fp]
    except Exception:
        user_agents_file = os.path.join(install_folder, 'user_agents.txt')
        with open(user_agents_file) as fp:
            user_agents_list = [line.strip() for line in fp]
    # Drop blank lines: an empty User-Agent header is never wanted, and an
    # empty pool would make random.choice() raise IndexError.
    user_agents_list = [agent for agent in user_agents_list if agent]
    if not user_agents_list:
        raise ValueError('no user agents found in %s' % user_agents_file)
except Exception:
    user_agents_list = [USER_AGENT]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
# Get a random user agent.
|
|
89
|
+
def get_random_user_agent():
    """
    Get a random user agent string.

    The value is picked with random.choice() from the module-level
    user_agents_list.

    :rtype: str
    :return: Random user agent string.
    """
    return random.choice(user_agents_list)
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
# Helper function to format the tbs parameter.
|
|
100
|
+
def get_tbs(from_date, to_date):
    """
    Build the value of the "tbs" parameter for a custom date range.

    :param datetime.date from_date: Python date object (range start).
    :param datetime.date to_date: Python date object (range end).

    :rtype: str
    :return: Dates encoded in tbs format, each rendered as MM/DD/YYYY.
    """
    date_format = '%m/%d/%Y'
    bounds = {
        'from_date': from_date.strftime(date_format),
        'to_date': to_date.strftime(date_format),
    }
    return 'cdr:1,cd_min:%(from_date)s,cd_max:%(to_date)s' % bounds
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
# Request the given URL and return the response page, using the cookie jar.
|
|
116
|
+
# If the cookie jar is inaccessible, the errors are ignored.
|
|
117
|
+
def get_page(url, user_agent=None, verify_ssl=True):
    """
    Request the given URL and return the response page, using the cookie jar.

    :param str url: URL to retrieve.
    :param str user_agent: User agent for the HTTP requests.
        Use None for the default.
    :param bool verify_ssl: Verify the SSL certificate to prevent
        traffic interception attacks. Defaults to True.

    :return: Body of the response exactly as returned by ``response.read()``
        (not decoded here).

    :raises IOError: An exception is raised on error.
    :raises URLError: Raised by urlopen on error.
    :raises HTTPError: Raised by urlopen on error.
    """
    if user_agent is None:
        user_agent = USER_AGENT
    request = Request(url)
    request.add_header('User-Agent', user_agent)
    cookie_jar.add_cookie_header(request)
    if verify_ssl:
        response = urlopen(request)
    else:
        # NOTE(review): private stdlib helper; it turns certificate
        # verification off, which is what verify_ssl=False asks for.
        context = ssl._create_unverified_context()
        response = urlopen(request, context=context)
    try:
        cookie_jar.extract_cookies(response, request)
        html = response.read()
    finally:
        # Always release the connection, even if reading the body fails.
        response.close()
    try:
        cookie_jar.save()
    except Exception:
        # Persisting cookies is best-effort: an unwritable cookie file must
        # not make the request itself fail.
        pass
    return html
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
# Filter links found in the Google result pages HTML code.
|
|
155
|
+
# Returns None if the link doesn't yield a valid result.
|
|
156
|
+
def filter_result(link):
    """
    Filter a link found in a Google result page.

    Links of the form ``/url?...`` are unwrapped to the value of their ``q``
    query parameter. A link is kept only if it has a network location that
    does not contain the substring ``google``.

    :param str link: Raw ``href`` value taken from an anchor tag.

    :return: The (possibly unwrapped) link, or None if it is rejected or
        any error occurs while examining it.
    """
    try:
        # Decode hidden URLs.
        if link.startswith('/url?'):
            wrapped = urlparse(link, 'http')
            link = parse_qs(wrapped.query)['q'][0]

        # Valid results are absolute URLs not pointing to a Google domain,
        # like images.google.com or googleusercontent.com for example.
        # TODO this could be improved!
        target = urlparse(link, 'http')
        if not target.netloc:
            return None
        if 'google' in target.netloc:
            return None
        return link

    # On error, return None.
    except Exception:
        return None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
# Returns a generator that yields URLs.
|
|
177
|
+
def search(query, tld='com', lang='en', tbs='0', safe='off', num=10, start=0,
           stop=None, pause=2.0, country='', extra_params=None,
           user_agent=None, verify_ssl=True):
    """
    Search the given query string using Google.

    :param str query: Query string. Must NOT be url-encoded.
    :param str tld: Top level domain.
    :param str lang: Language.
    :param str tbs: Time limits (i.e "qdr:h" => last hour,
        "qdr:d" => last 24 hours, "qdr:m" => last month).
    :param str safe: Safe search.
    :param int num: Number of results per page.
    :param int start: First result to retrieve.
    :param int stop: Last result to retrieve.
        Use None to keep searching forever.
    :param float pause: Lapse to wait between HTTP requests.
        A lapse too long will make the search slow, but a lapse too short may
        cause Google to block your IP. Your mileage may vary!
    :param str country: Country or region to focus the search on. Similar to
        changing the TLD, but does not yield exactly the same results.
        Only Google knows why...
    :param dict extra_params: A dictionary of extra HTTP GET parameters.
        Keys and values are passed through quote_plus before being appended
        to the URL. For example if you don't want Google to filter similar
        results you can set the extra_params to {'filter': '0'} which will
        append '&filter=0' to every query.
    :param str user_agent: User agent for the HTTP requests.
        Use None for the default.
    :param bool verify_ssl: Verify the SSL certificate to prevent
        traffic interception attacks. Defaults to True.

    :rtype: generator of str
    :return: Generator (iterator) that yields found URLs.
        If the stop parameter is None the iterator keeps requesting pages
        until a page yields no new result.

    :raises ValueError: If extra_params contains one of the built-in GET
        parameters. As this is a generator, the error surfaces when the
        first item is requested.
    """
    # Links already yielded. The links themselves are stored (rather than
    # their hashes) so a hash collision can never drop a distinct result.
    seen = set()

    # Count the number of links yielded.
    count = 0

    # Prepare the search string.
    query = quote_plus(query)

    # If no extra_params is given, create an empty dictionary.
    # We should avoid using an empty dictionary as a default value
    # in a function parameter in Python.
    if not extra_params:
        extra_params = {}

    # Check extra_params for overlapping.
    for builtin_param in url_parameters:
        if builtin_param in extra_params:
            raise ValueError(
                'GET parameter "%s" is overlapping with '
                'the built-in GET parameter' % builtin_param
            )

    def _fill(template):
        # Fill a URL template with the current search parameters. ``start``
        # is read at call time, so it reflects the page being requested.
        return template % {
            'tld': tld, 'lang': lang, 'query': query, 'tbs': tbs,
            'safe': safe, 'country': country, 'num': num, 'start': start,
        }

    # Grab the cookie from the home page.
    get_page(_fill(url_home), user_agent, verify_ssl)

    # Prepare the URL of the first request.
    if start:
        url = _fill(url_next_page if num == 10 else url_next_page_num)
    else:
        url = _fill(url_search if num == 10 else url_search_num)

    # Loop until we reach the maximum result, if any (otherwise, loop forever).
    while not stop or count < stop:

        # Remember last count to detect the end of results.
        last_count = count

        # Append extra GET parameters to the URL.
        # This is done on every iteration because we're
        # rebuilding the entire URL at the end of this loop.
        for key, value in extra_params.items():
            url = url + ('&%s=%s' % (quote_plus(key), quote_plus(value)))

        # Sleep between requests.
        # Keeps Google from banning you for making too many requests.
        time.sleep(pause)

        # Request the Google Search results page.
        html = get_page(url, user_agent, verify_ssl)

        # Parse the response and get every anchored URL.
        if is_bs4:
            soup = BeautifulSoup(html, 'html.parser')
        else:
            soup = BeautifulSoup(html)
        try:
            anchors = soup.find(id='search').findAll('a')
            # Sometimes (depending on the User-agent) there is
            # no id "search" in html response...
        except AttributeError:
            # Remove links of the top bar.
            gbar = soup.find(id='gbar')
            if gbar:
                gbar.clear()
            anchors = soup.findAll('a')

        # Process every anchored URL.
        for a in anchors:

            # Get the URL from the anchor tag.
            try:
                link = a['href']
            except KeyError:
                continue

            # Filter invalid links and links pointing to Google itself.
            link = filter_result(link)
            if not link:
                continue

            # Discard repeated results.
            if link in seen:
                continue
            seen.add(link)

            # Yield the result.
            yield link

            # Increase the results counter.
            # If we reached the limit, stop.
            count += 1
            if stop and count >= stop:
                return

        # End if there are no more results.
        # XXX TODO review this logic, not sure if this is still true!
        if last_count == count:
            break

        # Prepare the URL for the next request.
        start += num
        url = _fill(url_next_page if num == 10 else url_next_page_num)
|
|
329
|
+
|
|
330
|
+
|
|
331
|
+
# Shortcut to single-item search.
|
|
332
|
+
# Evaluates the iterator to return the single URL as a string.
|
|
333
|
+
def lucky(*args, **kwargs):
    """
    Return only the first URL produced by a search.

    Takes the same arguments as the main search function and forwards them
    unchanged; only the return value differs. If the search yields nothing,
    the StopIteration raised by next() propagates to the caller.

    :rtype: str
    :return: URL found by Google.
    """
    results = search(*args, **kwargs)
    return next(results)
|
|
@@ -5,16 +5,16 @@ from .DWEBS import *
|
|
|
5
5
|
from .transcriber import transcriber
|
|
6
6
|
from .voice import play_audio
|
|
7
7
|
from .websx_search import WEBSX
|
|
8
|
-
|
|
9
8
|
from .LLM import VLM, LLM
|
|
10
9
|
from .YTdownloader import *
|
|
11
|
-
|
|
10
|
+
from .GoogleS import *
|
|
12
11
|
import g4f
|
|
13
12
|
from .YTdownloader import *
|
|
14
13
|
from .Provider import *
|
|
15
14
|
from .Extra import gguf
|
|
16
15
|
from .Extra import autollama
|
|
17
16
|
from .Extra import weather_ascii, weather
|
|
17
|
+
|
|
18
18
|
__repo__ = "https://github.com/OE-LUCIFER/Webscout"
|
|
19
19
|
|
|
20
20
|
webai = [
|
|
@@ -58,12 +58,15 @@ __all__ = [
|
|
|
58
58
|
"WEBS",
|
|
59
59
|
"AsyncWEBS",
|
|
60
60
|
"__version__",
|
|
61
|
-
"
|
|
61
|
+
"DWEBS",
|
|
62
62
|
"transcriber",
|
|
63
63
|
"play_audio",
|
|
64
64
|
"TempMailClient",
|
|
65
65
|
"TemporaryPhoneNumber",
|
|
66
66
|
"LLM",
|
|
67
|
+
"YTdownloader",
|
|
68
|
+
"WEBSX",
|
|
69
|
+
"VLM",
|
|
67
70
|
# Localai models and utilities
|
|
68
71
|
# "Model",
|
|
69
72
|
# "Thread",
|