webscout-6.3-py3-none-any.whl → webscout-6.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of webscout has been flagged as potentially problematic.
- webscout/AIauto.py +191 -176
- webscout/AIbase.py +0 -197
- webscout/AIutel.py +488 -1130
- webscout/Bing_search.py +250 -153
- webscout/DWEBS.py +151 -19
- webscout/Extra/__init__.py +2 -1
- webscout/Extra/autocoder/__init__.py +9 -0
- webscout/Extra/autocoder/autocoder_utiles.py +121 -0
- webscout/Extra/autocoder/rawdog.py +681 -0
- webscout/Extra/autollama.py +246 -195
- webscout/Extra/gguf.py +441 -416
- webscout/LLM.py +206 -43
- webscout/Litlogger/__init__.py +681 -0
- webscout/Provider/DARKAI.py +1 -1
- webscout/Provider/EDITEE.py +1 -1
- webscout/Provider/NinjaChat.py +1 -1
- webscout/Provider/PI.py +221 -207
- webscout/Provider/Perplexity.py +598 -598
- webscout/Provider/RoboCoders.py +206 -0
- webscout/Provider/TTI/AiForce/__init__.py +22 -0
- webscout/Provider/TTI/AiForce/async_aiforce.py +257 -0
- webscout/Provider/TTI/AiForce/sync_aiforce.py +242 -0
- webscout/Provider/TTI/Nexra/__init__.py +22 -0
- webscout/Provider/TTI/Nexra/async_nexra.py +286 -0
- webscout/Provider/TTI/Nexra/sync_nexra.py +258 -0
- webscout/Provider/TTI/PollinationsAI/__init__.py +23 -0
- webscout/Provider/TTI/PollinationsAI/async_pollinations.py +330 -0
- webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +285 -0
- webscout/Provider/TTI/__init__.py +2 -4
- webscout/Provider/TTI/artbit/__init__.py +22 -0
- webscout/Provider/TTI/artbit/async_artbit.py +184 -0
- webscout/Provider/TTI/artbit/sync_artbit.py +176 -0
- webscout/Provider/TTI/blackbox/__init__.py +4 -0
- webscout/Provider/TTI/blackbox/async_blackbox.py +212 -0
- webscout/Provider/TTI/{blackboximage.py → blackbox/sync_blackbox.py} +199 -153
- webscout/Provider/TTI/deepinfra/__init__.py +4 -0
- webscout/Provider/TTI/deepinfra/async_deepinfra.py +227 -0
- webscout/Provider/TTI/deepinfra/sync_deepinfra.py +199 -0
- webscout/Provider/TTI/huggingface/__init__.py +22 -0
- webscout/Provider/TTI/huggingface/async_huggingface.py +199 -0
- webscout/Provider/TTI/huggingface/sync_huggingface.py +195 -0
- webscout/Provider/TTI/imgninza/__init__.py +4 -0
- webscout/Provider/TTI/imgninza/async_ninza.py +214 -0
- webscout/Provider/TTI/{imgninza.py → imgninza/sync_ninza.py} +209 -136
- webscout/Provider/TTI/talkai/__init__.py +4 -0
- webscout/Provider/TTI/talkai/async_talkai.py +229 -0
- webscout/Provider/TTI/talkai/sync_talkai.py +207 -0
- webscout/Provider/__init__.py +146 -139
- webscout/Provider/askmyai.py +2 -2
- webscout/Provider/cerebras.py +227 -219
- webscout/Provider/llama3mitril.py +0 -1
- webscout/Provider/mhystical.py +176 -0
- webscout/Provider/perplexitylabs.py +265 -0
- webscout/Provider/twitterclone.py +251 -245
- webscout/Provider/typegpt.py +359 -0
- webscout/__init__.py +28 -23
- webscout/__main__.py +5 -5
- webscout/cli.py +252 -280
- webscout/conversation.py +227 -0
- webscout/exceptions.py +161 -29
- webscout/litagent/__init__.py +172 -0
- webscout/litprinter/__init__.py +831 -0
- webscout/optimizers.py +270 -0
- webscout/prompt_manager.py +279 -0
- webscout/swiftcli/__init__.py +810 -0
- webscout/transcriber.py +479 -551
- webscout/update_checker.py +125 -0
- webscout/version.py +1 -1
- {webscout-6.3.dist-info → webscout-6.4.dist-info}/METADATA +26 -45
- {webscout-6.3.dist-info → webscout-6.4.dist-info}/RECORD +75 -45
- webscout/Provider/TTI/AIuncensoredimage.py +0 -103
- webscout/Provider/TTI/Nexra.py +0 -120
- webscout/Provider/TTI/PollinationsAI.py +0 -138
- webscout/Provider/TTI/WebSimAI.py +0 -142
- webscout/Provider/TTI/aiforce.py +0 -160
- webscout/Provider/TTI/artbit.py +0 -141
- webscout/Provider/TTI/deepinfra.py +0 -148
- webscout/Provider/TTI/huggingface.py +0 -155
- webscout/Provider/TTI/talkai.py +0 -116
- webscout/models.py +0 -23
- webscout/{g4f.py → gpt4free.py} +0 -0
- {webscout-6.3.dist-info → webscout-6.4.dist-info}/LICENSE.md +0 -0
- {webscout-6.3.dist-info → webscout-6.4.dist-info}/WHEEL +0 -0
- {webscout-6.3.dist-info → webscout-6.4.dist-info}/entry_points.txt +0 -0
- {webscout-6.3.dist-info → webscout-6.4.dist-info}/top_level.txt +0 -0
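The summary above comes from the registry's diff service, but because wheels are ordinary zip archives, the same file-level view can be reproduced locally. A minimal sketch, assuming both wheels have already been downloaded (for example with pip download webscout==6.3 --no-deps and pip download webscout==6.4 --no-deps):

    import zipfile

    def wheel_files(path: str) -> set:
        # A wheel is a zip archive; namelist() returns every packaged file.
        with zipfile.ZipFile(path) as wheel:
            return set(wheel.namelist())

    old = wheel_files("webscout-6.3-py3-none-any.whl")
    new = wheel_files("webscout-6.4-py3-none-any.whl")

    print("added:", sorted(new - old))    # e.g. webscout/conversation.py
    print("removed:", sorted(old - new))  # e.g. webscout/models.py

Per-file line counts like the +191 -176 figures above would additionally require extracting both archives and diffing the file contents.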
webscout/Bing_search.py
CHANGED
@@ -1,154 +1,251 @@
-from bs4 import BeautifulSoup
-import requests
-from typing import Dict, List, Optional, Union
-from concurrent.futures import ThreadPoolExecutor, as_completed
-from urllib.parse import quote, urlparse, parse_qs
-import base64
-import urllib3
-
-# Disable SSL warnings
-urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
-
-class BingS:
-    """
-[old lines 14-153: remainder of the previous implementation, not preserved in the extracted diff view]
+from bs4 import BeautifulSoup
+import requests
+from typing import Dict, List, Optional, Union
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import quote, urlparse, parse_qs
+import base64
+import urllib3
+
+# Disable SSL warnings
+urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
+
+class BingS:
+    """A Python interface for Bing search engine.
+
+    The BingS class provides a simple interface to perform searches on Bing.com
+    and extract search results programmatically.
+
+    Basic Usage:
+        >>> from webscout.Bing_search import BingS
+        >>> searcher = BingS()
+        >>> results = searcher.search("Python programming")
+        >>> for result in results:
+        ...     print(result['title'], result['href'])
+
+    Advanced Usage:
+        >>> # With custom headers and proxy
+        >>> headers = {'User-Agent': 'Custom User Agent'}
+        >>> proxy = 'http://proxy.example.com:8080'
+        >>> searcher = BingS(headers=headers, proxy=proxy)
+        >>> results = searcher.search(
+        ...     "AI developments",
+        ...     max_results=5,
+        ...     extract_webpage_text=True,
+        ...     max_extract_characters=1000
+        ... )
+        >>> # Access result fields
+        >>> for result in results:
+        ...     print(f"Title: {result['title']}")
+        ...     print(f"URL: {result['href']}")
+        ...     print(f"Description: {result['abstract']}")
+        ...     if result['visible_text']:
+        ...         print(f"Page Content: {result['visible_text'][:100]}...")
+
+    The class supports context management protocol:
+        >>> with BingS() as searcher:
+        ...     results = searcher.search("Python tutorials")
+
+    Return Dictionary Format:
+        {
+            'title': str,        # The title of the search result
+            'href': str,         # The URL of the search result
+            'abstract': str,     # Brief description or snippet
+            'index': int,        # Position in search results
+            'type': str,         # Type of result (always 'web')
+            'visible_text': str  # Extracted webpage text (if requested)
+        }
+    """
+
+    _executor: ThreadPoolExecutor = ThreadPoolExecutor(max_workers=10)
+
+    def __init__(
+        self,
+        headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str] = None,
+        timeout: Optional[int] = 10,
+    ) -> None:
+        """Initialize a new BingS instance.
+
+        Args:
+            headers (Optional[Dict[str, str]]): Custom HTTP headers for requests.
+                Defaults to a standard User-Agent if not provided.
+            proxy (Optional[str]): Proxy URL to use for requests.
+                Example: 'http://proxy.example.com:8080'
+            timeout (Optional[int]): Request timeout in seconds. Defaults to 10.
+
+        Example:
+            >>> searcher = BingS(
+            ...     headers={'User-Agent': 'Custom UA'},
+            ...     proxy='http://proxy.example.com:8080',
+            ...     timeout=15
+            ... )
+        """
+        self.proxy: Optional[str] = proxy
+        self.headers = headers if headers else {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
+        }
+        self.headers["Referer"] = "https://www.bing.com/"
+        self.client = requests.Session()
+        self.client.headers.update(self.headers)
+        self.client.proxies.update({"http": self.proxy, "https": self.proxy})
+        self.timeout = timeout
+
+    def __enter__(self) -> "BingS":
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.client.close()
+
+    def _get_url(
+        self,
+        method: str,
+        url: str,
+        params: Optional[Dict[str, str]] = None,
+        data: Optional[Union[Dict[str, str], bytes]] = None,
+    ) -> bytes:
+        try:
+            resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout, verify=False)
+        except Exception as ex:
+            raise Exception(f"{url} {type(ex).__name__}: {ex}") from ex
+        if resp.status_code == 200:
+            return resp.content
+        raise Exception(f"{resp.url} returned status code {resp.status_code}. {params=} {data=}")
+
+    def extract_text_from_webpage(self, html_content, max_characters=None):
+        """Extracts visible text from HTML content using BeautifulSoup."""
+        soup = BeautifulSoup(html_content, "html.parser")
+        # Remove unwanted tags
+        for tag in soup(["script", "style", "header", "footer", "nav"]):
+            tag.extract()
+        # Get the remaining visible text
+        visible_text = soup.get_text(separator=' ', strip=True)
+        if max_characters:
+            visible_text = visible_text[:max_characters]
+        return visible_text
+
+    def search(
+        self,
+        keywords: str,
+        max_results: Optional[int] = 10,
+        extract_webpage_text: bool = False,
+        max_extract_characters: Optional[int] = 100,
+    ) -> List[Dict[str, str]]:
+        """Perform a Bing search and return results.
+
+        Args:
+            keywords (str): Search query string.
+            max_results (Optional[int]): Maximum number of results to return.
+                Defaults to 10.
+            extract_webpage_text (bool): If True, fetches and extracts text from
+                each result webpage. Defaults to False.
+            max_extract_characters (Optional[int]): Maximum number of characters
+                to extract from each webpage. Only used if extract_webpage_text
+                is True. Defaults to 100.
+
+        Returns:
+            List[Dict[str, str]]: List of search results. Each result contains:
+                - title: The title of the search result
+                - href: The URL of the search result
+                - abstract: Brief description or snippet
+                - index: Position in search results
+                - type: Type of result (always 'web')
+                - visible_text: Extracted webpage text (if extract_webpage_text=True)
+
+        Raises:
+            AssertionError: If keywords is empty.
+            Exception: If request fails or returns non-200 status code.
+
+        Example:
+            >>> searcher = BingS()
+            >>> results = searcher.search(
+            ...     "Python tutorials",
+            ...     max_results=5,
+            ...     extract_webpage_text=True
+            ... )
+            >>> for result in results:
+            ...     print(f"Title: {result['title']}")
+            ...     print(f"URL: {result['href']}")
+            ...     print(f"Description: {result['abstract']}")
+            ...     if result['visible_text']:
+            ...         print(f"Content: {result['visible_text'][:100]}...")
+        """
+        assert keywords, "keywords is mandatory"
+
+        results = []
+        futures = []
+        start = 1
+        while len(results) < max_results:
+            params = {
+                "q": keywords,
+                "first": start
+            }
+            futures.append(self._executor.submit(self._get_url, "GET", "https://www.bing.com/search", params=params))
+            start += 10
+
+            for future in as_completed(futures):
+                try:
+                    resp_content = future.result()
+                    soup = BeautifulSoup(resp_content, "html.parser")
+                    result_block = soup.select('li.b_algo')
+
+                    if not result_block:
+                        break
+
+                    for result in result_block:
+                        try:
+                            link = result.select_one('h2 a')
+                            title = link.text if link else ""
+                            url = link['href'] if link else ""
+                            abstract = result.select_one('.b_caption p')
+                            description = abstract.text if abstract else ""
+
+                            # Remove "WEB" from the beginning of the description if it exists
+                            if description.startswith("WEB"):
+                                description = description[3:].strip()
+
+                            visible_text = ""
+                            if extract_webpage_text:
+                                try:
+                                    actual_url = self._decode_bing_url(url)
+                                    page_content = self._get_url("GET", actual_url)
+                                    visible_text = self.extract_text_from_webpage(
+                                        page_content, max_characters=max_extract_characters
+                                    )
+                                except Exception as e:
+                                    print(f"Error extracting text from {url}: {e}")
+
+                            results.append({
+                                "title": title,
+                                "href": url,
+                                "abstract": description,
+                                "index": len(results),
+                                "type": "web",
+                                "visible_text": visible_text,
+                            })
+
+                            if len(results) >= max_results:
+                                return results
+
+                        except Exception as e:
+                            print(f"Error extracting result: {e}")
+
+                except Exception as e:
+                    print(f"Error fetching URL: {e}")
+
+        return results
+
+    def _decode_bing_url(self, url):
+        if 'bing.com/ck/a' in url:
+            parsed_url = urlparse(url)
+            query_params = parse_qs(parsed_url.query)
+            if 'u' in query_params:
+                encoded_url = query_params['u'][0]
+                return base64.b64decode(encoded_url).decode('utf-8')
+        return url
+
+if __name__ == "__main__":
+    from rich import print
+    searcher = BingS()
+    results = searcher.search("Python development tools", max_results=5, extract_webpage_text=True, max_extract_characters=2000)
+    for result in results:
         print(result)
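The _decode_bing_url helper above exists because Bing wraps result links in click-tracking redirects of the form https://www.bing.com/ck/a?...&u=<base64>, with the real target URL base64-encoded in the "u" query parameter. A minimal round-trip sketch of that decode step follows; the redirect URL is synthesized locally for illustration (a live Bing link may encode the parameter differently), and only the parse-and-decode logic mirrors the module code:

    import base64
    from urllib.parse import parse_qs, quote, urlparse

    # Synthesize a redirect in the shape _decode_bing_url handles: the real
    # target URL, base64-encoded, carried in the "u" query parameter.
    target = "https://example.com/python-tutorial"
    token = base64.b64encode(target.encode("utf-8")).decode("ascii")
    redirect = f"https://www.bing.com/ck/a?p=0123&u={quote(token)}"

    # The decode path mirrors BingS._decode_bing_url.
    params = parse_qs(urlparse(redirect).query)
    decoded = base64.b64decode(params["u"][0]).decode("utf-8")
    assert decoded == target
    print(decoded)  # https://example.com/python-tutorial

Note also that _executor is a class attribute, so the ThreadPoolExecutor and its ten workers are shared by every BingS instance in the process rather than created per instance.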
webscout/DWEBS.py
CHANGED
@@ -3,19 +3,97 @@ import requests
 from typing import Dict, List, Optional, Union, Any
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from urllib.parse import quote, urljoin
-
+
 import time
 import random
 import json
 import os
 from datetime import datetime, timedelta
 from functools import lru_cache
-import
+from .Litlogger import LitLogger, LogFormat, ColorScheme
 from tenacity import retry, stop_after_attempt, wait_exponential

 class GoogleS:
-    """
-
+    """A Python interface for Google search with advanced features 🔥
+
+    The GoogleS class provides a powerful interface to perform web searches, image searches,
+    and advanced filtering on Google. Built with love by HAI to keep it 💯
+
+    Basic Usage:
+        >>> from webscout.DWEBS import GoogleS
+        >>> searcher = GoogleS()
+        >>> # Simple web search
+        >>> results = searcher.search("Python programming")
+        >>> for result in results:
+        ...     print(f"Title: {result['title']}")
+        ...     print(f"URL: {result['href']}")
+        ...     print(f"Description: {result['abstract']}")
+
+    Advanced Web Search:
+        >>> # Search with filters
+        >>> results = searcher.search(
+        ...     query="Python tutorials",
+        ...     site="github.com",
+        ...     file_type="pdf",
+        ...     time_period="month",
+        ...     max_results=5
+        ... )
+        >>> # Example response format:
+        >>> {
+        ...     'title': 'Python Tutorial',
+        ...     'href': 'https://example.com/python-tutorial',
+        ...     'abstract': 'Comprehensive Python tutorial covering basics to advanced topics',
+        ...     'index': 0,
+        ...     'type': 'web',
+        ...     'visible_text': ''  # Optional: Contains webpage text if extract_text=True
+        ... }
+
+    Image Search:
+        >>> # Search for images
+        >>> images = searcher.search_images(
+        ...     query="cute puppies",
+        ...     size="large",
+        ...     color="color",
+        ...     type_filter="photo",
+        ...     max_results=5
+        ... )
+        >>> # Example response format:
+        >>> {
+        ...     'title': 'Cute Puppy Image',
+        ...     'thumbnail': 'https://example.com/puppy-thumb.jpg',
+        ...     'full_url': 'https://example.com/puppy-full.jpg',
+        ...     'type': 'image'
+        ... }
+
+    Features:
+        - Web Search: Get detailed web results with title, URL, and description
+        - Image Search: Find images with thumbnails and full-resolution URLs
+        - Advanced Filters: Site-specific search, file types, time periods
+        - Rate Limiting: Smart request handling to avoid blocks
+        - Caching: Save results for faster repeat searches
+        - Retry Logic: Automatic retry on temporary failures
+        - Logging: Optional LitLogger integration for beautiful console output
+        - Proxy Support: Use custom proxies for requests
+        - Concurrent Processing: Multi-threaded requests for better performance
+
+    Response Format:
+        Web Search Results:
+        {
+            'title': str,        # Title of the webpage
+            'href': str,         # URL of the webpage
+            'abstract': str,     # Brief description or snippet
+            'index': int,        # Result position
+            'type': 'web',       # Result type identifier
+            'visible_text': str  # Full page text (if extract_text=True)
+        }
+
+        Image Search Results:
+        {
+            'title': str,      # Image title or description
+            'thumbnail': str,  # Thumbnail image URL
+            'full_url': str,   # Full resolution image URL
+            'type': 'image'    # Result type identifier
+        }
     """

     SEARCH_TYPES = {
@@ -31,7 +109,8 @@
         timeout: Optional[int] = 10,
         max_workers: int = 20,
         cache_dir: Optional[str] = None,
-        rate_limit: float = 0.01
+        rate_limit: float = 0.01,
+        use_litlogger: bool = False
     ):
         """
         Initialize the GoogleS object with enhanced features.
@@ -39,6 +118,7 @@
         Args:
             cache_dir: Directory to store search result cache
             rate_limit: Minimum time between requests in seconds
+            use_litlogger: Whether to use LitLogger for logging (default: False)
         """
         self.proxy = proxy
         self.headers = headers if headers else {
@@ -56,10 +136,16 @@
             os.makedirs(cache_dir)
         self.last_request_time = 0
         self.rate_limit = rate_limit
+        self.use_litlogger = use_litlogger

-        # Setup logging
-
-
+        # Setup enhanced logging with LitLogger if enabled
+        if self.use_litlogger:
+            self.logger = LitLogger(
+                name="GoogleS",
+                format=LogFormat.MODERN_EMOJI,
+                color_scheme=ColorScheme.CYBERPUNK,
+                console_output=True
+            )

     def _respect_rate_limit(self):
         """Ensure minimum time between requests"""
@@ -77,11 +163,16 @@
         """
         self._respect_rate_limit()
         try:
+            if self.use_litlogger:
+                self.logger.debug(f"Making {method} request to {url}")
             resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout)
             resp.raise_for_status()
+            if self.use_litlogger:
+                self.logger.success(f"Request successful: {resp.status_code}")
             return resp.content
         except requests.exceptions.RequestException as ex:
-            self.
+            if self.use_litlogger:
+                self.logger.error(f"Request failed: {url} - {str(ex)}")
             raise

     @lru_cache(maxsize=100)
@@ -99,7 +190,11 @@
             with open(cache_file, 'r') as f:
                 cached_data = json.load(f)
                 if datetime.fromisoformat(cached_data['timestamp']) + timedelta(hours=24) > datetime.now():
+                    if self.use_litlogger:
+                        self.logger.info(f"Using cached results for: {cache_key}")
                     return cached_data['results']
+        if self.use_litlogger:
+            self.logger.debug(f"No valid cache found for: {cache_key}")
         return None

     def _cache_results(self, cache_key: str, results: List[Dict[str, Any]]):
@@ -117,18 +212,49 @@
         self,
         query: str,
         max_results: int = 10,
-        size: Optional[str] = None,
-        color: Optional[str] = None,
-        type_filter: Optional[str] = None,
+        size: Optional[str] = None,
+        color: Optional[str] = None,
+        type_filter: Optional[str] = None,
         **kwargs
     ) -> List[Dict[str, str]]:
-        """
-
-
+        """Search for images on Google with style! 🖼️
+
         Args:
-
-
-
+            query (str): What you're looking for fam
+            max_results (int): How many results you want (default: 10)
+            size (Optional[str]): Image size filter
+                - 'large': Big pics
+                - 'medium': Medium sized
+                - 'icon': Small icons
+            color (Optional[str]): Color filter
+                - 'color': Full color
+                - 'gray': Black and white
+                - 'transparent': Transparent background
+            type_filter (Optional[str]): Type of image
+                - 'face': Just faces
+                - 'photo': Real photos
+                - 'clipart': Vector art
+                - 'lineart': Line drawings
+
+        Returns:
+            List[Dict[str, str]]: List of image results with these keys:
+                - 'thumbnail': Small preview URL
+                - 'full_url': Full resolution image URL
+                - 'title': Image title/description
+                - 'type': Always 'image'
+
+        Example:
+            >>> searcher = GoogleS()
+            >>> # Find some cool nature pics
+            >>> images = searcher.search_images(
+            ...     query="beautiful landscapes",
+            ...     size="large",
+            ...     color="color",
+            ...     max_results=5
+            ... )
+            >>> for img in images:
+            ...     print(f"Found: {img['title']}")
+            ...     print(f"URL: {img['full_url']}")
         """
         params = {
             "q": query,
@@ -192,6 +318,9 @@
             exclude_terms: List of terms to exclude from search
             exact_phrase: Exact phrase to match
         """
+        if self.use_litlogger:
+            self.logger.info(f"Starting search for: {query}")
+
         # Build advanced query
         advanced_query = query
         if site:
@@ -202,7 +331,10 @@
             advanced_query += " " + " ".join(f"-{term}" for term in exclude_terms)
         if exact_phrase:
             advanced_query = f'"{exact_phrase}"' + advanced_query
-
+
+        if self.use_litlogger:
+            self.logger.debug(f"Advanced query: {advanced_query}")
+
         # Check cache first
         cache_key = self._cache_key(advanced_query, region=region, language=language,
                                     safe=safe, time_period=time_period, sort_by=sort_by)