webscout 8.3.1__py3-none-any.whl → 8.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIutel.py +46 -53
- webscout/Bing_search.py +418 -0
- webscout/Extra/gguf.py +706 -177
- webscout/Provider/AISEARCH/genspark_search.py +7 -7
- webscout/Provider/GeminiProxy.py +140 -0
- webscout/Provider/MCPCore.py +78 -75
- webscout/Provider/OPENAI/BLACKBOXAI.py +1 -4
- webscout/Provider/OPENAI/GeminiProxy.py +328 -0
- webscout/Provider/OPENAI/README.md +2 -0
- webscout/Provider/OPENAI/README_AUTOPROXY.md +238 -0
- webscout/Provider/OPENAI/__init__.py +15 -1
- webscout/Provider/OPENAI/autoproxy.py +332 -39
- webscout/Provider/OPENAI/base.py +15 -5
- webscout/Provider/OPENAI/e2b.py +0 -1
- webscout/Provider/OPENAI/mcpcore.py +109 -70
- webscout/Provider/OPENAI/scirachat.py +59 -51
- webscout/Provider/OPENAI/toolbaz.py +2 -9
- webscout/Provider/OPENAI/xenai.py +514 -0
- webscout/Provider/OPENAI/yep.py +8 -2
- webscout/Provider/TTI/__init__.py +1 -0
- webscout/Provider/TTI/bing.py +231 -0
- webscout/Provider/TTS/speechma.py +45 -39
- webscout/Provider/TogetherAI.py +366 -0
- webscout/Provider/XenAI.py +324 -0
- webscout/Provider/__init__.py +8 -3
- webscout/Provider/deepseek_assistant.py +378 -0
- webscout/auth/__init__.py +44 -0
- webscout/auth/api_key_manager.py +189 -0
- webscout/auth/auth_system.py +100 -0
- webscout/auth/config.py +76 -0
- webscout/auth/database.py +400 -0
- webscout/auth/exceptions.py +67 -0
- webscout/auth/middleware.py +248 -0
- webscout/auth/models.py +130 -0
- webscout/auth/providers.py +257 -0
- webscout/auth/rate_limiter.py +254 -0
- webscout/auth/request_models.py +127 -0
- webscout/auth/request_processing.py +226 -0
- webscout/auth/routes.py +526 -0
- webscout/auth/schemas.py +103 -0
- webscout/auth/server.py +312 -0
- webscout/auth/static/favicon.svg +11 -0
- webscout/auth/swagger_ui.py +203 -0
- webscout/auth/templates/components/authentication.html +237 -0
- webscout/auth/templates/components/base.html +103 -0
- webscout/auth/templates/components/endpoints.html +750 -0
- webscout/auth/templates/components/examples.html +491 -0
- webscout/auth/templates/components/footer.html +75 -0
- webscout/auth/templates/components/header.html +27 -0
- webscout/auth/templates/components/models.html +286 -0
- webscout/auth/templates/components/navigation.html +70 -0
- webscout/auth/templates/static/api.js +455 -0
- webscout/auth/templates/static/icons.js +168 -0
- webscout/auth/templates/static/main.js +784 -0
- webscout/auth/templates/static/particles.js +201 -0
- webscout/auth/templates/static/styles.css +3353 -0
- webscout/auth/templates/static/ui.js +374 -0
- webscout/auth/templates/swagger_ui.html +170 -0
- webscout/client.py +49 -3
- webscout/scout/core/scout.py +104 -26
- webscout/scout/element.py +139 -18
- webscout/swiftcli/core/cli.py +14 -3
- webscout/swiftcli/decorators/output.py +59 -9
- webscout/update_checker.py +31 -49
- webscout/version.py +1 -1
- webscout/webscout_search.py +4 -12
- webscout/webscout_search_async.py +3 -10
- webscout/yep_search.py +2 -11
- {webscout-8.3.1.dist-info → webscout-8.3.2.dist-info}/METADATA +41 -11
- {webscout-8.3.1.dist-info → webscout-8.3.2.dist-info}/RECORD +74 -36
- {webscout-8.3.1.dist-info → webscout-8.3.2.dist-info}/entry_points.txt +1 -1
- webscout/Provider/HF_space/__init__.py +0 -0
- webscout/Provider/HF_space/qwen_qwen2.py +0 -206
- webscout/Provider/OPENAI/api.py +0 -1320
- {webscout-8.3.1.dist-info → webscout-8.3.2.dist-info}/WHEEL +0 -0
- {webscout-8.3.1.dist-info → webscout-8.3.2.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.1.dist-info → webscout-8.3.2.dist-info}/top_level.txt +0 -0
webscout/AIutel.py
CHANGED
|
@@ -277,69 +277,50 @@ def _sanitize_stream_sync(
|
|
|
277
277
|
processing_active = start_marker is None
|
|
278
278
|
buffer = ""
|
|
279
279
|
found_start = False if start_marker else True
|
|
280
|
+
line_iterator: Iterable[str]
|
|
280
281
|
|
|
281
|
-
# Fast path for single string processing
|
|
282
282
|
if isinstance(data, str):
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
processed_item = json.loads(data)
|
|
290
|
-
except Exception as e:
|
|
291
|
-
if error_handler:
|
|
292
|
-
try:
|
|
293
|
-
handled = error_handler(e, data)
|
|
294
|
-
if handled is not None:
|
|
295
|
-
processed_item = handled
|
|
296
|
-
|
|
297
|
-
except Exception:
|
|
298
|
-
pass
|
|
299
|
-
if processed_item is None:
|
|
300
|
-
processed_item = data if yield_raw_on_error else None
|
|
283
|
+
# If data is a string, decide whether to split it into lines
|
|
284
|
+
# or treat it as an iterable containing a single chunk.
|
|
285
|
+
temp_lines: List[str]
|
|
286
|
+
if line_delimiter is None: # Default: split by newlines if present
|
|
287
|
+
if '\n' in data or '\r' in data:
|
|
288
|
+
temp_lines = data.splitlines()
|
|
301
289
|
else:
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
pass
|
|
314
|
-
else:
|
|
315
|
-
yield processed_item
|
|
316
|
-
return
|
|
317
|
-
|
|
318
|
-
# Stream processing path
|
|
319
|
-
if not hasattr(data, '__iter__'):
|
|
320
|
-
raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
|
|
321
|
-
|
|
322
|
-
try:
|
|
323
|
-
iterator = iter(data)
|
|
324
|
-
first_item = next(iterator, None)
|
|
325
|
-
if first_item is None:
|
|
290
|
+
temp_lines = [data] # Treat as a single line/chunk
|
|
291
|
+
elif line_delimiter in data: # Custom delimiter found in string
|
|
292
|
+
temp_lines = data.split(line_delimiter)
|
|
293
|
+
else: # Custom delimiter not found, or string is effectively a single segment
|
|
294
|
+
temp_lines = [data]
|
|
295
|
+
line_iterator = iter(temp_lines)
|
|
296
|
+
elif hasattr(data, '__iter__'): # data is an iterable (but not a string)
|
|
297
|
+
_iter = iter(data)
|
|
298
|
+
first_item = next(_iter, None)
|
|
299
|
+
|
|
300
|
+
if first_item is None: # Iterable was empty
|
|
326
301
|
return
|
|
302
|
+
|
|
327
303
|
from itertools import chain
|
|
328
|
-
|
|
304
|
+
# Reconstruct the full iterable including the first_item
|
|
305
|
+
stream_input_iterable = chain([first_item], _iter)
|
|
329
306
|
|
|
330
|
-
# Determine if we're dealing with bytes or strings
|
|
331
307
|
if isinstance(first_item, bytes):
|
|
308
|
+
# Ensure stream_input_iterable is typed as Iterable[bytes] for _decode_byte_stream
|
|
332
309
|
line_iterator = _decode_byte_stream(
|
|
333
|
-
|
|
310
|
+
stream_input_iterable, # type: ignore
|
|
334
311
|
encoding=encoding,
|
|
335
312
|
errors=encoding_errors,
|
|
336
313
|
buffer_size=buffer_size
|
|
337
314
|
)
|
|
338
315
|
elif isinstance(first_item, str):
|
|
339
|
-
|
|
316
|
+
# Ensure stream_input_iterable is typed as Iterable[str]
|
|
317
|
+
line_iterator = stream_input_iterable # type: ignore
|
|
340
318
|
else:
|
|
341
|
-
raise TypeError(f"
|
|
319
|
+
raise TypeError(f"Iterable must yield strings or bytes, not {type(first_item).__name__}")
|
|
320
|
+
else: # Not a string and not an iterable
|
|
321
|
+
raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
|
|
342
322
|
|
|
323
|
+
try:
|
|
343
324
|
for line in line_iterator:
|
|
344
325
|
if not line:
|
|
345
326
|
continue
|
|
@@ -683,20 +664,32 @@ def sanitize_stream(
|
|
|
683
664
|
Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
|
|
684
665
|
A generator or an asynchronous generator yielding the processed data.
|
|
685
666
|
"""
|
|
667
|
+
# Determine the actual data payload to process
|
|
668
|
+
payload: Any # The type of payload can change based on data's attributes
|
|
686
669
|
|
|
687
|
-
|
|
670
|
+
text_attr = getattr(data, "text", None)
|
|
671
|
+
content_attr = getattr(data, "content", None)
|
|
672
|
+
|
|
673
|
+
if isinstance(text_attr, str):
|
|
674
|
+
payload = text_attr
|
|
675
|
+
elif isinstance(content_attr, bytes):
|
|
676
|
+
payload = content_attr.decode(encoding, encoding_errors)
|
|
677
|
+
else:
|
|
678
|
+
# Use the original data if .text or .content are not applicable or not found
|
|
679
|
+
payload = data
|
|
680
|
+
|
|
681
|
+
# Dispatch to sync or async worker based on the nature of the 'payload'
|
|
682
|
+
if hasattr(payload, "__aiter__"):
|
|
688
683
|
return _sanitize_stream_async(
|
|
689
|
-
|
|
684
|
+
payload, intro_value, to_json, skip_markers, strip_chars,
|
|
690
685
|
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
691
686
|
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
692
687
|
)
|
|
693
688
|
return _sanitize_stream_sync(
|
|
694
|
-
|
|
689
|
+
payload, intro_value, to_json, skip_markers, strip_chars,
|
|
695
690
|
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
696
691
|
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
697
692
|
)
|
|
698
|
-
|
|
699
|
-
|
|
700
693
|
from .conversation import Conversation # noqa: E402,F401
|
|
701
694
|
from .Extra.autocoder import AutoCoder # noqa: E402,F401
|
|
702
695
|
from .optimizers import Optimizers # noqa: E402,F401
|
webscout/Bing_search.py
ADDED
|
@@ -0,0 +1,418 @@
|
|
|
1
|
+
"""
|
|
2
|
+
BingSearch - A Bing search library with advanced features
|
|
3
|
+
"""
|
|
4
|
+
from time import sleep
|
|
5
|
+
from curl_cffi.requests import Session
|
|
6
|
+
from urllib.parse import urlencode, unquote, urlparse, parse_qs
|
|
7
|
+
import base64
|
|
8
|
+
from typing import List, Dict, Optional, Any
|
|
9
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
10
|
+
from webscout.litagent import LitAgent
|
|
11
|
+
class BingSearchResult:
    """One web-search hit from Bing: URL, title and description.

    Each instance also carries a ``metadata`` dict that starts out empty.
    """

    def __init__(self, url: str, title: str, description: str):
        self.url, self.title, self.description = url, title, description
        # Free-form extras; nothing in this class fills it in.
        self.metadata: Dict[str, Any] = {}

    def __repr__(self) -> str:
        fields = f"url={self.url}, title={self.title}, description={self.description}"
        return f"BingSearchResult({fields})"
|
|
21
|
+
|
|
22
|
+
class BingImageResult:
    """One image-search hit from Bing.

    Stores the title, the full image URL, the thumbnail URL, the page URL
    and the source string exactly as passed in.
    """

    def __init__(self, title: str, image: str, thumbnail: str, url: str, source: str):
        self.title, self.image, self.thumbnail = title, image, thumbnail
        self.url, self.source = url, source

    def __repr__(self):
        # The thumbnail is intentionally not part of the textual form.
        fields = f"title={self.title}, image={self.image}, url={self.url}, source={self.source}"
        return f"BingImageResult({fields})"
|
|
32
|
+
|
|
33
|
+
class BingNewsResult:
    """One news-search hit from Bing: title, URL, description and source.

    ``source`` is optional and defaults to an empty string.
    """

    def __init__(self, title: str, url: str, description: str, source: str = ""):
        self.title, self.url = title, url
        self.description, self.source = description, source

    def __repr__(self):
        # The description is intentionally not part of the textual form.
        fields = f"title={self.title}, url={self.url}, source={self.source}"
        return f"BingNewsResult({fields})"
|
|
42
|
+
|
|
43
|
+
class BingSearch:
    """Client for Bing web, image, news and suggestion searches.

    Requests are sent through the ``curl_cffi`` ``Session`` created in
    ``__init__``. HTML responses are parsed with BeautifulSoup, which is
    imported lazily by the methods that need it.
    """

    # Shared by all instances for page fetches in ``text``. It must never be
    # used as a context manager: leaving a ``with`` block shuts the pool down,
    # after which every further ``map``/``submit`` raises RuntimeError.
    _executor: ThreadPoolExecutor = ThreadPoolExecutor()

    # CSS selectors used to pick apart a Bing result page.
    _SELECTORS: Dict[str, str] = {
        'url': 'h2 a',
        'title': 'h2',
        'text': 'p',
        'links': 'ol#b_results > li.b_algo',
        'next': 'div#b_content nav[role="navigation"] a.sb_pagN',
    }

    # Mapping from the public ``safesearch`` values to the values sent to Bing.
    _SAFE_LEVELS: Dict[str, str] = {
        "on": "Strict",
        "moderate": "Moderate",
        "off": "Off",
    }

    def __init__(
        self,
        timeout: int = 10,
        proxies: Optional[Dict[str, str]] = None,
        verify: bool = True,
        lang: str = "en-US",
        sleep_interval: float = 0.0,
        impersonate: str = "chrome110"
    ):
        """Create the HTTP session used by every search method.

        Args:
            timeout: Timeout passed to the session.
            proxies: Proxy mapping passed to the session; ``None`` becomes ``{}``.
            verify: Passed to the session as its ``verify`` option.
            lang: Value sent as ``setlang`` by ``_make_request`` and ``images``.
            sleep_interval: Seconds to sleep between result pages in ``text``.
            impersonate: Browser profile passed to the ``curl_cffi`` session.
        """
        self.timeout = timeout
        self.proxies = proxies if proxies else {}
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self._base_url = "https://www.bing.com"
        self.session = Session(
            proxies=self.proxies,
            verify=self.verify,
            timeout=self.timeout,
            impersonate=impersonate
        )
        self.session.headers.update(LitAgent().generate_fingerprint())

    @staticmethod
    def _request_error(action: str, error: Exception) -> Exception:
        """Build the generic ``Exception`` raised when a request fails.

        The message includes the HTTP status code when ``error`` carries a
        non-None ``response`` attribute. Callers raise the returned exception
        ``from`` the original one so the traceback keeps the root cause.
        """
        response = getattr(error, 'response', None)
        if response is not None:
            return Exception(f"{action} failed with status {response.status_code}: {str(error)}")
        return Exception(f"{action} failed: {str(error)}")

    @classmethod
    def _safe_level(cls, safesearch: str) -> str:
        """Translate ``"on"``/``"moderate"``/``"off"`` (case-insensitive) for Bing.

        Unknown values fall back to ``"Moderate"``.
        """
        return cls._SAFE_LEVELS.get(safesearch.lower(), "Moderate")

    def _selectors(self, element):
        """Return the CSS selector registered for ``element``.

        Raises:
            KeyError: If ``element`` is not one of the known selector names.
        """
        return self._SELECTORS[element]

    def _first_page(self, query):
        """Return ``{'url': ..., 'data': None}`` for the first result page.

        The query is URL-encoded so that spaces, ``&``, ``#`` and similar
        characters in the keywords cannot corrupt the query string.
        """
        query_string = urlencode({"q": query, "search": "", "form": "QBLH"})
        return {'url': f'{self._base_url}/search?{query_string}', 'data': None}

    def _next_page(self, soup):
        """Return ``{'url': ..., 'data': None}`` for the page after ``soup``.

        ``url`` is ``None`` when the page has no "next" link with an ``href``.
        """
        next_page_tag = soup.select_one(self._selectors('next'))
        url = None
        if next_page_tag and next_page_tag.get('href'):
            url = self._base_url + next_page_tag['href']
        return {'url': url, 'data': None}

    def _get_url(self, tag):
        """Return the target URL of a result link.

        When the ``href`` has a ``u`` query parameter, its value (minus the
        first two characters — presumably a type prefix such as ``a1``;
        confirm against live responses) is decoded as URL-safe Base64.
        If there is no ``u`` parameter, or decoding fails, the raw ``href``
        is returned unchanged.
        """
        url = tag.get('href', '')
        try:
            query_params = parse_qs(urlparse(url).query)
        except ValueError:
            return url
        if "u" not in query_params:
            return url
        encoded_url = query_params["u"][0][2:]
        # Restore the '=' padding that is stripped from the encoded value.
        padding = '=' * (-len(encoded_url) % 4)
        try:
            # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
            return base64.urlsafe_b64decode(encoded_url + padding).decode('utf-8')
        except ValueError:
            return url

    def _make_request(self, term: str, results: int, start: int = 0) -> str:
        """Fetch one ``/search`` page and return its HTML.

        Not called by the other methods of this class.

        Args:
            term: Search term, sent as ``q``.
            results: Sent as ``count``.
            start: Zero-based offset; sent as ``first`` = ``start + 1``.

        Raises:
            Exception: If the request fails or returns an error status.
        """
        params = {
            "q": term,
            "count": results,
            "first": start + 1,
            "setlang": self.lang,
        }
        url = self._base_url + "/search"
        try:
            resp = self.session.get(
                url=url,
                params=params,
            )
            resp.raise_for_status()
            return resp.text
        except Exception as e:
            raise self._request_error("Bing search", e) from e

    def text(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10,
        unique: bool = True
    ) -> List['BingSearchResult']:
        """
        Perform a text search on Bing, following "next page" links as needed.

        Args:
            keywords (str): The search keywords.
            region (str, optional): The region for the search. Defaults to None.
            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
            max_results (int): The maximum number of results to fetch. Defaults to 10.
            unique (bool): Whether to exclude duplicate URLs from the results. Defaults to True.

        Returns:
            List[BingSearchResult]: A list of Bing search results.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If a page request fails.

        NOTE(review): ``region`` and ``safesearch`` are accepted but are not
        sent with the request by this method.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        from bs4 import BeautifulSoup

        def fetch_page(url):
            try:
                resp = self.session.get(url)
                resp.raise_for_status()
                return resp.text
            except Exception as e:
                raise self._request_error("Bing search", e) from e

        fetched_results = []
        fetched_links = set()
        visited_pages = set()
        urls_to_fetch = [self._first_page(keywords)['url']]
        while len(fetched_results) < max_results and urls_to_fetch:
            visited_pages.update(urls_to_fetch)
            # Use the shared pool directly; wrapping it in ``with`` would shut
            # it down and break the next page fetch and every later call.
            html_pages = list(self._executor.map(fetch_page, urls_to_fetch))
            urls_to_fetch = []
            for html in html_pages:
                soup = BeautifulSoup(html, "html.parser")
                for result in soup.select(self._selectors('links')):
                    link_tag = result.select_one(self._selectors('url'))
                    if not link_tag:
                        continue
                    url_val = self._get_url(link_tag)
                    title_tag = result.select_one(self._selectors('title'))
                    title = title_tag.get_text(strip=True) if title_tag else ''
                    desc_tag = result.select_one(self._selectors('text'))
                    description = desc_tag.get_text(strip=True) if desc_tag else ''
                    if not (url_val and title):
                        continue
                    if unique and url_val in fetched_links:
                        continue
                    fetched_results.append(BingSearchResult(url_val, title, description))
                    fetched_links.add(url_val)
                    if len(fetched_results) >= max_results:
                        break
                if len(fetched_results) >= max_results:
                    break
                next_url = self._next_page(soup)['url']
                # Never queue a page twice, so a self-referencing "next" link
                # cannot keep the loop running forever.
                if next_url and next_url not in visited_pages:
                    urls_to_fetch.append(next_url)
            # Only pause when another round of requests will actually happen.
            if urls_to_fetch and len(fetched_results) < max_results:
                sleep(self.sleep_interval)
        return fetched_results[:max_results]

    def suggestions(self, query: str, region: Optional[str] = None) -> List[str]:
        """
        Fetches search suggestions for a given query.

        Args:
            query (str): The search query for which suggestions are needed.
            region (str, optional): The region code (e.g., "en-US") for localized
                suggestions. Sent as ``mkt``; defaults to "en-US" when not given.

        Returns:
            List[str]: The second element of the JSON response when it is a
            list, otherwise an empty list.

        Raises:
            ValueError: If ``query`` is empty.
            Exception: If the request fails or the response is not valid JSON.
        """
        if not query:
            raise ValueError("Search query cannot be empty")
        params = {
            "query": query,
            "mkt": region if region else "en-US"
        }
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            raise self._request_error("Bing suggestions", e) from e
        if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
            return data[1]
        return []

    def images(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10
    ) -> List['BingImageResult']:
        """
        Perform an image search on Bing.

        Args:
            keywords (str): The search keywords.
            region (str, optional): The region for the search, sent as ``mkt``. Defaults to None.
            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
            max_results (int): The maximum number of results to fetch. Defaults to 10.

        Returns:
            List[BingImageResult]: A list of Bing image search results.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If the request fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        import json
        from bs4 import BeautifulSoup
        params = {
            "q": keywords,
            "count": max_results,
            "setlang": self.lang,
            "safeSearch": self._safe_level(safesearch),
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/images/search?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            html = resp.text
        except Exception as e:
            raise self._request_error("Bing image search", e) from e
        soup = BeautifulSoup(html, "html.parser")
        results = []
        for item in soup.select("a.iusc"):
            if len(results) >= max_results:
                break
            # Each anchor carries its metadata as a JSON string in the "m" attribute.
            raw_meta = item.get("m")
            if not raw_meta:
                continue
            try:
                meta = json.loads(raw_meta)
            except (TypeError, ValueError):
                # Skip entries whose metadata is not parseable JSON.
                continue
            if not isinstance(meta, dict):
                continue
            image_url = meta.get("murl", "")
            if image_url:
                results.append(BingImageResult(
                    meta.get("t", ""),
                    image_url,
                    meta.get("turl", ""),
                    meta.get("purl", ""),
                    meta.get("surl", ""),
                ))
        return results[:max_results]

    def news(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10,
    ) -> List['BingNewsResult']:
        """Perform a news search on Bing.

        Args:
            keywords (str): The search keywords.
            region (str, optional): The region for the search, sent as ``mkt``. Defaults to None.
            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
            max_results (int): The maximum number of results to return. Defaults to 10.

        Returns:
            List[BingNewsResult]: News results taken from the card-style
            containers, or from ``a.title`` links when no card matched.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If the request fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        from bs4 import BeautifulSoup
        params = {
            "q": keywords,
            "form": "QBNH",
            "safeSearch": self._safe_level(safesearch),
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/news/search?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
        except Exception as e:
            raise self._request_error("Bing news search", e) from e
        soup = BeautifulSoup(resp.text, "html.parser")
        results = []
        for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
            if len(results) >= max_results:
                break
            a_tag = item.find("a")
            title = a_tag.get_text(strip=True) if a_tag else ''
            url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
            desc_tag = (
                item.find("div", class_="snippet")
                or item.find("div", class_="news-card-snippet")
                or item.find("div", class_="snippetText")
            )
            description = desc_tag.get_text(strip=True) if desc_tag else ''
            source_tag = item.find("div", class_="source")
            source = source_tag.get_text(strip=True) if source_tag else ''
            if url_val and title:
                results.append(BingNewsResult(title, url_val, description, source))
        # Fallback: try main news list if above selectors fail
        if not results:
            for item in soup.select("a.title"):
                if len(results) >= max_results:
                    break
                title = item.get_text(strip=True)
                url_val = item['href'] if item.has_attr('href') else ''
                if url_val and title:
                    results.append(BingNewsResult(title, url_val, '', ''))
        return results[:max_results]
|
|
368
|
+
|
|
369
|
+
if __name__ == "__main__":
    # Manual smoke test: runs one live query per search type and prints the results.
    from rich import print

    client = BingSearch(timeout=10, proxies=None, verify=True)
    demo_query = {
        "keywords": "Python programming",
        "region": "us",
        "safesearch": "moderate",
    }

    print("TEXT SEARCH RESULTS:")
    for hit in client.text(max_results=30, **demo_query):
        print(f"Title: {hit.title}")
        print(f"URL: {hit.url}")
        print(f"Description: {hit.description}")
        print("---")

    print("\nSEARCH SUGGESTIONS:")
    print(client.suggestions("how to"))

    print("\nIMAGE SEARCH RESULTS:")
    for hit in client.images(max_results=10, **demo_query):
        print(f"Title: {hit.title}")
        print(f"Image URL: {hit.image}")
        print(f"Page URL: {hit.url}")
        print(f"Source: {hit.source}")
        print("---")

    print("\nNEWS SEARCH RESULTS:")
    for hit in client.news(max_results=10, **demo_query):
        print(f"Title: {hit.title}")
        print(f"URL: {hit.url}")
        print(f"Description: {hit.description}")
        print(f"Source: {hit.source}")
        print("---")
|