webscout-8.3-py3-none-any.whl → webscout-8.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic.

Files changed (120)
  1. webscout/AIauto.py +4 -4
  2. webscout/AIbase.py +61 -1
  3. webscout/AIutel.py +46 -53
  4. webscout/Bing_search.py +418 -0
  5. webscout/Extra/YTToolkit/ytapi/patterns.py +45 -45
  6. webscout/Extra/YTToolkit/ytapi/stream.py +1 -1
  7. webscout/Extra/YTToolkit/ytapi/video.py +10 -10
  8. webscout/Extra/autocoder/autocoder_utiles.py +1 -1
  9. webscout/Extra/gguf.py +706 -177
  10. webscout/Litlogger/formats.py +9 -0
  11. webscout/Litlogger/handlers.py +18 -0
  12. webscout/Litlogger/logger.py +43 -1
  13. webscout/Provider/AISEARCH/genspark_search.py +7 -7
  14. webscout/Provider/AISEARCH/scira_search.py +3 -2
  15. webscout/Provider/GeminiProxy.py +140 -0
  16. webscout/Provider/LambdaChat.py +7 -1
  17. webscout/Provider/MCPCore.py +78 -75
  18. webscout/Provider/OPENAI/BLACKBOXAI.py +1046 -1017
  19. webscout/Provider/OPENAI/GeminiProxy.py +328 -0
  20. webscout/Provider/OPENAI/Qwen3.py +303 -303
  21. webscout/Provider/OPENAI/README.md +5 -0
  22. webscout/Provider/OPENAI/README_AUTOPROXY.md +238 -0
  23. webscout/Provider/OPENAI/TogetherAI.py +355 -0
  24. webscout/Provider/OPENAI/__init__.py +16 -1
  25. webscout/Provider/OPENAI/autoproxy.py +332 -0
  26. webscout/Provider/OPENAI/base.py +101 -14
  27. webscout/Provider/OPENAI/chatgpt.py +15 -2
  28. webscout/Provider/OPENAI/chatgptclone.py +14 -3
  29. webscout/Provider/OPENAI/deepinfra.py +339 -328
  30. webscout/Provider/OPENAI/e2b.py +295 -74
  31. webscout/Provider/OPENAI/mcpcore.py +109 -70
  32. webscout/Provider/OPENAI/opkfc.py +18 -6
  33. webscout/Provider/OPENAI/scirachat.py +59 -50
  34. webscout/Provider/OPENAI/toolbaz.py +2 -10
  35. webscout/Provider/OPENAI/writecream.py +166 -166
  36. webscout/Provider/OPENAI/x0gpt.py +367 -367
  37. webscout/Provider/OPENAI/xenai.py +514 -0
  38. webscout/Provider/OPENAI/yep.py +389 -383
  39. webscout/Provider/STT/__init__.py +3 -0
  40. webscout/Provider/STT/base.py +281 -0
  41. webscout/Provider/STT/elevenlabs.py +265 -0
  42. webscout/Provider/TTI/__init__.py +4 -1
  43. webscout/Provider/TTI/aiarta.py +399 -365
  44. webscout/Provider/TTI/base.py +74 -2
  45. webscout/Provider/TTI/bing.py +231 -0
  46. webscout/Provider/TTI/fastflux.py +63 -30
  47. webscout/Provider/TTI/gpt1image.py +149 -0
  48. webscout/Provider/TTI/imagen.py +196 -0
  49. webscout/Provider/TTI/magicstudio.py +60 -29
  50. webscout/Provider/TTI/piclumen.py +43 -32
  51. webscout/Provider/TTI/pixelmuse.py +232 -225
  52. webscout/Provider/TTI/pollinations.py +43 -32
  53. webscout/Provider/TTI/together.py +287 -0
  54. webscout/Provider/TTI/utils.py +2 -1
  55. webscout/Provider/TTS/README.md +1 -0
  56. webscout/Provider/TTS/__init__.py +2 -1
  57. webscout/Provider/TTS/freetts.py +140 -0
  58. webscout/Provider/TTS/speechma.py +45 -39
  59. webscout/Provider/TogetherAI.py +366 -0
  60. webscout/Provider/UNFINISHED/ChutesAI.py +314 -0
  61. webscout/Provider/UNFINISHED/fetch_together_models.py +95 -0
  62. webscout/Provider/XenAI.py +324 -0
  63. webscout/Provider/__init__.py +8 -0
  64. webscout/Provider/deepseek_assistant.py +378 -0
  65. webscout/Provider/scira_chat.py +3 -2
  66. webscout/Provider/toolbaz.py +0 -1
  67. webscout/auth/__init__.py +44 -0
  68. webscout/auth/api_key_manager.py +189 -0
  69. webscout/auth/auth_system.py +100 -0
  70. webscout/auth/config.py +76 -0
  71. webscout/auth/database.py +400 -0
  72. webscout/auth/exceptions.py +67 -0
  73. webscout/auth/middleware.py +248 -0
  74. webscout/auth/models.py +130 -0
  75. webscout/auth/providers.py +257 -0
  76. webscout/auth/rate_limiter.py +254 -0
  77. webscout/auth/request_models.py +127 -0
  78. webscout/auth/request_processing.py +226 -0
  79. webscout/auth/routes.py +526 -0
  80. webscout/auth/schemas.py +103 -0
  81. webscout/auth/server.py +312 -0
  82. webscout/auth/static/favicon.svg +11 -0
  83. webscout/auth/swagger_ui.py +203 -0
  84. webscout/auth/templates/components/authentication.html +237 -0
  85. webscout/auth/templates/components/base.html +103 -0
  86. webscout/auth/templates/components/endpoints.html +750 -0
  87. webscout/auth/templates/components/examples.html +491 -0
  88. webscout/auth/templates/components/footer.html +75 -0
  89. webscout/auth/templates/components/header.html +27 -0
  90. webscout/auth/templates/components/models.html +286 -0
  91. webscout/auth/templates/components/navigation.html +70 -0
  92. webscout/auth/templates/static/api.js +455 -0
  93. webscout/auth/templates/static/icons.js +168 -0
  94. webscout/auth/templates/static/main.js +784 -0
  95. webscout/auth/templates/static/particles.js +201 -0
  96. webscout/auth/templates/static/styles.css +3353 -0
  97. webscout/auth/templates/static/ui.js +374 -0
  98. webscout/auth/templates/swagger_ui.html +170 -0
  99. webscout/client.py +49 -3
  100. webscout/litagent/Readme.md +12 -3
  101. webscout/litagent/agent.py +99 -62
  102. webscout/scout/core/scout.py +104 -26
  103. webscout/scout/element.py +139 -18
  104. webscout/swiftcli/core/cli.py +14 -3
  105. webscout/swiftcli/decorators/output.py +59 -9
  106. webscout/update_checker.py +31 -49
  107. webscout/version.py +1 -1
  108. webscout/webscout_search.py +4 -12
  109. webscout/webscout_search_async.py +3 -10
  110. webscout/yep_search.py +2 -11
  111. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/METADATA +41 -11
  112. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/RECORD +116 -68
  113. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/entry_points.txt +1 -1
  114. webscout/Provider/HF_space/__init__.py +0 -0
  115. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  116. webscout/Provider/OPENAI/api.py +0 -1035
  117. webscout/Provider/TTI/artbit.py +0 -0
  118. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/WHEEL +0 -0
  119. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/licenses/LICENSE.md +0 -0
  120. {webscout-8.3.dist-info → webscout-8.3.2.dist-info}/top_level.txt +0 -0
webscout/AIauto.py CHANGED
@@ -6,7 +6,7 @@ API keys or cookies.
 
 from webscout.AIbase import Provider
 from webscout.exceptions import AllProvidersFailure
-from typing import Union, Any, Dict, Generator
+from typing import Union, Any, Dict, Generator, Optional
 import importlib
 import pkgutil
 import random
@@ -71,7 +71,7 @@ class AUTO(Provider):
         proxies: dict = {},
         history_offset: int = 10250,
         act: str = None,
-        exclude: list[str] = [],
+        exclude: Optional[list[str]] = None,
         print_provider_info: bool = False,
     ):
         """
@@ -90,7 +90,7 @@ class AUTO(Provider):
             proxies (dict): Proxies for requests. Defaults to {}.
             history_offset (int): History character offset limit. Defaults to 10250.
             act (str, optional): Awesome prompt key. Defaults to None.
-            exclude (list[str]): List of provider names (uppercase) to exclude. Defaults to [].
+            exclude (Optional[list[str]]): List of provider names (uppercase) to exclude. Defaults to None.
             print_provider_info (bool): Whether to print the name of the successful provider. Defaults to False.
         """
         self.provider = None # type: Provider
@@ -104,7 +104,7 @@
         self.proxies: dict = proxies
         self.history_offset: int = history_offset
         self.act: str = act
-        self.exclude: list[str] = [e.upper() for e in exclude]
+        self.exclude: list[str] = [e.upper() for e in exclude] if exclude else []
         self.print_provider_info: bool = print_provider_info
 
 
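The `exclude` change above is the classic mutable-default-argument fix: a `list` default is created once at function definition time and shared by every call, so `None` plus an in-body fallback is the safer idiom. A minimal sketch of the difference, with illustrative function names (not part of webscout):

```python
from typing import Optional

def bad(exclude: list = []) -> list:
    exclude.append("X")  # mutates the single default list shared by all calls
    return exclude

def good(exclude: Optional[list] = None) -> list:
    exclude = [e.upper() for e in exclude] if exclude else []  # fresh list, as AUTO.__init__ now does
    exclude.append("X")
    return exclude

print(bad())   # ['X']
print(bad())   # ['X', 'X'] -- state leaked from the first call
print(good())  # ['X']
print(good())  # ['X']      -- independent on every call
```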
webscout/AIbase.py CHANGED
@@ -1,6 +1,6 @@
 from abc import ABC, abstractmethod
 from pathlib import Path
-from typing import AsyncGenerator, Dict, List, Union, Generator, Optional
+from typing import AsyncGenerator, Dict, List, Union, Generator, Optional, Any
 from typing_extensions import TypeAlias
 
 # Type aliases for better readability
@@ -178,6 +178,66 @@ class AsyncTTSProvider(ABC):
         """
         raise NotImplementedError("Method needs to be implemented in subclass")
 
+
+class STTProvider(ABC):
+    """Abstract base class for Speech-to-Text providers."""
+
+    @abstractmethod
+    def transcribe(self, audio_path: Union[str, Path], **kwargs) -> Dict[str, Any]:
+        """Transcribe audio file to text.
+
+        Args:
+            audio_path (Union[str, Path]): Path to the audio file
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            Dict[str, Any]: Transcription result in OpenAI Whisper format
+        """
+        raise NotImplementedError("Method needs to be implemented in subclass")
+
+    @abstractmethod
+    def transcribe_from_url(self, audio_url: str, **kwargs) -> Dict[str, Any]:
+        """Transcribe audio from URL to text.
+
+        Args:
+            audio_url (str): URL of the audio file
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            Dict[str, Any]: Transcription result in OpenAI Whisper format
+        """
+        raise NotImplementedError("Method needs to be implemented in subclass")
+
+
+class AsyncSTTProvider(ABC):
+    """Abstract base class for asynchronous Speech-to-Text providers."""
+
+    @abstractmethod
+    async def transcribe(self, audio_path: Union[str, Path], **kwargs) -> Dict[str, Any]:
+        """Transcribe audio file to text asynchronously.
+
+        Args:
+            audio_path (Union[str, Path]): Path to the audio file
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            Dict[str, Any]: Transcription result in OpenAI Whisper format
+        """
+        raise NotImplementedError("Method needs to be implemented in subclass")
+
+    @abstractmethod
+    async def transcribe_from_url(self, audio_url: str, **kwargs) -> Dict[str, Any]:
+        """Transcribe audio from URL to text asynchronously.
+
+        Args:
+            audio_url (str): URL of the audio file
+            **kwargs: Additional provider-specific parameters
+
+        Returns:
+            Dict[str, Any]: Transcription result in OpenAI Whisper format
+        """
+        raise NotImplementedError("Method needs to be implemented in subclass")
+
     async def save_audio(self, audio_file: str, destination: str = None, verbose: bool = False) -> str:
         """Save audio to a specific destination asynchronously.
 
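`STTProvider` and `AsyncSTTProvider` only pin down the interface; concrete providers live under `webscout/Provider/STT/` (see `elevenlabs.py` in the file list). A hypothetical minimal subclass, with an invented class name and a canned Whisper-style payload, might look like:

```python
from pathlib import Path
from typing import Any, Dict, Union

from webscout.AIbase import STTProvider  # ABC added in this release


class EchoSTT(STTProvider):
    """Illustrative only: satisfies the ABC with canned responses."""

    def transcribe(self, audio_path: Union[str, Path], **kwargs) -> Dict[str, Any]:
        # A real provider would upload the file to its API here; "text" mirrors
        # the OpenAI Whisper response shape named in the docstrings above.
        return {"text": f"(transcript of {Path(audio_path).name})"}

    def transcribe_from_url(self, audio_url: str, **kwargs) -> Dict[str, Any]:
        return {"text": f"(transcript of {audio_url})"}


print(EchoSTT().transcribe("sample.wav")["text"])  # (transcript of sample.wav)
```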
webscout/AIutel.py CHANGED
@@ -277,69 +277,50 @@ def _sanitize_stream_sync(
     processing_active = start_marker is None
     buffer = ""
     found_start = False if start_marker else True
+    line_iterator: Iterable[str]
 
-    # Fast path for single string processing
     if isinstance(data, str):
-        processed_item = None
-        if processing_active:
-            if to_json:
-                try:
-                    data = data.strip()
-                    if data:
-                        processed_item = json.loads(data)
-                except Exception as e:
-                    if error_handler:
-                        try:
-                            handled = error_handler(e, data)
-                            if handled is not None:
-                                processed_item = handled
-
-                        except Exception:
-                            pass
-                    if processed_item is None:
-                        processed_item = data if yield_raw_on_error else None
+        # If data is a string, decide whether to split it into lines
+        # or treat it as an iterable containing a single chunk.
+        temp_lines: List[str]
+        if line_delimiter is None:  # Default: split by newlines if present
+            if '\n' in data or '\r' in data:
+                temp_lines = data.splitlines()
            else:
-            processed_item = _process_chunk(
-                data, intro_value, False, effective_skip_markers,
-                strip_chars, yield_raw_on_error, error_handler
-            )
-        if processed_item is not None:
-            if content_extractor:
-                try:
-                    final_content = content_extractor(processed_item)
-                    if final_content is not None:
-                        yield final_content
-                except Exception:
-                    pass
-            else:
-                yield processed_item
-        return
-
-    # Stream processing path
-    if not hasattr(data, '__iter__'):
-        raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
-
-    try:
-        iterator = iter(data)
-        first_item = next(iterator, None)
-        if first_item is None:
+                temp_lines = [data]  # Treat as a single line/chunk
+        elif line_delimiter in data:  # Custom delimiter found in string
+            temp_lines = data.split(line_delimiter)
+        else:  # Custom delimiter not found, or string is effectively a single segment
+            temp_lines = [data]
+        line_iterator = iter(temp_lines)
+    elif hasattr(data, '__iter__'):  # data is an iterable (but not a string)
+        _iter = iter(data)
+        first_item = next(_iter, None)
+
+        if first_item is None:  # Iterable was empty
            return
+
        from itertools import chain
-        stream = chain([first_item], iterator)
+        # Reconstruct the full iterable including the first_item
+        stream_input_iterable = chain([first_item], _iter)
 
-        # Determine if we're dealing with bytes or strings
        if isinstance(first_item, bytes):
+            # Ensure stream_input_iterable is typed as Iterable[bytes] for _decode_byte_stream
            line_iterator = _decode_byte_stream(
-                stream,
+                stream_input_iterable,  # type: ignore
                encoding=encoding,
                errors=encoding_errors,
                buffer_size=buffer_size
            )
        elif isinstance(first_item, str):
-            line_iterator = stream
+            # Ensure stream_input_iterable is typed as Iterable[str]
+            line_iterator = stream_input_iterable  # type: ignore
        else:
-            raise TypeError(f"Stream must yield strings or bytes, not {type(first_item).__name__}")
+            raise TypeError(f"Iterable must yield strings or bytes, not {type(first_item).__name__}")
+    else:  # Not a string and not an iterable
+        raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
 
+    try:
        for line in line_iterator:
            if not line:
                continue
@@ -683,20 +664,32 @@ def sanitize_stream(
         Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
             A generator or an asynchronous generator yielding the processed data.
     """
+    # Determine the actual data payload to process
+    payload: Any  # The type of payload can change based on data's attributes
 
-    if hasattr(data, "__aiter__"):
+    text_attr = getattr(data, "text", None)
+    content_attr = getattr(data, "content", None)
+
+    if isinstance(text_attr, str):
+        payload = text_attr
+    elif isinstance(content_attr, bytes):
+        payload = content_attr.decode(encoding, encoding_errors)
+    else:
+        # Use the original data if .text or .content are not applicable or not found
+        payload = data
+
+    # Dispatch to sync or async worker based on the nature of the 'payload'
+    if hasattr(payload, "__aiter__"):
        return _sanitize_stream_async(
-            data, intro_value, to_json, skip_markers, strip_chars,
+            payload, intro_value, to_json, skip_markers, strip_chars,
            start_marker, end_marker, content_extractor, yield_raw_on_error,
            encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
        )
    return _sanitize_stream_sync(
-        data, intro_value, to_json, skip_markers, strip_chars,
+        payload, intro_value, to_json, skip_markers, strip_chars,
        start_marker, end_marker, content_extractor, yield_raw_on_error,
        encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
    )
-
-
 from .conversation import Conversation # noqa: E402,F401
 from .Extra.autocoder import AutoCoder # noqa: E402,F401
 from .optimizers import Optimizers # noqa: E402,F401
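The second hunk means callers can now hand a requests/httpx-style response object straight to `sanitize_stream`: a string `.text` attribute (or bytes `.content`) is unwrapped into the payload before dispatching to the sync or async worker. A rough usage sketch; `FakeResponse` is invented for illustration, and the expected output assumes `intro_value` prefix-stripping and `to_json` parsing behave as in prior releases:

```python
from webscout.AIutel import sanitize_stream

class FakeResponse:
    """Stand-in for an HTTP response object: .text is checked first."""
    text = 'data: {"msg": "hello"}\ndata: {"msg": "world"}'

chunks = sanitize_stream(
    FakeResponse(),                        # .text unwrapped into a string payload
    intro_value="data:",                   # prefix stripped from each line
    to_json=True,                          # parse the remainder as JSON
    content_extractor=lambda d: d["msg"],  # pull one field from each object
)
print(list(chunks))  # expected: ['hello', 'world']
```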
webscout/Bing_search.py ADDED
@@ -0,0 +1,418 @@
+"""
+BingSearch - A Bing search library with advanced features
+"""
+from time import sleep
+from curl_cffi.requests import Session
+from urllib.parse import urlencode, unquote, urlparse, parse_qs
+import base64
+from typing import List, Dict, Optional, Any
+from concurrent.futures import ThreadPoolExecutor
+from webscout.litagent import LitAgent
+class BingSearchResult:
+    """Class to represent a Bing search result with metadata."""
+    def __init__(self, url: str, title: str, description: str):
+        self.url = url
+        self.title = title
+        self.description = description
+        self.metadata: Dict[str, Any] = {}
+
+    def __repr__(self) -> str:
+        return f"BingSearchResult(url={self.url}, title={self.title}, description={self.description})"
+
+class BingImageResult:
+    """Class to represent a Bing image search result."""
+    def __init__(self, title: str, image: str, thumbnail: str, url: str, source: str):
+        self.title = title
+        self.image = image
+        self.thumbnail = thumbnail
+        self.url = url
+        self.source = source
+    def __repr__(self):
+        return f"BingImageResult(title={self.title}, image={self.image}, url={self.url}, source={self.source})"
+
+class BingNewsResult:
+    """Class to represent a Bing news search result."""
+    def __init__(self, title: str, url: str, description: str, source: str = ""):
+        self.title = title
+        self.url = url
+        self.description = description
+        self.source = source
+    def __repr__(self):
+        return f"BingNewsResult(title={self.title}, url={self.url}, source={self.source})"
+
+class BingSearch:
+    """Bing search implementation with configurable parameters and advanced features."""
+    _executor: ThreadPoolExecutor = ThreadPoolExecutor()
+
+    def __init__(
+        self,
+        timeout: int = 10,
+        proxies: Optional[Dict[str, str]] = None,
+        verify: bool = True,
+        lang: str = "en-US",
+        sleep_interval: float = 0.0,
+        impersonate: str = "chrome110"
+    ):
+        self.timeout = timeout
+        self.proxies = proxies if proxies else {}
+        self.verify = verify
+        self.lang = lang
+        self.sleep_interval = sleep_interval
+        self._base_url = "https://www.bing.com"
+        self.session = Session(
+            proxies=self.proxies,
+            verify=self.verify,
+            timeout=self.timeout,
+            impersonate=impersonate
+        )
+        self.session.headers.update(LitAgent().generate_fingerprint())
+
+    def _selectors(self, element):
+        selectors = {
+            'url': 'h2 a',
+            'title': 'h2',
+            'text': 'p',
+            'links': 'ol#b_results > li.b_algo',
+            'next': 'div#b_content nav[role="navigation"] a.sb_pagN'
+        }
+        return selectors[element]
+
+    def _first_page(self, query):
+        url = f'{self._base_url}/search?q={query}&search=&form=QBLH'
+        return {'url': url, 'data': None}
+
+    def _next_page(self, soup):
+        selector = self._selectors('next')
+        next_page_tag = soup.select_one(selector)
+        url = None
+        if next_page_tag and next_page_tag.get('href'):
+            url = self._base_url + next_page_tag['href']
+        return {'url': url, 'data': None}
+
+    def _get_url(self, tag):
+        url = tag.get('href', '')
+        resp = url
+        try:
+            parsed_url = urlparse(url)
+            query_params = parse_qs(parsed_url.query)
+            if "u" in query_params:
+                encoded_url = query_params["u"][0][2:]
+                try:
+                    decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
+                except base64.binascii.Error as e:
+                    print(f"Error decoding Base64 string: {e}")
+                    return url
+                resp = decoded_bytes.decode('utf-8')
+        except Exception as e:
+            print(f"Error decoding Base64 string: {e}")
+        return resp
+
+    def _make_request(self, term: str, results: int, start: int = 0) -> str:
+        params = {
+            "q": term,
+            "count": results,
+            "first": start + 1,
+            "setlang": self.lang,
+        }
+        url = self._base_url + "/search"
+        try:
+            resp = self.session.get(
+                url=url,
+                params=params,
+            )
+            resp.raise_for_status()
+            return resp.text
+        except Exception as e:
+            if hasattr(e, 'response') and e.response is not None:
+                raise Exception(f"Bing search failed with status {e.response.status_code}: {str(e)}")
+            else:
+                raise Exception(f"Bing search failed: {str(e)}")
+
+    def text(
+        self,
+        keywords: str,
+        region: str = None,
+        safesearch: str = "moderate",
+        max_results: int = 10,
+        unique: bool = True
+    ) -> List[BingSearchResult]:
+        """
+        Perform a text search on Bing.
+
+        Args:
+            keywords (str): The search keywords.
+            region (str, optional): The region for the search. Defaults to None.
+            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
+            max_results (int): The maximum number of results to fetch. Defaults to 10.
+            unique (bool): Whether to exclude duplicate URLs from the results. Defaults to True.
+
+        Returns:
+            List[BingSearchResult]: A list of Bing search results.
+        """
+        if not keywords:
+            raise ValueError("Search keywords cannot be empty")
+        from bs4 import BeautifulSoup
+        safe_map = {
+            "on": "Strict",
+            "moderate": "Moderate",
+            "off": "Off"
+        }
+        safe = safe_map.get(safesearch.lower(), "Moderate")
+        fetched_results = []
+        fetched_links = set()
+        def fetch_page(url):
+            try:
+                resp = self.session.get(url)
+                resp.raise_for_status()
+                return resp.text
+            except Exception as e:
+                if hasattr(e, 'response') and e.response is not None:
+                    raise Exception(f"Bing search failed with status {e.response.status_code}: {str(e)}")
+                else:
+                    raise Exception(f"Bing search failed: {str(e)}")
+
+        # Fix: get the first page URL
+        url = self._first_page(keywords)['url']
+        urls_to_fetch = [url]
+        while len(fetched_results) < max_results and urls_to_fetch:
+            with self._executor as executor:
+                html_pages = list(executor.map(fetch_page, urls_to_fetch))
+            urls_to_fetch = []
+            for html in html_pages:
+                soup = BeautifulSoup(html, "html.parser")
+                selector_links = self._selectors('links')
+                result_blocks = soup.select(selector_links)
+                for result in result_blocks:
+                    link_tag = result.select_one(self._selectors('url'))
+                    if not link_tag:
+                        continue
+                    url_val = self._get_url(link_tag)
+                    title_tag = result.select_one(self._selectors('title'))
+                    title = title_tag.get_text(strip=True) if title_tag else ''
+                    desc_tag = result.select_one(self._selectors('text'))
+                    description = desc_tag.get_text(strip=True) if desc_tag else ''
+                    if url_val and title:
+                        if unique and url_val in fetched_links:
+                            continue
+                        fetched_results.append(BingSearchResult(url_val, title, description))
+                        fetched_links.add(url_val)
+                        if len(fetched_results) >= max_results:
+                            break
+                if len(fetched_results) >= max_results:
+                    break
+                next_page_info = self._next_page(soup)
+                if next_page_info['url']:
+                    urls_to_fetch.append(next_page_info['url'])
+                sleep(self.sleep_interval)
+            next_page_info = self._next_page(soup)
+            url = next_page_info['url']
+            sleep(self.sleep_interval)
+        return fetched_results[:max_results]
+
+    def suggestions(self, query: str, region: str = None) -> List[str]:
+        """
+        Fetches search suggestions for a given query.
+
+        Args:
+            query (str): The search query for which suggestions are needed.
+            region (str, optional): The region code (e.g., "en-US") for localized suggestions.
+
+        Returns:
+            List[str]: A list of suggestion strings related to the query.
+        """
+        if not query:
+            raise ValueError("Search query cannot be empty")
+        params = {
+            "query": query,
+            "mkt": region if region else "en-US"
+        }
+        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
+        try:
+            resp = self.session.get(url)
+            resp.raise_for_status()
+            data = resp.json()
+            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
+                return data[1]
+            return []
+        except Exception as e:
+            if hasattr(e, 'response') and e.response is not None:
+                raise Exception(f"Bing suggestions failed with status {e.response.status_code}: {str(e)}")
+            else:
+                raise Exception(f"Bing suggestions failed: {str(e)}")
+
+    def images(
+        self,
+        keywords: str,
+        region: str = None,
+        safesearch: str = "moderate",
+        max_results: int = 10
+    ) -> List[BingImageResult]:
+        """
+        Perform an image search on Bing.
+
+        Args:
+            keywords (str): The search keywords.
+            region (str, optional): The region for the search. Defaults to None.
+            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
+            max_results (int): The maximum number of results to fetch. Defaults to 10.
+
+        Returns:
+            List[BingImageResult]: A list of Bing image search results.
+        """
+        if not keywords:
+            raise ValueError("Search keywords cannot be empty")
+        from bs4 import BeautifulSoup
+        safe_map = {
+            "on": "Strict",
+            "moderate": "Moderate",
+            "off": "Off"
+        }
+        safe = safe_map.get(safesearch.lower(), "Moderate")
+        params = {
+            "q": keywords,
+            "count": max_results,
+            "setlang": self.lang,
+            "safeSearch": safe,
+        }
+        if region:
+            params["mkt"] = region
+        url = f"{self._base_url}/images/search?{urlencode(params)}"
+        try:
+            resp = self.session.get(url)
+            resp.raise_for_status()
+            html = resp.text
+        except Exception as e:
+            if hasattr(e, 'response') and e.response is not None:
+                raise Exception(f"Bing image search failed with status {e.response.status_code}: {str(e)}")
+            else:
+                raise Exception(f"Bing image search failed: {str(e)}")
+        soup = BeautifulSoup(html, "html.parser")
+        results = []
+        for item in soup.select("a.iusc"):
+            try:
+                m = item.get("m")
+                import json
+                meta = json.loads(m) if m else {}
+                image_url = meta.get("murl", "")
+                thumb_url = meta.get("turl", "")
+                title = meta.get("t", "")
+                page_url = meta.get("purl", "")
+                source = meta.get("surl", "")
+                if image_url:
+                    results.append(BingImageResult(title, image_url, thumb_url, page_url, source))
+                if len(results) >= max_results:
+                    break
+            except Exception:
+                continue
+        return results[:max_results]
+
+    def news(
+        self,
+        keywords: str,
+        region: str = None,
+        safesearch: str = "moderate",
+        max_results: int = 10,
+    ) -> List['BingNewsResult']:
+        """Bing news search."""
+        if not keywords:
+            raise ValueError("Search keywords cannot be empty")
+        from bs4 import BeautifulSoup
+        safe_map = {
+            "on": "Strict",
+            "moderate": "Moderate",
+            "off": "Off"
+        }
+        safe = safe_map.get(safesearch.lower(), "Moderate")
+        params = {
+            "q": keywords,
+            "form": "QBNH",
+            "safeSearch": safe,
+        }
+        if region:
+            params["mkt"] = region
+        url = f"{self._base_url}/news/search?{urlencode(params)}"
+        try:
+            resp = self.session.get(url)
+            resp.raise_for_status()
+        except Exception as e:
+            if hasattr(e, 'response') and e.response is not None:
+                raise Exception(f"Bing news search failed with status {e.response.status_code}: {str(e)}")
+            else:
+                raise Exception(f"Bing news search failed: {str(e)}")
+        soup = BeautifulSoup(resp.text, "html.parser")
+        results = []
+        for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
+            a_tag = item.find("a")
+            title = a_tag.get_text(strip=True) if a_tag else ''
+            url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
+            desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
+            description = desc_tag.get_text(strip=True) if desc_tag else ''
+            source_tag = item.find("div", class_="source")
+            source = source_tag.get_text(strip=True) if source_tag else ''
+            if url_val and title:
+                results.append(BingNewsResult(title, url_val, description, source))
+            if len(results) >= max_results:
+                break
+        # Fallback: try main news list if above selectors fail
+        if not results:
+            for item in soup.select("a.title"):
+                title = item.get_text(strip=True)
+                url_val = item['href'] if item.has_attr('href') else ''
+                description = ''
+                source = ''
+                if url_val and title:
+                    results.append(BingNewsResult(title, url_val, description, source))
+                if len(results) >= max_results:
+                    break
+        return results[:max_results]
+
+if __name__ == "__main__":
+    from rich import print
+    bing = BingSearch(
+        timeout=10,
+        proxies=None,
+        verify=True
+    )
+    print("TEXT SEARCH RESULTS:")
+    text_results = bing.text(
+        keywords="Python programming",
+        region="us",
+        safesearch="moderate",
+        max_results=30
+    )
+    for result in text_results:
+        print(f"Title: {result.title}")
+        print(f"URL: {result.url}")
+        print(f"Description: {result.description}")
+        print("---")
+    print("\nSEARCH SUGGESTIONS:")
+    suggestions = bing.suggestions("how to")
+    print(suggestions)
+
+    print("\nIMAGE SEARCH RESULTS:")
+    image_results = bing.images(
+        keywords="Python programming",
+        region="us",
+        safesearch="moderate",
+        max_results=10
+    )
+    for result in image_results:
+        print(f"Title: {result.title}")
+        print(f"Image URL: {result.image}")
+        print(f"Page URL: {result.url}")
+        print(f"Source: {result.source}")
+        print("---")
+
+    print("\nNEWS SEARCH RESULTS:")
+    news_results = bing.news(
+        keywords="Python programming",
+        region="us",
+        safesearch="moderate",
+        max_results=10
+    )
+    for result in news_results:
+        print(f"Title: {result.title}")
+        print(f"URL: {result.url}")
+        print(f"Description: {result.description}")
+        print(f"Source: {result.source}")
+        print("---")
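For reference, `_get_url` above exists because Bing wraps organic result links in a redirect whose `u` query parameter carries the real target as `a1` plus urlsafe Base64; the code drops the two-character prefix and over-pads with `===` before decoding. A standalone sketch of the same unwrapping, with a fabricated example URL:

```python
import base64
from urllib.parse import parse_qs, urlparse

def decode_bing_redirect(url: str) -> str:
    """Mirrors BingSearch._get_url: unwrap Bing's u=a1<base64> redirect links."""
    query = parse_qs(urlparse(url).query)
    if "u" not in query:
        return url
    payload = query["u"][0][2:]  # drop the "a1" prefix, as _get_url does
    try:
        # Excess '=' padding is tolerated by the decoder, so '===' is always enough.
        return base64.urlsafe_b64decode(payload + "===").decode("utf-8")
    except (base64.binascii.Error, UnicodeDecodeError):
        return url  # fall back to the wrapped URL on bad input

# Fabricated round-trip: wrap a target the way Bing does, then unwrap it.
target = "https://example.com/page"
wrapped = "https://www.bing.com/ck/a?u=a1" + base64.urlsafe_b64encode(target.encode()).decode()
print(decode_bing_redirect(wrapped))  # https://example.com/page
```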