webscout 8.3.7__py3-none-any.whl → 2025.10.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +250 -250
- webscout/AIbase.py +379 -379
- webscout/AIutel.py +60 -60
- webscout/Bard.py +1012 -1012
- webscout/Bing_search.py +417 -417
- webscout/DWEBS.py +529 -529
- webscout/Extra/Act.md +309 -309
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/GitToolkit/gitapi/README.md +110 -110
- webscout/Extra/GitToolkit/gitapi/__init__.py +11 -11
- webscout/Extra/GitToolkit/gitapi/repository.py +195 -195
- webscout/Extra/GitToolkit/gitapi/user.py +96 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +61 -61
- webscout/Extra/YTToolkit/README.md +375 -375
- webscout/Extra/YTToolkit/YTdownloader.py +956 -956
- webscout/Extra/YTToolkit/__init__.py +2 -2
- webscout/Extra/YTToolkit/transcriber.py +475 -475
- webscout/Extra/YTToolkit/ytapi/README.md +44 -44
- webscout/Extra/YTToolkit/ytapi/__init__.py +6 -6
- webscout/Extra/YTToolkit/ytapi/channel.py +307 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +118 -118
- webscout/Extra/YTToolkit/ytapi/https.py +88 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +58 -58
- webscout/Extra/YTToolkit/ytapi/pool.py +7 -7
- webscout/Extra/YTToolkit/ytapi/query.py +39 -39
- webscout/Extra/YTToolkit/ytapi/stream.py +62 -62
- webscout/Extra/YTToolkit/ytapi/utils.py +62 -62
- webscout/Extra/YTToolkit/ytapi/video.py +232 -232
- webscout/Extra/autocoder/__init__.py +9 -9
- webscout/Extra/autocoder/autocoder.py +1105 -1105
- webscout/Extra/autocoder/autocoder_utiles.py +332 -332
- webscout/Extra/gguf.md +429 -429
- webscout/Extra/gguf.py +1213 -1213
- webscout/Extra/tempmail/README.md +487 -487
- webscout/Extra/tempmail/__init__.py +27 -27
- webscout/Extra/tempmail/async_utils.py +140 -140
- webscout/Extra/tempmail/base.py +160 -160
- webscout/Extra/tempmail/cli.py +186 -186
- webscout/Extra/tempmail/emailnator.py +84 -84
- webscout/Extra/tempmail/mail_tm.py +360 -360
- webscout/Extra/tempmail/temp_mail_io.py +291 -291
- webscout/Extra/weather.md +281 -281
- webscout/Extra/weather.py +193 -193
- webscout/Litlogger/README.md +10 -10
- webscout/Litlogger/__init__.py +15 -15
- webscout/Litlogger/formats.py +13 -13
- webscout/Litlogger/handlers.py +121 -121
- webscout/Litlogger/levels.py +13 -13
- webscout/Litlogger/logger.py +134 -134
- webscout/Provider/AISEARCH/Perplexity.py +332 -332
- webscout/Provider/AISEARCH/README.md +279 -279
- webscout/Provider/AISEARCH/__init__.py +16 -1
- webscout/Provider/AISEARCH/felo_search.py +206 -206
- webscout/Provider/AISEARCH/genspark_search.py +323 -323
- webscout/Provider/AISEARCH/hika_search.py +185 -185
- webscout/Provider/AISEARCH/iask_search.py +410 -410
- webscout/Provider/AISEARCH/monica_search.py +219 -219
- webscout/Provider/AISEARCH/scira_search.py +316 -316
- webscout/Provider/AISEARCH/stellar_search.py +177 -177
- webscout/Provider/AISEARCH/webpilotai_search.py +255 -255
- webscout/Provider/Aitopia.py +314 -314
- webscout/Provider/Andi.py +1 -1
- webscout/Provider/Apriel.py +306 -0
- webscout/Provider/ChatGPTClone.py +237 -236
- webscout/Provider/ChatSandbox.py +343 -343
- webscout/Provider/Cloudflare.py +324 -324
- webscout/Provider/Cohere.py +208 -208
- webscout/Provider/Deepinfra.py +370 -366
- webscout/Provider/ExaAI.py +260 -260
- webscout/Provider/ExaChat.py +308 -308
- webscout/Provider/Flowith.py +221 -221
- webscout/Provider/GMI.py +293 -0
- webscout/Provider/Gemini.py +164 -164
- webscout/Provider/GeminiProxy.py +167 -167
- webscout/Provider/GithubChat.py +371 -372
- webscout/Provider/Groq.py +800 -800
- webscout/Provider/HeckAI.py +383 -383
- webscout/Provider/Jadve.py +282 -282
- webscout/Provider/K2Think.py +307 -307
- webscout/Provider/Koboldai.py +205 -205
- webscout/Provider/LambdaChat.py +423 -423
- webscout/Provider/Nemotron.py +244 -244
- webscout/Provider/Netwrck.py +248 -248
- webscout/Provider/OLLAMA.py +395 -395
- webscout/Provider/OPENAI/Cloudflare.py +393 -393
- webscout/Provider/OPENAI/FalconH1.py +451 -451
- webscout/Provider/OPENAI/FreeGemini.py +296 -296
- webscout/Provider/OPENAI/K2Think.py +431 -431
- webscout/Provider/OPENAI/NEMOTRON.py +240 -240
- webscout/Provider/OPENAI/PI.py +427 -427
- webscout/Provider/OPENAI/README.md +959 -959
- webscout/Provider/OPENAI/TogetherAI.py +345 -345
- webscout/Provider/OPENAI/TwoAI.py +465 -465
- webscout/Provider/OPENAI/__init__.py +33 -18
- webscout/Provider/OPENAI/base.py +248 -248
- webscout/Provider/OPENAI/chatglm.py +528 -0
- webscout/Provider/OPENAI/chatgpt.py +592 -592
- webscout/Provider/OPENAI/chatgptclone.py +521 -521
- webscout/Provider/OPENAI/chatsandbox.py +202 -202
- webscout/Provider/OPENAI/deepinfra.py +318 -314
- webscout/Provider/OPENAI/e2b.py +1665 -1665
- webscout/Provider/OPENAI/exaai.py +420 -420
- webscout/Provider/OPENAI/exachat.py +452 -452
- webscout/Provider/OPENAI/friendli.py +232 -232
- webscout/Provider/OPENAI/{refact.py → gmi.py} +324 -274
- webscout/Provider/OPENAI/groq.py +364 -364
- webscout/Provider/OPENAI/heckai.py +314 -314
- webscout/Provider/OPENAI/llmchatco.py +337 -337
- webscout/Provider/OPENAI/netwrck.py +355 -355
- webscout/Provider/OPENAI/oivscode.py +290 -290
- webscout/Provider/OPENAI/opkfc.py +518 -518
- webscout/Provider/OPENAI/pydantic_imports.py +1 -1
- webscout/Provider/OPENAI/scirachat.py +535 -535
- webscout/Provider/OPENAI/sonus.py +308 -308
- webscout/Provider/OPENAI/standardinput.py +442 -442
- webscout/Provider/OPENAI/textpollinations.py +340 -340
- webscout/Provider/OPENAI/toolbaz.py +419 -416
- webscout/Provider/OPENAI/typefully.py +362 -362
- webscout/Provider/OPENAI/utils.py +295 -295
- webscout/Provider/OPENAI/venice.py +436 -436
- webscout/Provider/OPENAI/wisecat.py +387 -387
- webscout/Provider/OPENAI/writecream.py +166 -166
- webscout/Provider/OPENAI/x0gpt.py +378 -378
- webscout/Provider/OPENAI/yep.py +389 -389
- webscout/Provider/OpenGPT.py +230 -230
- webscout/Provider/Openai.py +243 -243
- webscout/Provider/PI.py +405 -405
- webscout/Provider/Perplexitylabs.py +430 -430
- webscout/Provider/QwenLM.py +272 -272
- webscout/Provider/STT/__init__.py +16 -1
- webscout/Provider/Sambanova.py +257 -257
- webscout/Provider/StandardInput.py +309 -309
- webscout/Provider/TTI/README.md +82 -82
- webscout/Provider/TTI/__init__.py +33 -18
- webscout/Provider/TTI/aiarta.py +413 -413
- webscout/Provider/TTI/base.py +136 -136
- webscout/Provider/TTI/bing.py +243 -243
- webscout/Provider/TTI/gpt1image.py +149 -149
- webscout/Provider/TTI/imagen.py +196 -196
- webscout/Provider/TTI/infip.py +211 -211
- webscout/Provider/TTI/magicstudio.py +232 -232
- webscout/Provider/TTI/monochat.py +219 -219
- webscout/Provider/TTI/piclumen.py +214 -214
- webscout/Provider/TTI/pixelmuse.py +232 -232
- webscout/Provider/TTI/pollinations.py +232 -232
- webscout/Provider/TTI/together.py +288 -288
- webscout/Provider/TTI/utils.py +12 -12
- webscout/Provider/TTI/venice.py +367 -367
- webscout/Provider/TTS/README.md +192 -192
- webscout/Provider/TTS/__init__.py +33 -18
- webscout/Provider/TTS/parler.py +110 -110
- webscout/Provider/TTS/streamElements.py +333 -333
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TeachAnything.py +237 -237
- webscout/Provider/TextPollinationsAI.py +310 -310
- webscout/Provider/TogetherAI.py +356 -356
- webscout/Provider/TwoAI.py +312 -312
- webscout/Provider/TypliAI.py +311 -311
- webscout/Provider/UNFINISHED/ChatHub.py +208 -208
- webscout/Provider/UNFINISHED/ChutesAI.py +313 -313
- webscout/Provider/UNFINISHED/GizAI.py +294 -294
- webscout/Provider/UNFINISHED/Marcus.py +198 -198
- webscout/Provider/UNFINISHED/Qodo.py +477 -477
- webscout/Provider/UNFINISHED/VercelAIGateway.py +338 -338
- webscout/Provider/UNFINISHED/XenAI.py +324 -324
- webscout/Provider/UNFINISHED/Youchat.py +330 -330
- webscout/Provider/UNFINISHED/liner.py +334 -0
- webscout/Provider/UNFINISHED/liner_api_request.py +262 -262
- webscout/Provider/UNFINISHED/puterjs.py +634 -634
- webscout/Provider/UNFINISHED/samurai.py +223 -223
- webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
- webscout/Provider/Venice.py +250 -250
- webscout/Provider/VercelAI.py +256 -256
- webscout/Provider/WiseCat.py +231 -231
- webscout/Provider/WrDoChat.py +366 -366
- webscout/Provider/__init__.py +33 -18
- webscout/Provider/ai4chat.py +174 -174
- webscout/Provider/akashgpt.py +331 -331
- webscout/Provider/cerebras.py +446 -446
- webscout/Provider/chatglm.py +394 -301
- webscout/Provider/cleeai.py +211 -211
- webscout/Provider/elmo.py +282 -282
- webscout/Provider/geminiapi.py +208 -208
- webscout/Provider/granite.py +261 -261
- webscout/Provider/hermes.py +263 -263
- webscout/Provider/julius.py +223 -223
- webscout/Provider/learnfastai.py +309 -309
- webscout/Provider/llama3mitril.py +214 -214
- webscout/Provider/llmchat.py +243 -243
- webscout/Provider/llmchatco.py +290 -290
- webscout/Provider/meta.py +801 -801
- webscout/Provider/oivscode.py +309 -309
- webscout/Provider/scira_chat.py +383 -383
- webscout/Provider/searchchat.py +292 -292
- webscout/Provider/sonus.py +258 -258
- webscout/Provider/toolbaz.py +370 -367
- webscout/Provider/turboseek.py +273 -273
- webscout/Provider/typefully.py +207 -207
- webscout/Provider/yep.py +372 -372
- webscout/__init__.py +27 -31
- webscout/__main__.py +5 -5
- webscout/auth/api_key_manager.py +189 -189
- webscout/auth/config.py +175 -175
- webscout/auth/models.py +185 -185
- webscout/auth/routes.py +663 -664
- webscout/auth/simple_logger.py +236 -236
- webscout/cli.py +523 -523
- webscout/conversation.py +438 -438
- webscout/exceptions.py +361 -361
- webscout/litagent/Readme.md +298 -298
- webscout/litagent/__init__.py +28 -28
- webscout/litagent/agent.py +581 -581
- webscout/litagent/constants.py +59 -59
- webscout/litprinter/__init__.py +58 -58
- webscout/models.py +181 -181
- webscout/optimizers.py +419 -419
- webscout/prompt_manager.py +288 -288
- webscout/sanitize.py +1078 -1078
- webscout/scout/README.md +401 -401
- webscout/scout/__init__.py +8 -8
- webscout/scout/core/__init__.py +6 -6
- webscout/scout/core/crawler.py +297 -297
- webscout/scout/core/scout.py +706 -706
- webscout/scout/core/search_result.py +95 -95
- webscout/scout/core/text_analyzer.py +62 -62
- webscout/scout/core/text_utils.py +277 -277
- webscout/scout/core/web_analyzer.py +51 -51
- webscout/scout/element.py +599 -599
- webscout/scout/parsers/__init__.py +69 -69
- webscout/scout/parsers/html5lib_parser.py +172 -172
- webscout/scout/parsers/html_parser.py +236 -236
- webscout/scout/parsers/lxml_parser.py +178 -178
- webscout/scout/utils.py +37 -37
- webscout/search/__init__.py +51 -0
- webscout/search/base.py +195 -0
- webscout/search/duckduckgo_main.py +54 -0
- webscout/search/engines/__init__.py +48 -0
- webscout/search/engines/bing.py +84 -0
- webscout/search/engines/bing_news.py +52 -0
- webscout/search/engines/brave.py +43 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +78 -0
- webscout/search/engines/duckduckgo/base.py +187 -0
- webscout/search/engines/duckduckgo/images.py +97 -0
- webscout/search/engines/duckduckgo/maps.py +168 -0
- webscout/search/engines/duckduckgo/news.py +68 -0
- webscout/search/engines/duckduckgo/suggestions.py +21 -0
- webscout/search/engines/duckduckgo/text.py +211 -0
- webscout/search/engines/duckduckgo/translate.py +47 -0
- webscout/search/engines/duckduckgo/videos.py +63 -0
- webscout/search/engines/duckduckgo/weather.py +74 -0
- webscout/search/engines/mojeek.py +37 -0
- webscout/search/engines/wikipedia.py +56 -0
- webscout/search/engines/yahoo.py +65 -0
- webscout/search/engines/yahoo_news.py +64 -0
- webscout/search/engines/yandex.py +43 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +32 -0
- webscout/search/engines/yep/images.py +99 -0
- webscout/search/engines/yep/suggestions.py +35 -0
- webscout/search/engines/yep/text.py +114 -0
- webscout/search/http_client.py +156 -0
- webscout/search/results.py +137 -0
- webscout/search/yep_main.py +44 -0
- webscout/swiftcli/Readme.md +323 -323
- webscout/swiftcli/__init__.py +95 -95
- webscout/swiftcli/core/__init__.py +7 -7
- webscout/swiftcli/core/cli.py +308 -308
- webscout/swiftcli/core/context.py +104 -104
- webscout/swiftcli/core/group.py +241 -241
- webscout/swiftcli/decorators/__init__.py +28 -28
- webscout/swiftcli/decorators/command.py +221 -221
- webscout/swiftcli/decorators/options.py +220 -220
- webscout/swiftcli/decorators/output.py +302 -302
- webscout/swiftcli/exceptions.py +21 -21
- webscout/swiftcli/plugins/__init__.py +9 -9
- webscout/swiftcli/plugins/base.py +135 -135
- webscout/swiftcli/plugins/manager.py +269 -269
- webscout/swiftcli/utils/__init__.py +59 -59
- webscout/swiftcli/utils/formatting.py +252 -252
- webscout/swiftcli/utils/parsing.py +267 -267
- webscout/update_checker.py +117 -117
- webscout/version.py +1 -1
- webscout/version.py.bak +2 -0
- webscout/zeroart/README.md +89 -89
- webscout/zeroart/__init__.py +134 -134
- webscout/zeroart/base.py +66 -66
- webscout/zeroart/effects.py +100 -100
- webscout/zeroart/fonts.py +1238 -1238
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/METADATA +936 -937
- webscout-2025.10.13.dist-info/RECORD +329 -0
- webscout/Provider/AISEARCH/DeepFind.py +0 -254
- webscout/Provider/OPENAI/Qwen3.py +0 -303
- webscout/Provider/OPENAI/qodo.py +0 -630
- webscout/Provider/OPENAI/xenai.py +0 -514
- webscout/tempid.py +0 -134
- webscout/webscout_search.py +0 -1183
- webscout/webscout_search_async.py +0 -649
- webscout/yep_search.py +0 -346
- webscout-8.3.7.dist-info/RECORD +0 -301
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/WHEEL +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.7.dist-info → webscout-2025.10.13.dist-info}/top_level.txt +0 -0
webscout/Bing_search.py
CHANGED
|
@@ -1,417 +1,417 @@
|
|
|
1
|
-
"""
|
|
2
|
-
BingSearch - A Bing search library with advanced features
|
|
3
|
-
"""
|
|
4
|
-
from time import sleep
|
|
5
|
-
from curl_cffi.requests import Session
|
|
6
|
-
from urllib.parse import urlencode, unquote, urlparse, parse_qs
|
|
7
|
-
import base64
|
|
8
|
-
from typing import List, Dict, Optional, Any
|
|
9
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
10
|
-
from webscout.litagent import LitAgent
|
|
11
|
-
class BingSearchResult:
|
|
12
|
-
"""Class to represent a Bing search result with metadata."""
|
|
13
|
-
def __init__(self, url: str, title: str, description: str):
|
|
14
|
-
self.url = url
|
|
15
|
-
self.title = title
|
|
16
|
-
self.description = description
|
|
17
|
-
self.metadata: Dict[str, Any] = {}
|
|
18
|
-
|
|
19
|
-
def __repr__(self) -> str:
|
|
20
|
-
return f"BingSearchResult(url={self.url}, title={self.title}, description={self.description})"
|
|
21
|
-
|
|
22
|
-
class BingImageResult:
|
|
23
|
-
"""Class to represent a Bing image search result."""
|
|
24
|
-
def __init__(self, title: str, image: str, thumbnail: str, url: str, source: str):
|
|
25
|
-
self.title = title
|
|
26
|
-
self.image = image
|
|
27
|
-
self.thumbnail = thumbnail
|
|
28
|
-
self.url = url
|
|
29
|
-
self.source = source
|
|
30
|
-
def __repr__(self):
|
|
31
|
-
return f"BingImageResult(title={self.title}, image={self.image}, url={self.url}, source={self.source})"
|
|
32
|
-
|
|
33
|
-
class BingNewsResult:
|
|
34
|
-
"""Class to represent a Bing news search result."""
|
|
35
|
-
def __init__(self, title: str, url: str, description: str, source: str = ""):
|
|
36
|
-
self.title = title
|
|
37
|
-
self.url = url
|
|
38
|
-
self.description = description
|
|
39
|
-
self.source = source
|
|
40
|
-
def __repr__(self):
|
|
41
|
-
return f"BingNewsResult(title={self.title}, url={self.url}, source={self.source})"
|
|
42
|
-
|
|
43
|
-
class BingSearch:
|
|
44
|
-
"""Bing search implementation with configurable parameters and advanced features."""
|
|
45
|
-
_executor: ThreadPoolExecutor = ThreadPoolExecutor()
|
|
46
|
-
|
|
47
|
-
def __init__(
|
|
48
|
-
self,
|
|
49
|
-
timeout: int = 10,
|
|
50
|
-
proxies: Optional[Dict[str, str]] = None,
|
|
51
|
-
verify: bool = True,
|
|
52
|
-
lang: str = "en-US",
|
|
53
|
-
sleep_interval: float = 0.0,
|
|
54
|
-
impersonate: str = "chrome110"
|
|
55
|
-
):
|
|
56
|
-
self.timeout = timeout
|
|
57
|
-
self.proxies = proxies if proxies else {}
|
|
58
|
-
self.verify = verify
|
|
59
|
-
self.lang = lang
|
|
60
|
-
self.sleep_interval = sleep_interval
|
|
61
|
-
self._base_url = "https://www.bing.com"
|
|
62
|
-
self.session = Session(
|
|
63
|
-
proxies=self.proxies,
|
|
64
|
-
verify=self.verify,
|
|
65
|
-
timeout=self.timeout,
|
|
66
|
-
impersonate=impersonate
|
|
67
|
-
)
|
|
68
|
-
self.session.headers.update(LitAgent().generate_fingerprint())
|
|
69
|
-
|
|
70
|
-
def _selectors(self, element):
|
|
71
|
-
selectors = {
|
|
72
|
-
'url': 'h2 a',
|
|
73
|
-
'title': 'h2',
|
|
74
|
-
'text': 'p',
|
|
75
|
-
'links': 'ol#b_results > li.b_algo',
|
|
76
|
-
'next': 'div#b_content nav[role="navigation"] a.sb_pagN'
|
|
77
|
-
}
|
|
78
|
-
return selectors[element]
|
|
79
|
-
|
|
80
|
-
def _first_page(self, query):
|
|
81
|
-
url = f'{self._base_url}/search?q={query}&search=&form=QBLH'
|
|
82
|
-
return {'url': url, 'data': None}
|
|
83
|
-
|
|
84
|
-
def _next_page(self, soup):
|
|
85
|
-
selector = self._selectors('next')
|
|
86
|
-
next_page_tag = soup.select_one(selector)
|
|
87
|
-
url = None
|
|
88
|
-
if next_page_tag and next_page_tag.get('href'):
|
|
89
|
-
url = self._base_url + next_page_tag['href']
|
|
90
|
-
return {'url': url, 'data': None}
|
|
91
|
-
|
|
92
|
-
def _get_url(self, tag):
|
|
93
|
-
url = tag.get('href', '')
|
|
94
|
-
resp = url
|
|
95
|
-
try:
|
|
96
|
-
parsed_url = urlparse(url)
|
|
97
|
-
query_params = parse_qs(parsed_url.query)
|
|
98
|
-
if "u" in query_params:
|
|
99
|
-
encoded_url = query_params["u"][0][2:]
|
|
100
|
-
try:
|
|
101
|
-
decoded_bytes = base64.urlsafe_b64decode(encoded_url + '===')
|
|
102
|
-
except base64.binascii.Error as e:
|
|
103
|
-
print(f"Error decoding Base64 string: {e}")
|
|
104
|
-
return url
|
|
105
|
-
resp = decoded_bytes.decode('utf-8')
|
|
106
|
-
except Exception as e:
|
|
107
|
-
print(f"Error decoding Base64 string: {e}")
|
|
108
|
-
return resp
|
|
109
|
-
|
|
110
|
-
def _make_request(self, term: str, results: int, start: int = 0) -> str:
|
|
111
|
-
params = {
|
|
112
|
-
"q": term,
|
|
113
|
-
"count": results,
|
|
114
|
-
"first": start + 1,
|
|
115
|
-
"setlang": self.lang,
|
|
116
|
-
}
|
|
117
|
-
url = self._base_url + "/search"
|
|
118
|
-
try:
|
|
119
|
-
resp = self.session.get(
|
|
120
|
-
url=url,
|
|
121
|
-
params=params,
|
|
122
|
-
)
|
|
123
|
-
resp.raise_for_status()
|
|
124
|
-
return resp.text
|
|
125
|
-
except Exception as e:
|
|
126
|
-
if hasattr(e, 'response') and e.response is not None:
|
|
127
|
-
raise Exception(f"Bing search failed with status {e.response.status_code}: {str(e)}")
|
|
128
|
-
else:
|
|
129
|
-
raise Exception(f"Bing search failed: {str(e)}")
|
|
130
|
-
|
|
131
|
-
def text(
|
|
132
|
-
self,
|
|
133
|
-
keywords: str,
|
|
134
|
-
region: str = None,
|
|
135
|
-
safesearch: str = "moderate",
|
|
136
|
-
max_results: int = 10,
|
|
137
|
-
unique: bool = True
|
|
138
|
-
) -> List[BingSearchResult]:
|
|
139
|
-
"""
|
|
140
|
-
Perform a text search on Bing.
|
|
141
|
-
|
|
142
|
-
Args:
|
|
143
|
-
keywords (str): The search keywords.
|
|
144
|
-
region (str, optional): The region for the search. Defaults to None.
|
|
145
|
-
safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
|
|
146
|
-
max_results (int): The maximum number of results to fetch. Defaults to 10.
|
|
147
|
-
unique (bool): Whether to exclude duplicate URLs from the results. Defaults to True.
|
|
148
|
-
|
|
149
|
-
Returns:
|
|
150
|
-
List[BingSearchResult]: A list of Bing search results.
|
|
151
|
-
"""
|
|
152
|
-
if not keywords:
|
|
153
|
-
raise ValueError("Search keywords cannot be empty")
|
|
154
|
-
from bs4 import BeautifulSoup
|
|
155
|
-
safe_map = {
|
|
156
|
-
"on": "Strict",
|
|
157
|
-
"moderate": "Moderate",
|
|
158
|
-
"off": "Off"
|
|
159
|
-
}
|
|
160
|
-
safe = safe_map.get(safesearch.lower(), "Moderate")
|
|
161
|
-
fetched_results = []
|
|
162
|
-
fetched_links = set()
|
|
163
|
-
def fetch_page(url):
|
|
164
|
-
try:
|
|
165
|
-
resp = self.session.get(url)
|
|
166
|
-
resp.raise_for_status()
|
|
167
|
-
return resp.text
|
|
168
|
-
except Exception as e:
|
|
169
|
-
if hasattr(e, 'response') and e.response is not None:
|
|
170
|
-
raise Exception(f"Bing search failed with status {e.response.status_code}: {str(e)}")
|
|
171
|
-
else:
|
|
172
|
-
raise Exception(f"Bing search failed: {str(e)}")
|
|
173
|
-
|
|
174
|
-
# Fix: get the first page URL
|
|
175
|
-
url = self._first_page(keywords)['url']
|
|
176
|
-
urls_to_fetch = [url]
|
|
177
|
-
while len(fetched_results) < max_results and urls_to_fetch:
|
|
178
|
-
html_pages = list(self._executor.map(fetch_page, urls_to_fetch))
|
|
179
|
-
urls_to_fetch = []
|
|
180
|
-
for html in html_pages:
|
|
181
|
-
soup = BeautifulSoup(html, "html.parser")
|
|
182
|
-
selector_links = self._selectors('links')
|
|
183
|
-
result_blocks = soup.select(selector_links)
|
|
184
|
-
for result in result_blocks:
|
|
185
|
-
link_tag = result.select_one(self._selectors('url'))
|
|
186
|
-
if not link_tag:
|
|
187
|
-
continue
|
|
188
|
-
url_val = self._get_url(link_tag)
|
|
189
|
-
title_tag = result.select_one(self._selectors('title'))
|
|
190
|
-
title = title_tag.get_text(strip=True) if title_tag else ''
|
|
191
|
-
desc_tag = result.select_one(self._selectors('text'))
|
|
192
|
-
description = desc_tag.get_text(strip=True) if desc_tag else ''
|
|
193
|
-
if url_val and title:
|
|
194
|
-
if unique and url_val in fetched_links:
|
|
195
|
-
continue
|
|
196
|
-
fetched_results.append(BingSearchResult(url_val, title, description))
|
|
197
|
-
fetched_links.add(url_val)
|
|
198
|
-
if len(fetched_results) >= max_results:
|
|
199
|
-
break
|
|
200
|
-
if len(fetched_results) >= max_results:
|
|
201
|
-
break
|
|
202
|
-
next_page_info = self._next_page(soup)
|
|
203
|
-
if next_page_info['url']:
|
|
204
|
-
urls_to_fetch.append(next_page_info['url'])
|
|
205
|
-
sleep(self.sleep_interval)
|
|
206
|
-
next_page_info = self._next_page(soup)
|
|
207
|
-
url = next_page_info['url']
|
|
208
|
-
sleep(self.sleep_interval)
|
|
209
|
-
return fetched_results[:max_results]
|
|
210
|
-
|
|
211
|
-
def suggestions(self, query: str, region: str = None) -> List[str]:
|
|
212
|
-
"""
|
|
213
|
-
Fetches search suggestions for a given query.
|
|
214
|
-
|
|
215
|
-
Args:
|
|
216
|
-
query (str): The search query for which suggestions are needed.
|
|
217
|
-
region (str, optional): The region code (e.g., "en-US") for localized suggestions.
|
|
218
|
-
|
|
219
|
-
Returns:
|
|
220
|
-
List[str]: A list of suggestion strings related to the query.
|
|
221
|
-
"""
|
|
222
|
-
if not query:
|
|
223
|
-
raise ValueError("Search query cannot be empty")
|
|
224
|
-
params = {
|
|
225
|
-
"query": query,
|
|
226
|
-
"mkt": region if region else "en-US"
|
|
227
|
-
}
|
|
228
|
-
url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
|
|
229
|
-
try:
|
|
230
|
-
resp = self.session.get(url)
|
|
231
|
-
resp.raise_for_status()
|
|
232
|
-
data = resp.json()
|
|
233
|
-
if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
|
|
234
|
-
return data[1]
|
|
235
|
-
return []
|
|
236
|
-
except Exception as e:
|
|
237
|
-
if hasattr(e, 'response') and e.response is not None:
|
|
238
|
-
raise Exception(f"Bing suggestions failed with status {e.response.status_code}: {str(e)}")
|
|
239
|
-
else:
|
|
240
|
-
raise Exception(f"Bing suggestions failed: {str(e)}")
|
|
241
|
-
|
|
242
|
-
def images(
|
|
243
|
-
self,
|
|
244
|
-
keywords: str,
|
|
245
|
-
region: str = None,
|
|
246
|
-
safesearch: str = "moderate",
|
|
247
|
-
max_results: int = 10
|
|
248
|
-
) -> List[BingImageResult]:
|
|
249
|
-
"""
|
|
250
|
-
Perform an image search on Bing.
|
|
251
|
-
|
|
252
|
-
Args:
|
|
253
|
-
keywords (str): The search keywords.
|
|
254
|
-
region (str, optional): The region for the search. Defaults to None.
|
|
255
|
-
safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
|
|
256
|
-
max_results (int): The maximum number of results to fetch. Defaults to 10.
|
|
257
|
-
|
|
258
|
-
Returns:
|
|
259
|
-
List[BingImageResult]: A list of Bing image search results.
|
|
260
|
-
"""
|
|
261
|
-
if not keywords:
|
|
262
|
-
raise ValueError("Search keywords cannot be empty")
|
|
263
|
-
from bs4 import BeautifulSoup
|
|
264
|
-
safe_map = {
|
|
265
|
-
"on": "Strict",
|
|
266
|
-
"moderate": "Moderate",
|
|
267
|
-
"off": "Off"
|
|
268
|
-
}
|
|
269
|
-
safe = safe_map.get(safesearch.lower(), "Moderate")
|
|
270
|
-
params = {
|
|
271
|
-
"q": keywords,
|
|
272
|
-
"count": max_results,
|
|
273
|
-
"setlang": self.lang,
|
|
274
|
-
"safeSearch": safe,
|
|
275
|
-
}
|
|
276
|
-
if region:
|
|
277
|
-
params["mkt"] = region
|
|
278
|
-
url = f"{self._base_url}/images/search?{urlencode(params)}"
|
|
279
|
-
try:
|
|
280
|
-
resp = self.session.get(url)
|
|
281
|
-
resp.raise_for_status()
|
|
282
|
-
html = resp.text
|
|
283
|
-
except Exception as e:
|
|
284
|
-
if hasattr(e, 'response') and e.response is not None:
|
|
285
|
-
raise Exception(f"Bing image search failed with status {e.response.status_code}: {str(e)}")
|
|
286
|
-
else:
|
|
287
|
-
raise Exception(f"Bing image search failed: {str(e)}")
|
|
288
|
-
soup = BeautifulSoup(html, "html.parser")
|
|
289
|
-
results = []
|
|
290
|
-
for item in soup.select("a.iusc"):
|
|
291
|
-
try:
|
|
292
|
-
m = item.get("m")
|
|
293
|
-
import json
|
|
294
|
-
meta = json.loads(m) if m else {}
|
|
295
|
-
image_url = meta.get("murl", "")
|
|
296
|
-
thumb_url = meta.get("turl", "")
|
|
297
|
-
title = meta.get("t", "")
|
|
298
|
-
page_url = meta.get("purl", "")
|
|
299
|
-
source = meta.get("surl", "")
|
|
300
|
-
if image_url:
|
|
301
|
-
results.append(BingImageResult(title, image_url, thumb_url, page_url, source))
|
|
302
|
-
if len(results) >= max_results:
|
|
303
|
-
break
|
|
304
|
-
except Exception:
|
|
305
|
-
continue
|
|
306
|
-
return results[:max_results]
|
|
307
|
-
|
|
308
|
-
def news(
|
|
309
|
-
self,
|
|
310
|
-
keywords: str,
|
|
311
|
-
region: str = None,
|
|
312
|
-
safesearch: str = "moderate",
|
|
313
|
-
max_results: int = 10,
|
|
314
|
-
) -> List['BingNewsResult']:
|
|
315
|
-
"""Bing news search."""
|
|
316
|
-
if not keywords:
|
|
317
|
-
raise ValueError("Search keywords cannot be empty")
|
|
318
|
-
from bs4 import BeautifulSoup
|
|
319
|
-
safe_map = {
|
|
320
|
-
"on": "Strict",
|
|
321
|
-
"moderate": "Moderate",
|
|
322
|
-
"off": "Off"
|
|
323
|
-
}
|
|
324
|
-
safe = safe_map.get(safesearch.lower(), "Moderate")
|
|
325
|
-
params = {
|
|
326
|
-
"q": keywords,
|
|
327
|
-
"form": "QBNH",
|
|
328
|
-
"safeSearch": safe,
|
|
329
|
-
}
|
|
330
|
-
if region:
|
|
331
|
-
params["mkt"] = region
|
|
332
|
-
url = f"{self._base_url}/news/search?{urlencode(params)}"
|
|
333
|
-
try:
|
|
334
|
-
resp = self.session.get(url)
|
|
335
|
-
resp.raise_for_status()
|
|
336
|
-
except Exception as e:
|
|
337
|
-
if hasattr(e, 'response') and e.response is not None:
|
|
338
|
-
raise Exception(f"Bing news search failed with status {e.response.status_code}: {str(e)}")
|
|
339
|
-
else:
|
|
340
|
-
raise Exception(f"Bing news search failed: {str(e)}")
|
|
341
|
-
soup = BeautifulSoup(resp.text, "html.parser")
|
|
342
|
-
results = []
|
|
343
|
-
for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
|
|
344
|
-
a_tag = item.find("a")
|
|
345
|
-
title = a_tag.get_text(strip=True) if a_tag else ''
|
|
346
|
-
url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
|
|
347
|
-
desc_tag = item.find("div", class_="snippet") or item.find("div", class_="news-card-snippet") or item.find("div", class_="snippetText")
|
|
348
|
-
description = desc_tag.get_text(strip=True) if desc_tag else ''
|
|
349
|
-
source_tag = item.find("div", class_="source")
|
|
350
|
-
source = source_tag.get_text(strip=True) if source_tag else ''
|
|
351
|
-
if url_val and title:
|
|
352
|
-
results.append(BingNewsResult(title, url_val, description, source))
|
|
353
|
-
if len(results) >= max_results:
|
|
354
|
-
break
|
|
355
|
-
# Fallback: try main news list if above selectors fail
|
|
356
|
-
if not results:
|
|
357
|
-
for item in soup.select("a.title"):
|
|
358
|
-
title = item.get_text(strip=True)
|
|
359
|
-
url_val = item['href'] if item.has_attr('href') else ''
|
|
360
|
-
description = ''
|
|
361
|
-
source = ''
|
|
362
|
-
if url_val and title:
|
|
363
|
-
results.append(BingNewsResult(title, url_val, description, source))
|
|
364
|
-
if len(results) >= max_results:
|
|
365
|
-
break
|
|
366
|
-
return results[:max_results]
|
|
367
|
-
|
|
368
|
-
if __name__ == "__main__":
|
|
369
|
-
from rich import print
|
|
370
|
-
bing = BingSearch(
|
|
371
|
-
timeout=10,
|
|
372
|
-
proxies=None,
|
|
373
|
-
verify=True
|
|
374
|
-
)
|
|
375
|
-
print("TEXT SEARCH RESULTS:")
|
|
376
|
-
text_results = bing.text(
|
|
377
|
-
keywords="Python programming",
|
|
378
|
-
region="us",
|
|
379
|
-
safesearch="moderate",
|
|
380
|
-
max_results=30
|
|
381
|
-
)
|
|
382
|
-
for result in text_results:
|
|
383
|
-
print(f"Title: {result.title}")
|
|
384
|
-
print(f"URL: {result.url}")
|
|
385
|
-
print(f"Description: {result.description}")
|
|
386
|
-
print("---")
|
|
387
|
-
print("\nSEARCH SUGGESTIONS:")
|
|
388
|
-
suggestions = bing.suggestions("how to")
|
|
389
|
-
print(suggestions)
|
|
390
|
-
|
|
391
|
-
print("\nIMAGE SEARCH RESULTS:")
|
|
392
|
-
image_results = bing.images(
|
|
393
|
-
keywords="Python programming",
|
|
394
|
-
region="us",
|
|
395
|
-
safesearch="moderate",
|
|
396
|
-
max_results=10
|
|
397
|
-
)
|
|
398
|
-
for result in image_results:
|
|
399
|
-
print(f"Title: {result.title}")
|
|
400
|
-
print(f"Image URL: {result.image}")
|
|
401
|
-
print(f"Page URL: {result.url}")
|
|
402
|
-
print(f"Source: {result.source}")
|
|
403
|
-
print("---")
|
|
404
|
-
|
|
405
|
-
print("\nNEWS SEARCH RESULTS:")
|
|
406
|
-
news_results = bing.news(
|
|
407
|
-
keywords="Python programming",
|
|
408
|
-
region="us",
|
|
409
|
-
safesearch="moderate",
|
|
410
|
-
max_results=10
|
|
411
|
-
)
|
|
412
|
-
for result in news_results:
|
|
413
|
-
print(f"Title: {result.title}")
|
|
414
|
-
print(f"URL: {result.url}")
|
|
415
|
-
print(f"Description: {result.description}")
|
|
416
|
-
print(f"Source: {result.source}")
|
|
417
|
-
print("---")
|
|
1
|
+
"""
|
|
2
|
+
BingSearch - A Bing search library with advanced features
|
|
3
|
+
"""
|
|
4
|
+
from time import sleep
|
|
5
|
+
from curl_cffi.requests import Session
|
|
6
|
+
from urllib.parse import urlencode, unquote, urlparse, parse_qs
|
|
7
|
+
import base64
|
|
8
|
+
from typing import List, Dict, Optional, Any
|
|
9
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
10
|
+
from webscout.litagent import LitAgent
|
|
11
|
+
class BingSearchResult:
    """One hit from a Bing web search.

    Stores the result's URL, title and description exactly as given, plus a
    ``metadata`` dictionary that starts out empty on every instance.
    """

    def __init__(self, url: str, title: str, description: str):
        self.url, self.title, self.description = url, title, description
        # Fresh dict per instance; this class itself never populates it.
        self.metadata: Dict[str, Any] = {}

    def __repr__(self) -> str:
        template = "BingSearchResult(url={url}, title={title}, description={description})"
        return template.format(
            url=self.url,
            title=self.title,
            description=self.description,
        )
|
|
21
|
+
|
|
22
|
+
class BingImageResult:
    """One hit from a Bing image search.

    Keeps the title, the image URL, the thumbnail URL, the URL stored as
    ``url`` and the ``source`` value. The thumbnail is stored but is not part
    of the ``repr`` output.
    """

    def __init__(self, title: str, image: str, thumbnail: str, url: str, source: str):
        self.title, self.image, self.thumbnail = title, image, thumbnail
        self.url, self.source = url, source

    def __repr__(self):
        template = "BingImageResult(title={title}, image={image}, url={url}, source={source})"
        return template.format(
            title=self.title,
            image=self.image,
            url=self.url,
            source=self.source,
        )
|
|
32
|
+
|
|
33
|
+
class BingNewsResult:
    """One hit from a Bing news search.

    Keeps the headline, the article URL, the snippet text and the source
    label; ``source`` defaults to an empty string. The description is stored
    but is not part of the ``repr`` output.
    """

    def __init__(self, title: str, url: str, description: str, source: str = ""):
        self.title, self.url = title, url
        self.description, self.source = description, source

    def __repr__(self):
        template = "BingNewsResult(title={title}, url={url}, source={source})"
        return template.format(title=self.title, url=self.url, source=self.source)
|
|
42
|
+
|
|
43
|
+
class BingSearch:
    """Query Bing's web, image, news and suggestion endpoints over one HTTP session.

    Result pages are fetched with a ``curl_cffi`` ``Session`` and parsed with
    BeautifulSoup (imported lazily inside the methods that need it).
    """

    # Created once when the class body runs and shared by all instances.
    _executor: ThreadPoolExecutor = ThreadPoolExecutor()

    # CSS selectors used to take a web-results page apart.
    _SELECTORS: Dict[str, str] = {
        'url': 'h2 a',
        'title': 'h2',
        'text': 'p',
        'links': 'ol#b_results > li.b_algo',
        'next': 'div#b_content nav[role="navigation"] a.sb_pagN',
    }

    # Public ``safesearch`` argument -> value sent as the ``safeSearch`` parameter.
    _SAFESEARCH: Dict[str, str] = {
        "on": "Strict",
        "moderate": "Moderate",
        "off": "Off",
    }

    def __init__(
        self,
        timeout: int = 10,
        proxies: Optional[Dict[str, str]] = None,
        verify: bool = True,
        lang: str = "en-US",
        sleep_interval: float = 0.0,
        impersonate: str = "chrome110"
    ):
        """
        Create the HTTP session used by every search method.

        Args:
            timeout (int): Timeout handed to the session. Defaults to 10.
            proxies (dict, optional): Proxy mapping handed to the session; ``None`` becomes ``{}``.
            verify (bool): TLS verification flag handed to the session. Defaults to True.
            lang (str): Value sent as ``setlang``. Defaults to "en-US".
            sleep_interval (float): Pause between consecutive result pages in ``text()``.
            impersonate (str): Browser profile handed to the session. Defaults to "chrome110".
        """
        self.timeout = timeout
        self.proxies = proxies if proxies else {}
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self._base_url = "https://www.bing.com"
        self.session = Session(
            proxies=self.proxies,
            verify=self.verify,
            timeout=self.timeout,
            impersonate=impersonate
        )
        self.session.headers.update(LitAgent().generate_fingerprint())

    # ------------------------------------------------------------------ helpers

    def _selectors(self, element):
        """Return the CSS selector registered under ``element`` (KeyError if unknown)."""
        return self._SELECTORS[element]

    def _safe_level(self, safesearch) -> str:
        """Translate "on"/"moderate"/"off" (any case) to Bing's value; anything else is "Moderate"."""
        return self._SAFESEARCH.get((safesearch or "").lower(), "Moderate")

    @staticmethod
    def _failure(label: str, exc: Exception) -> Exception:
        """Build the generic ``Exception`` the public methods raise for a failed request.

        When ``exc`` carries a non-None ``response`` attribute its status code is
        included in the message.
        """
        response = getattr(exc, 'response', None)
        if response is not None:
            return Exception(f"{label} failed with status {response.status_code}: {str(exc)}")
        return Exception(f"{label} failed: {str(exc)}")

    def _get_text(self, url: str, label: str) -> str:
        """GET ``url`` and return the body text, wrapping any error via ``_failure``."""
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            return resp.text
        except Exception as e:
            raise self._failure(label, e) from e

    def _first_page(self, query, extra_params: Optional[Dict[str, Any]] = None):
        """
        Build the descriptor for the first web-results page.

        Args:
            query: The search terms; they are URL-encoded here.
            extra_params (dict, optional): Additional query parameters appended after the defaults.

        Returns:
            dict: ``{'url': <page url>, 'data': None}``.
        """
        params = {'q': query, 'search': '', 'form': 'QBLH'}
        if extra_params:
            params.update(extra_params)
        # urlencode keeps spaces, '&', '#' etc. in the query from corrupting the URL.
        return {'url': f'{self._base_url}/search?{urlencode(params)}', 'data': None}

    def _next_page(self, soup):
        """Return ``{'url': ..., 'data': None}`` for the "next" link in ``soup``; url is None if absent."""
        from urllib.parse import urljoin

        next_page_tag = soup.select_one(self._selectors('next'))
        url = None
        if next_page_tag is not None:
            href = next_page_tag.get('href')
            if href:
                # urljoin also copes with absolute hrefs and hrefs lacking a leading slash.
                url = urljoin(self._base_url + '/', href)
        return {'url': url, 'data': None}

    def _get_url(self, tag):
        """
        Return the destination URL of a result link.

        If the link's query string has a ``u`` parameter, its value minus the
        first two characters is treated as URL-safe Base64 of the real target.
        The decoded value is used only when it parses as a URL with a scheme
        and host; in every other case the original ``href`` is returned.
        """
        href = tag.get('href', '')
        try:
            candidates = parse_qs(urlparse(href).query).get("u")
            if not candidates:
                return href
            payload = candidates[0][2:]
            payload += "=" * (-len(payload) % 4)  # restore the stripped Base64 padding
            target = base64.urlsafe_b64decode(payload).decode('utf-8')
            parsed = urlparse(target)
        except (ValueError, TypeError, AttributeError):
            # binascii.Error and UnicodeDecodeError are both ValueError subclasses.
            return href
        if parsed.scheme and parsed.netloc:
            return target
        return href

    def _make_request(self, term: str, results: int, start: int = 0) -> str:
        """
        Fetch one web-results page and return its HTML.

        Args:
            term (str): Sent as ``q``.
            results (int): Sent as ``count``.
            start (int): Zero-based offset; sent as ``first = start + 1``.

        Raises:
            Exception: If the request fails or returns an error status.
        """
        params = {
            "q": term,
            "count": results,
            "first": start + 1,
            "setlang": self.lang,
        }
        try:
            resp = self.session.get(url=self._base_url + "/search", params=params)
            resp.raise_for_status()
            return resp.text
        except Exception as e:
            raise self._failure("Bing search", e) from e

    # --------------------------------------------------------------- public API

    def text(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10,
        unique: bool = True
    ) -> List["BingSearchResult"]:
        """
        Perform a text search on Bing.

        Args:
            keywords (str): The search keywords.
            region (str, optional): Sent as ``mkt`` when given. Defaults to None.
            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
            max_results (int): The maximum number of results to fetch. Defaults to 10.
            unique (bool): Whether to exclude duplicate URLs from the results. Defaults to True.

        Returns:
            List[BingSearchResult]: A list of Bing search results.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If a page request fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        from bs4 import BeautifulSoup

        # Same parameter names images() and news() send, so the arguments are no longer ignored.
        extra = {"setlang": self.lang, "safeSearch": self._safe_level(safesearch)}
        if region:
            extra["mkt"] = region

        fetched_results: List["BingSearchResult"] = []
        fetched_links = set()
        visited_pages = set()

        def fetch_page(page_url):
            return self._get_text(page_url, "Bing search")

        urls_to_fetch = [self._first_page(keywords, extra)['url']]
        while urls_to_fetch and len(fetched_results) < max_results:
            visited_pages.update(urls_to_fetch)
            html_pages = list(self._executor.map(fetch_page, urls_to_fetch))
            urls_to_fetch = []
            for html in html_pages:
                soup = BeautifulSoup(html, "html.parser")
                for result in soup.select(self._selectors('links')):
                    link_tag = result.select_one(self._selectors('url'))
                    if not link_tag:
                        continue
                    url_val = self._get_url(link_tag)
                    title_tag = result.select_one(self._selectors('title'))
                    title = title_tag.get_text(strip=True) if title_tag else ''
                    desc_tag = result.select_one(self._selectors('text'))
                    description = desc_tag.get_text(strip=True) if desc_tag else ''
                    if not (url_val and title):
                        continue
                    if unique and url_val in fetched_links:
                        continue
                    fetched_results.append(BingSearchResult(url_val, title, description))
                    fetched_links.add(url_val)
                    if len(fetched_results) >= max_results:
                        break
                if len(fetched_results) >= max_results:
                    break
                next_url = self._next_page(soup)['url']
                # Never queue a page twice: guards against a "next" link that loops back.
                if next_url and next_url not in visited_pages:
                    urls_to_fetch.append(next_url)
            if urls_to_fetch:
                # Pause only when another page is actually going to be requested.
                sleep(self.sleep_interval)
        return fetched_results[:max_results]

    def suggestions(self, query: str, region: Optional[str] = None) -> List[str]:
        """
        Fetches search suggestions for a given query.

        Args:
            query (str): The search query for which suggestions are needed.
            region (str, optional): The region code (e.g., "en-US") for localized suggestions.

        Returns:
            List[str]: The second element of the JSON reply when it is a list, else ``[]``.

        Raises:
            ValueError: If ``query`` is empty.
            Exception: If the request fails or the reply is not valid JSON.
        """
        if not query:
            raise ValueError("Search query cannot be empty")
        params = {
            "query": query,
            "mkt": region if region else "en-US"
        }
        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
        try:
            resp = self.session.get(url)
            resp.raise_for_status()
            data = resp.json()
        except Exception as e:
            raise self._failure("Bing suggestions", e) from e
        if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
            return data[1]
        return []

    def images(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10
    ) -> List["BingImageResult"]:
        """
        Perform an image search on Bing.

        Args:
            keywords (str): The search keywords.
            region (str, optional): Sent as ``mkt`` when given. Defaults to None.
            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
            max_results (int): The maximum number of results to fetch. Defaults to 10.

        Returns:
            List[BingImageResult]: A list of Bing image search results.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If the request fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        import json
        from bs4 import BeautifulSoup

        params = {
            "q": keywords,
            "count": max_results,
            "setlang": self.lang,
            "safeSearch": self._safe_level(safesearch),
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/images/search?{urlencode(params)}"
        html = self._get_text(url, "Bing image search")

        soup = BeautifulSoup(html, "html.parser")
        results: List["BingImageResult"] = []
        for item in soup.select("a.iusc"):
            raw = item.get("m")  # JSON blob carried on the anchor's "m" attribute
            if not raw:
                continue
            try:
                meta = json.loads(raw)
            except (TypeError, ValueError):
                continue  # unparsable metadata: skip this tile, keep the rest
            if not isinstance(meta, dict):
                continue
            image_url = meta.get("murl", "")
            if not image_url:
                continue
            results.append(
                BingImageResult(
                    meta.get("t", ""),
                    image_url,
                    meta.get("turl", ""),
                    meta.get("purl", ""),
                    meta.get("surl", ""),
                )
            )
            if len(results) >= max_results:
                break
        return results[:max_results]

    def news(
        self,
        keywords: str,
        region: Optional[str] = None,
        safesearch: str = "moderate",
        max_results: int = 10,
    ) -> List['BingNewsResult']:
        """
        Perform a news search on Bing.

        Args:
            keywords (str): The search keywords.
            region (str, optional): Sent as ``mkt`` when given. Defaults to None.
            safesearch (str): The safe search level ("on", "moderate", "off"). Defaults to "moderate".
            max_results (int): The maximum number of results to fetch. Defaults to 10.

        Returns:
            List[BingNewsResult]: News results, at most one per article URL.

        Raises:
            ValueError: If ``keywords`` is empty.
            Exception: If the request fails.
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")
        from bs4 import BeautifulSoup

        params = {
            "q": keywords,
            "form": "QBNH",
            "safeSearch": self._safe_level(safesearch),
        }
        if region:
            params["mkt"] = region
        url = f"{self._base_url}/news/search?{urlencode(params)}"
        html = self._get_text(url, "Bing news search")

        soup = BeautifulSoup(html, "html.parser")
        results: List['BingNewsResult'] = []
        seen_urls = set()

        def add(title, url_val, description='', source=''):
            # The card selectors overlap (a card and its nested content div can both
            # match), so the same article is kept only once.
            if not (url_val and title) or url_val in seen_urls:
                return
            seen_urls.add(url_val)
            results.append(BingNewsResult(title, url_val, description, source))

        for item in soup.select("div.news-card, div.card, div.newsitem, div.card-content, div.t_s_main"):
            a_tag = item.find("a")
            title = a_tag.get_text(strip=True) if a_tag else ''
            url_val = a_tag['href'] if a_tag and a_tag.has_attr('href') else ''
            desc_tag = (
                item.find("div", class_="snippet")
                or item.find("div", class_="news-card-snippet")
                or item.find("div", class_="snippetText")
            )
            description = desc_tag.get_text(strip=True) if desc_tag else ''
            source_tag = item.find("div", class_="source")
            source = source_tag.get_text(strip=True) if source_tag else ''
            add(title, url_val, description, source)
            if len(results) >= max_results:
                break

        # Fallback: try main news list if above selectors fail
        if not results:
            for item in soup.select("a.title"):
                add(item.get_text(strip=True), item['href'] if item.has_attr('href') else '')
                if len(results) >= max_results:
                    break
        return results[:max_results]
|
|
367
|
+
|
|
368
|
+
if __name__ == "__main__":
    # Manual smoke test: runs one live query against each public method and
    # prints what comes back. Requires network access.
    from rich import print

    bing = BingSearch(
        timeout=10,
        proxies=None,
        verify=True
    )

    # ``region`` is forwarded as Bing's ``mkt`` parameter by images() and news(),
    # so use a full market code such as "en-US" rather than a bare country code.
    demo_region = "en-US"

    print("TEXT SEARCH RESULTS:")
    text_results = bing.text(
        keywords="Python programming",
        region=demo_region,
        safesearch="moderate",
        max_results=30
    )
    for result in text_results:
        print(f"Title: {result.title}")
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")

    print("\nSEARCH SUGGESTIONS:")
    suggestions = bing.suggestions("how to")
    print(suggestions)

    print("\nIMAGE SEARCH RESULTS:")
    image_results = bing.images(
        keywords="Python programming",
        region=demo_region,
        safesearch="moderate",
        max_results=10
    )
    for result in image_results:
        print(f"Title: {result.title}")
        print(f"Image URL: {result.image}")
        print(f"Page URL: {result.url}")
        print(f"Source: {result.source}")
        print("---")

    print("\nNEWS SEARCH RESULTS:")
    news_results = bing.news(
        keywords="Python programming",
        region=demo_region,
        safesearch="moderate",
        max_results=10
    )
    for result in news_results:
        print(f"Title: {result.title}")
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print(f"Source: {result.source}")
        print("---")
|