webscout 2025.10.14.1__py3-none-any.whl → 2025.10.15__py3-none-any.whl
This diff compares the contents of two publicly released versions of this package as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of webscout has been flagged as possibly problematic.
- webscout/__init__.py +1 -1
- webscout/auth/routes.py +8 -22
- webscout/cli.py +0 -147
- webscout/search/__init__.py +2 -4
- webscout/search/bing_main.py +42 -0
- webscout/search/engines/bing/__init__.py +1 -0
- webscout/search/engines/bing/base.py +33 -0
- webscout/search/engines/bing/images.py +105 -0
- webscout/search/engines/bing/news.py +89 -0
- webscout/search/engines/bing/suggestions.py +34 -0
- webscout/search/engines/bing/text.py +103 -0
- webscout/search/engines/duckduckgo/maps.py +13 -0
- webscout/version.py +1 -1
- webscout/version.py.bak +1 -1
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.15.dist-info}/METADATA +6 -281
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.15.dist-info}/RECORD +20 -17
- webscout/Bing_search.py +0 -417
- webscout/DWEBS.py +0 -529
- webscout/search/engines/bing.py +0 -84
- webscout/search/engines/bing_news.py +0 -52
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.15.dist-info}/WHEEL +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.15.dist-info}/entry_points.txt +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.15.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.15.dist-info}/top_level.txt +0 -0
webscout/__init__.py
CHANGED
webscout/auth/routes.py
CHANGED
@@ -43,7 +43,6 @@ from .request_processing import (
 from .auth_system import get_auth_components
 from .simple_logger import request_logger
 from ..search import DuckDuckGoSearch, YepSearch
-from ..DWEBS import GoogleSearch
 from webscout.Bing_search import BingSearch
 
 # Setup logger
@@ -514,33 +513,20 @@ class Api:
         @self.app.get(
             "/search",
             tags=["Web search"],
-            description="Unified web search endpoint supporting
+            description="Unified web search endpoint supporting Yep, DuckDuckGo, and Bing with text, news, images, and suggestions search types."
         )
         async def websearch(
             q: str = Query(..., description="Search query"),
-            engine: str = Query("
+            engine: str = Query("duckduckgo", description="Search engine: yep, duckduckgo, bing"),
             max_results: int = Query(10, description="Maximum number of results"),
             region: str = Query("all", description="Region code (optional)"),
             safesearch: str = Query("moderate", description="Safe search: on, moderate, off"),
             type: str = Query("text", description="Search type: text, news, images, suggestions"),
         ):
             """Unified web search endpoint."""
-            github_footer = "If you believe this is a bug, please pull an issue at https://github.com/
+            github_footer = "If you believe this is a bug, please pull an issue at https://github.com/pyscout/Webscout."
             try:
-                if engine == "google":
-                    gs = GoogleSearch()
-                    if type == "text":
-                        results = gs.text(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
-                        return {"engine": "google", "type": "text", "results": [r.__dict__ for r in results]}
-                    elif type == "news":
-                        results = gs.news(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
-                        return {"engine": "google", "type": "news", "results": [r.__dict__ for r in results]}
-                    elif type == "suggestions":
-                        results = gs.suggestions(q, region=region)
-                        return {"engine": "google", "type": "suggestions", "results": results}
-                    else:
-                        return {"error": "Google only supports text, news, and suggestions in this API.", "footer": github_footer}
-                elif engine == "yep":
+                if engine == "yep":
                     ys = YepSearch()
                     if type == "text":
                         results = ys.text(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
@@ -554,12 +540,12 @@ class Api:
                     else:
                         return {"error": "Yep only supports text, images, and suggestions in this API.", "footer": github_footer}
                 elif engine == "duckduckgo":
-
+                    ddg = DuckDuckGoSearch()
                     if type == "text":
-                        results =
+                        results = ddg.text(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
                         return {"engine": "duckduckgo", "type": "text", "results": results}
                     elif type == "suggestions":
-                        results =
+                        results = ddg.suggestions(keywords=q, region=region)
                         return {"engine": "duckduckgo", "type": "suggestions", "results": results}
                     else:
                         return {"error": "DuckDuckGo only supports text and suggestions in this API.", "footer": github_footer}
@@ -580,7 +566,7 @@ class Api:
                     else:
                         return {"error": "Bing only supports text, news, images, and suggestions in this API.", "footer": github_footer}
                 else:
-                    return {"error": "Unknown engine. Use one of:
+                    return {"error": "Unknown engine. Use one of: yep, duckduckgo, bing.", "footer": github_footer}
             except Exception as e:
                 # Special handling for rate limit errors
                 msg = str(e)
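For consumers of the HTTP API, the practical effect is that engine=google is gone and the default engine is now duckduckgo. A minimal sketch of calling the updated endpoint, assuming a locally running server on port 8000 (the host and port are assumptions, not part of the diff):

import requests

resp = requests.get(
    "http://localhost:8000/search",  # assumed local deployment of the webscout API
    params={
        "q": "python web scraping",
        "engine": "bing",        # "google" now falls through to the unknown-engine error
        "type": "news",
        "max_results": 5,
    },
)
print(resp.json())  # e.g. {"engine": "bing", "type": "news", "results": [...]}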
webscout/cli.py
CHANGED
@@ -1,7 +1,6 @@
 import sys
 from .swiftcli import CLI, option
 from .search import DuckDuckGoSearch, YepSearch  # Import search classes
-from .DWEBS import GoogleSearch  # Import GoogleSearch from DWEBS
 from .version import __version__
 
 # Alias for backward compatibility
@@ -262,152 +261,6 @@ def weather(location: str, language: str, proxy: str = None, timeout: int = 10):
         raise e
 
 @app.command()
-@option("--keywords", "-k", help="Search keywords", required=True)
-@option("--region", "-r", help="Region for search results (ISO country code)", default="all")
-@option("--safesearch", "-s", help="SafeSearch setting (on, moderate, off)", default="moderate")
-@option("--max-results", "-m", help="Maximum number of results", type=int, default=10)
-@option("--start-num", "-start", help="Starting position for pagination", type=int, default=0)
-@option("--unique", "-u", help="Filter duplicate results", type=bool, default=True)
-@option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
-@option("--proxy", "-p", help="Proxy URL to use for requests")
-@option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
-def google_text(
-    keywords: str,
-    region: str,
-    safesearch: str,
-    max_results: int,
-    start_num: int,
-    unique: bool,
-    timeout: int = 10,
-    proxy: str = None,
-    impersonate: str = "chrome110"
-):
-    """Perform a text search using Google Search."""
-    google = GoogleSearch(
-        timeout=timeout,
-        proxies={"https": proxy, "http": proxy} if proxy else None,
-        verify=True,
-        lang="en",
-        sleep_interval=0.0,
-        impersonate=impersonate
-    )
-
-    try:
-        results = google.text(
-            keywords=keywords,
-            region=region,
-            safesearch=safesearch,
-            max_results=max_results,
-            start_num=start_num,
-            unique=unique
-        )
-
-        # Convert SearchResult objects to dictionaries for printing
-        formatted_results = []
-        for result in results:
-            result_dict = {
-                "title": result.title,
-                "url": result.url,
-                "description": result.description,
-            }
-            # Add any metadata to the result dictionary
-            for k, v in result.metadata.items():
-                result_dict[k] = v
-
-            formatted_results.append(result_dict)
-
-        _print_data(formatted_results)
-    except Exception as e:
-        raise e
-
-@app.command()
-@option("--keywords", "-k", help="Search keywords", required=True)
-@option("--region", "-r", help="Region for search results (ISO country code)", default="all")
-@option("--safesearch", "-s", help="SafeSearch setting (on, moderate, off)", default="moderate")
-@option("--max-results", "-m", help="Maximum number of results", type=int, default=10)
-@option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
-@option("--proxy", "-p", help="Proxy URL to use for requests")
-@option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
-def google_news(
-    keywords: str,
-    region: str,
-    safesearch: str,
-    max_results: int,
-    timeout: int = 10,
-    proxy: str = None,
-    impersonate: str = "chrome110"
-):
-    """Perform a news search using Google Search."""
-    google = GoogleSearch(
-        timeout=timeout,
-        proxies={"https": proxy, "http": proxy} if proxy else None,
-        verify=True,
-        lang="en",
-        sleep_interval=0.0,
-        impersonate=impersonate
-    )
-
-    try:
-        results = google.news(
-            keywords=keywords,
-            region=region,
-            safesearch=safesearch,
-            max_results=max_results
-        )
-
-        # Convert SearchResult objects to dictionaries for printing
-        formatted_results = []
-        for result in results:
-            result_dict = {
-                "title": result.title,
-                "url": result.url,
-                "description": result.description,
-            }
-            # Add any metadata to the result dictionary
-            for k, v in result.metadata.items():
-                result_dict[k] = v
-
-            formatted_results.append(result_dict)
-
-        _print_data(formatted_results)
-    except Exception as e:
-        raise e
-
-@app.command()
-@option("--query", "-q", help="Search query", required=True)
-@option("--region", "-r", help="Region for suggestions (ISO country code)", default="all")
-@option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
-@option("--proxy", "-p", help="Proxy URL to use for requests")
-@option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
-def google_suggestions(
-    query: str,
-    region: str,
-    timeout: int = 10,
-    proxy: str = None,
-    impersonate: str = "chrome110"
-):
-    """Get search suggestions from Google Search."""
-    google = GoogleSearch(
-        timeout=timeout,
-        proxies={"https": proxy, "http": proxy} if proxy else None,
-        verify=True,
-        lang="en",
-        sleep_interval=0.0,
-        impersonate=impersonate
-    )
-
-    try:
-        results = google.suggestions(query=query, region=region)
-
-        # Format suggestions for printing
-        formatted_results = []
-        for i, suggestion in enumerate(results, 1):
-            formatted_results.append({"position": i, "suggestion": suggestion})
-
-        _print_data(formatted_results)
-    except Exception as e:
-        raise e
-
 @app.command()
 @option("--keywords", "-k", help="Search keywords", required=True)
 @option("--region", "-r", help="Region for search results", default="all")
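The google_text, google_news, and google_suggestions CLI commands are removed with no direct replacement in this release; the closest substitute for scripts that used them is the library-level search API that remains. A hypothetical fallback sketch (the old command-line invocation in the comment assumes swiftcli exposed the functions under their Python names, which is not confirmed by this diff):

# Previously (removed in 2025.10.15; invocation format is an assumption):
#   webscout google_text -k "rust async runtimes"
# Library-level fallback using an engine that survives the release:
from webscout.search import BingSearch
print(BingSearch().text("rust async runtimes", max_results=5))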
webscout/search/__init__.py
CHANGED
@@ -3,15 +3,14 @@
 from .base import BaseSearch, BaseSearchEngine
 from .duckduckgo_main import DuckDuckGoSearch
 from .yep_main import YepSearch
+from .bing_main import BingSearch
 
 # Import new search engines
-from .engines.bing import Bing
 from .engines.brave import Brave
 from .engines.mojeek import Mojeek
 from .engines.yahoo import Yahoo
 from .engines.yandex import Yandex
 from .engines.wikipedia import Wikipedia
-from .engines.bing_news import BingNews
 from .engines.yahoo_news import YahooNews
 
 # Import result models
@@ -31,15 +30,14 @@ __all__ = [
     # Main search interfaces
     "DuckDuckGoSearch",
     "YepSearch",
+    "BingSearch",
 
     # Individual engines
-    "Bing",
     "Brave",
     "Mojeek",
     "Yahoo",
     "Yandex",
     "Wikipedia",
-    "BingNews",
     "YahooNews",
 
     # Result models
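For downstream imports, the per-engine Bing and BingNews classes leave the public surface and the unified BingSearch takes their place. A migration sketch based solely on the __all__ changes above:

# Before 2025.10.15 (both imports now fail):
#   from webscout.search import Bing, BingNews
# From 2025.10.15 onward:
from webscout.search import BingSearch  # text, images, news, and suggestions in one class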
webscout/search/bing_main.py
ADDED
@@ -0,0 +1,42 @@
+"""Bing unified search interface."""
+
+from __future__ import annotations
+from typing import Dict, List, Optional
+from .base import BaseSearch
+from .engines.bing.text import BingTextSearch
+from .engines.bing.images import BingImagesSearch
+from .engines.bing.news import BingNewsSearch
+from .engines.bing.suggestions import BingSuggestionsSearch
+
+
+class BingSearch(BaseSearch):
+    """Unified Bing search interface."""
+
+    def text(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None, unique: bool = True) -> List[Dict[str, str]]:
+        search = BingTextSearch()
+        return search.run(keywords, region, safesearch, max_results, unique=unique)
+
+    def images(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+        search = BingImagesSearch()
+        return search.run(keywords, region, safesearch, max_results)
+
+    def news(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+        search = BingNewsSearch()
+        return search.run(keywords, region, safesearch, max_results)
+
+    def suggestions(self, query: str, region: str = "en-US") -> List[Dict[str, str]]:
+        search = BingSuggestionsSearch()
+        result = search.run(query, region)
+        return [{'suggestion': s} for s in result]
+
+    def answers(self, keywords: str) -> List[Dict[str, str]]:
+        raise NotImplementedError("Answers not implemented for Bing")
+
+    def maps(self, *args, **kwargs) -> List[Dict[str, str]]:
+        raise NotImplementedError("Maps not implemented for Bing")
+
+    def translate(self, keywords: str, from_lang: Optional[str] = None, to_lang: str = "en") -> List[Dict[str, str]]:
+        raise NotImplementedError("Translate not implemented for Bing")
+
+    def videos(self, *args, **kwargs) -> List[Dict[str, str]]:
+        raise NotImplementedError("Videos not implemented for Bing")
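The new facade instantiates a dedicated engine class per call. A minimal usage sketch; the exact keys in each text-result dict come from BingTextSearch in text.py, which is listed in this release but not shown in this excerpt, so the key names in the loop are assumptions:

from webscout.search import BingSearch

bing = BingSearch()
for hit in bing.text("open source llm", region="us", max_results=5):
    print(hit.get("title"), hit.get("url"))  # key names assumed from the sibling engines
print(bing.suggestions("webscou"))           # [{'suggestion': '...'}, ...] per the wrapper above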
webscout/search/engines/bing/__init__.py
ADDED
@@ -0,0 +1 @@
+"""Bing search engines."""
webscout/search/engines/bing/base.py
ADDED
@@ -0,0 +1,33 @@
+"""Base class for Bing search implementations."""
+
+from __future__ import annotations
+
+from ....litagent import LitAgent
+from curl_cffi.requests import Session
+
+
+class BingBase:
+    """Base class for Bing search engines."""
+
+    def __init__(
+        self,
+        timeout: int = 10,
+        proxies: dict[str, str] | None = None,
+        verify: bool = True,
+        lang: str = "en-US",
+        sleep_interval: float = 0.0,
+        impersonate: str = "chrome110",
+    ):
+        self.timeout = timeout
+        self.proxies = proxies
+        self.verify = verify
+        self.lang = lang
+        self.sleep_interval = sleep_interval
+        self.base_url = "https://www.bing.com"
+        self.session = Session(
+            proxies=proxies,
+            verify=verify,
+            timeout=timeout,
+            impersonate=impersonate,
+        )
+        self.session.headers.update(LitAgent().generate_fingerprint())
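All four engines share this base: one curl_cffi Session with TLS impersonation plus headers from LitAgent (generate_fingerprint() is assumed to return a header mapping, as implied by the headers.update call). A construction sketch showing the knobs the base exposes; the proxy URL is a placeholder:

from webscout.search.engines.bing.news import BingNewsSearch

search = BingNewsSearch(
    timeout=15,
    proxies={"http": "http://127.0.0.1:8080", "https": "http://127.0.0.1:8080"},  # placeholder proxy
    impersonate="chrome110",  # forwarded to curl_cffi for TLS fingerprinting
    sleep_interval=0.5,       # pause between pagination requests
)
results = search.run("chip export controls", "us", "moderate", 10)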
webscout/search/engines/bing/images.py
ADDED
@@ -0,0 +1,105 @@
+"""Bing images search."""
+
+from __future__ import annotations
+
+from typing import Dict, List
+from urllib.parse import urlencode
+from bs4 import BeautifulSoup
+from time import sleep
+
+from .base import BingBase
+
+
+class BingImagesSearch(BingBase):
+    def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "us")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+
+        if not keywords:
+            raise ValueError("Keywords are mandatory")
+
+        safe_map = {
+            "on": "Strict",
+            "moderate": "Moderate",
+            "off": "Off"
+        }
+        safe = safe_map.get(safesearch.lower(), "Moderate")
+
+        # Bing images URL
+        url = f"{self.base_url}/images/async"
+        params = {
+            'q': keywords,
+            'first': '1',
+            'count': '35',  # Fetch more to get max_results
+            'cw': '1177',
+            'ch': '759',
+            'tsc': 'ImageHoverTitle',
+            'layout': 'RowBased_Landscape',
+            't': '0',
+            'IG': '',
+            'SFX': '0',
+            'iid': 'images.1'
+        }
+
+        results = []
+        first = 1
+        sfx = 0
+
+        while len(results) < max_results:
+            params['first'] = str(first)
+            params['SFX'] = str(sfx)
+            full_url = f"{url}?{urlencode(params)}"
+
+            try:
+                response = self.session.get(full_url, timeout=self.timeout)
+                response.raise_for_status()
+                html = response.text
+            except Exception as e:
+                raise Exception(f"Failed to fetch images: {str(e)}")
+
+            soup = BeautifulSoup(html, 'html.parser')
+            img_tags = soup.select('a.iusc img')
+
+            for img in img_tags:
+                if len(results) >= max_results:
+                    break
+
+                title = img.get('alt', '')
+                src = img.get('src', '')
+                m_attr = img.parent.get('m', '') if img.parent else ''
+
+                # Parse m attribute for full image URL
+                image_url = src
+                thumbnail = src
+                if m_attr:
+                    try:
+                        import json
+                        m_data = json.loads(m_attr)
+                        image_url = m_data.get('murl', src)
+                        thumbnail = m_data.get('turl', src)
+                    except:
+                        pass
+
+                source = ''
+                if img.parent and img.parent.parent:
+                    source_tag = img.parent.parent.select_one('.iusc .lnk')
+                    if source_tag:
+                        source = source_tag.get_text(strip=True)
+
+                results.append({
+                    'title': title,
+                    'image': image_url,
+                    'thumbnail': thumbnail,
+                    'url': image_url,  # For compatibility
+                    'source': source
+                })
+
+            first += 35
+            sfx += 1
+
+            if self.sleep_interval:
+                sleep(self.sleep_interval)
+
+        return results[:max_results]
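The scraper's key move is that Bing embeds per-result metadata as JSON in the m attribute of each a.iusc anchor: murl is the full-resolution URL and turl the thumbnail. (Note also that the computed safe value is never added to params, so the safesearch argument currently has no effect, and unlike news.py there is no break when a page yields no results.) A self-contained sketch of the m-attribute parsing step, with fabricated markup:

import json
from bs4 import BeautifulSoup

html = '<a class="iusc" m=\'{"murl": "https://example.com/full.jpg", "turl": "https://example.com/thumb.jpg"}\'><img alt="demo" src="thumb.jpg"></a>'
img = BeautifulSoup(html, "html.parser").select_one("a.iusc img")
m_data = json.loads(img.parent["m"])
print(m_data["murl"])  # full-size image, what the code stores as 'image'
print(m_data["turl"])  # thumbnail, stored as 'thumbnail'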
webscout/search/engines/bing/news.py
ADDED
@@ -0,0 +1,89 @@
+"""Bing news search."""
+
+from __future__ import annotations
+
+from typing import Dict, List
+from urllib.parse import urlencode
+from bs4 import BeautifulSoup
+from time import sleep
+
+from .base import BingBase
+
+
+class BingNewsSearch(BingBase):
+    def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "us")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+
+        if not keywords:
+            raise ValueError("Keywords are mandatory")
+
+        safe_map = {
+            "on": "Strict",
+            "moderate": "Moderate",
+            "off": "Off"
+        }
+        safe = safe_map.get(safesearch.lower(), "Moderate")
+
+        # Bing news URL
+        url = f"{self.base_url}/news/infinitescrollajax"
+        params = {
+            'q': keywords,
+            'InfiniteScroll': '1',
+            'first': '1',
+            'SFX': '0',
+            'cc': region.lower(),
+            'setlang': self.lang.split('-')[0]
+        }
+
+        results = []
+        first = 1
+        sfx = 0
+
+        while len(results) < max_results:
+            params['first'] = str(first)
+            params['SFX'] = str(sfx)
+            full_url = f"{url}?{urlencode(params)}"
+
+            try:
+                response = self.session.get(full_url, timeout=self.timeout)
+                response.raise_for_status()
+                data = response.json()
+            except Exception as e:
+                raise Exception(f"Failed to fetch news: {str(e)}")
+
+            html = data.get('html', '')
+            if not html:
+                break
+
+            soup = BeautifulSoup(html, 'html.parser')
+            news_items = soup.select('div.newsitem')
+
+            for item in news_items:
+                if len(results) >= max_results:
+                    break
+
+                title = item.select_one('a.title')
+                snippet = item.select_one('div.snippet')
+                source = item.select_one('div.source')
+                date = item.select_one('span.date')
+
+                if title:
+                    news_result = {
+                        'title': title.get_text(strip=True),
+                        'url': title.get('href', ''),
+                        'body': snippet.get_text(strip=True) if snippet else '',
+                        'source': source.get_text(strip=True) if source else '',
+                        'date': date.get_text(strip=True) if date else ''
+                    }
+                    results.append(news_result)
+
+            first += 10
+            sfx += 1
+
+            if self.sleep_interval:
+                sleep(self.sleep_interval)
+
+        return results[:max_results]
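The news scraper hits an AJAX route rather than an HTML page: the response is JSON wrapping an HTML fragment under the html key, and pagination advances first by 10 and SFX by 1 until the fragment comes back empty. A sketch of the parsing step with a fabricated payload whose field names match the selectors above:

from bs4 import BeautifulSoup

payload = {"html": '<div class="newsitem"><a class="title" href="https://example.com/story">Headline</a><div class="snippet">Summary text</div><div class="source">Example Wire</div><span class="date">2h</span></div>'}
item = BeautifulSoup(payload["html"], "html.parser").select_one("div.newsitem")
print(item.select_one("a.title").get_text(strip=True))    # Headline
print(item.select_one("a.title")["href"])                 # https://example.com/story
print(item.select_one("span.date").get_text(strip=True))  # 2h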
webscout/search/engines/bing/suggestions.py
ADDED
@@ -0,0 +1,34 @@
+"""Bing suggestions search."""
+
+from __future__ import annotations
+
+from typing import List
+from urllib.parse import urlencode
+
+from .base import BingBase
+
+
+class BingSuggestionsSearch(BingBase):
+    def run(self, *args, **kwargs) -> List[str]:
+        query = args[0] if args else kwargs.get("query")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "en-US")
+
+        if not query:
+            raise ValueError("Query is mandatory")
+
+        params = {
+            "query": query,
+            "mkt": region
+        }
+        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
+
+        try:
+            response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+            data = response.json()
+            # Bing suggestions API returns [query, [suggestions]]
+            if len(data) > 1 and isinstance(data[1], list):
+                return data[1]
+            return []
+        except Exception as e:
+            raise Exception(f"Failed to fetch suggestions: {str(e)}")