webscout 2025.10.13-py3-none-any.whl → 2025.10.15-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout may be problematic.

webscout/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # webscout/__init__.py

- from .search import DuckDuckGoSearch, YepSearch
+ from .search import DuckDuckGoSearch, YepSearch, BingSearch
  from .version import __version__
  from .Provider import *
  from .Provider.TTI import *
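
BingSearch is now re-exported at the package top level alongside DuckDuckGoSearch and YepSearch. A minimal usage sketch, assuming the top-level export behaves like the BingSearch class added later in this diff (the exact result keys for text() are not shown here):

from webscout import BingSearch

bing = BingSearch()
for result in bing.text("open source web scraping", max_results=5):
    print(result)
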
webscout/auth/routes.py CHANGED
@@ -43,7 +43,6 @@ from .request_processing import (
  from .auth_system import get_auth_components
  from .simple_logger import request_logger
  from ..search import DuckDuckGoSearch, YepSearch
- from ..DWEBS import GoogleSearch
  from webscout.Bing_search import BingSearch

  # Setup logger
@@ -514,33 +513,20 @@ class Api:
  @self.app.get(
      "/search",
      tags=["Web search"],
-     description="Unified web search endpoint supporting Google, Yep, DuckDuckGo, and Bing with text, news, images, and suggestions search types."
+     description="Unified web search endpoint supporting Yep, DuckDuckGo, and Bing with text, news, images, and suggestions search types."
  )
  async def websearch(
      q: str = Query(..., description="Search query"),
-     engine: str = Query("google", description="Search engine: google, yep, duckduckgo, bing"),
+     engine: str = Query("duckduckgo", description="Search engine: yep, duckduckgo, bing"),
      max_results: int = Query(10, description="Maximum number of results"),
      region: str = Query("all", description="Region code (optional)"),
      safesearch: str = Query("moderate", description="Safe search: on, moderate, off"),
      type: str = Query("text", description="Search type: text, news, images, suggestions"),
  ):
      """Unified web search endpoint."""
-     github_footer = "If you believe this is a bug, please pull an issue at https://github.com/OEvortex/Webscout."
+     github_footer = "If you believe this is a bug, please pull an issue at https://github.com/pyscout/Webscout."
      try:
-         if engine == "google":
-             gs = GoogleSearch()
-             if type == "text":
-                 results = gs.text(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
-                 return {"engine": "google", "type": "text", "results": [r.__dict__ for r in results]}
-             elif type == "news":
-                 results = gs.news(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
-                 return {"engine": "google", "type": "news", "results": [r.__dict__ for r in results]}
-             elif type == "suggestions":
-                 results = gs.suggestions(q, region=region)
-                 return {"engine": "google", "type": "suggestions", "results": results}
-             else:
-                 return {"error": "Google only supports text, news, and suggestions in this API.", "footer": github_footer}
-         elif engine == "yep":
+         if engine == "yep":
              ys = YepSearch()
              if type == "text":
                  results = ys.text(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
@@ -554,12 +540,12 @@ class Api:
              else:
                  return {"error": "Yep only supports text, images, and suggestions in this API.", "footer": github_footer}
          elif engine == "duckduckgo":
-             ws = WEBS()
+             ddg = DuckDuckGoSearch()
              if type == "text":
-                 results = ws.text(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
+                 results = ddg.text(keywords=q, region=region, safesearch=safesearch, max_results=max_results)
                  return {"engine": "duckduckgo", "type": "text", "results": results}
              elif type == "suggestions":
-                 results = ws.suggestions(keywords=q, region=region)
+                 results = ddg.suggestions(keywords=q, region=region)
                  return {"engine": "duckduckgo", "type": "suggestions", "results": results}
              else:
                  return {"error": "DuckDuckGo only supports text and suggestions in this API.", "footer": github_footer}
@@ -580,7 +566,7 @@ class Api:
              else:
                  return {"error": "Bing only supports text, news, images, and suggestions in this API.", "footer": github_footer}
          else:
-             return {"error": "Unknown engine. Use one of: google, yep, duckduckgo, bing.", "footer": github_footer}
+             return {"error": "Unknown engine. Use one of: yep, duckduckgo, bing.", "footer": github_footer}
      except Exception as e:
          # Special handling for rate limit errors
          msg = str(e)
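
With this change the /search route defaults to DuckDuckGo and no longer accepts engine=google. A hedged sketch of calling the endpoint; the host and port are assumptions, only the path and query parameters come from the diff above:

import requests

resp = requests.get(
    "http://localhost:8000/search",  # assumed local deployment of the webscout API
    params={"q": "python packaging", "engine": "bing", "type": "news", "max_results": 5},
)
print(resp.json())
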
webscout/cli.py CHANGED
@@ -1,10 +1,11 @@
  import sys
  from .swiftcli import CLI, option
- from .webscout_search import DuckDuckGoSearch # Import the WEBS class from webscout_search
- from .DWEBS import GoogleSearch # Import GoogleSearch from DWEBS
- from .yep_search import YepSearch # Import YepSearch from yep_search
+ from .search import DuckDuckGoSearch, YepSearch # Import search classes
  from .version import __version__

+ # Alias for backward compatibility
+ WEBS = DuckDuckGoSearch
+

  def _print_data(data):
      """Prints data in a simple formatted way."""
@@ -260,152 +261,6 @@ def weather(location: str, language: str, proxy: str = None, timeout: int = 10):
          raise e

  @app.command()
- @option("--keywords", "-k", help="Search keywords", required=True)
- @option("--region", "-r", help="Region for search results (ISO country code)", default="all")
- @option("--safesearch", "-s", help="SafeSearch setting (on, moderate, off)", default="moderate")
- @option("--max-results", "-m", help="Maximum number of results", type=int, default=10)
- @option("--start-num", "-start", help="Starting position for pagination", type=int, default=0)
- @option("--unique", "-u", help="Filter duplicate results", type=bool, default=True)
- @option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
- @option("--proxy", "-p", help="Proxy URL to use for requests")
- @option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
- def google_text(
-     keywords: str,
-     region: str,
-     safesearch: str,
-     max_results: int,
-     start_num: int,
-     unique: bool,
-     timeout: int = 10,
-     proxy: str = None,
-     impersonate: str = "chrome110"
- ):
-     """Perform a text search using Google Search."""
-     google = GoogleSearch(
-         timeout=timeout,
-         proxies={"https": proxy, "http": proxy} if proxy else None,
-         verify=True,
-         lang="en",
-         sleep_interval=0.0,
-         impersonate=impersonate
-     )
-
-     try:
-         results = google.text(
-             keywords=keywords,
-             region=region,
-             safesearch=safesearch,
-             max_results=max_results,
-             start_num=start_num,
-             unique=unique
-         )
-
-         # Convert SearchResult objects to dictionaries for printing
-         formatted_results = []
-         for result in results:
-             result_dict = {
-                 "title": result.title,
-                 "url": result.url,
-                 "description": result.description,
-             }
-             # Add any metadata to the result dictionary
-             for k, v in result.metadata.items():
-                 result_dict[k] = v
-
-             formatted_results.append(result_dict)
-
-         _print_data(formatted_results)
-     except Exception as e:
-         raise e
-
- @app.command()
- @option("--keywords", "-k", help="Search keywords", required=True)
- @option("--region", "-r", help="Region for search results (ISO country code)", default="all")
- @option("--safesearch", "-s", help="SafeSearch setting (on, moderate, off)", default="moderate")
- @option("--max-results", "-m", help="Maximum number of results", type=int, default=10)
- @option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
- @option("--proxy", "-p", help="Proxy URL to use for requests")
- @option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
- def google_news(
-     keywords: str,
-     region: str,
-     safesearch: str,
-     max_results: int,
-     timeout: int = 10,
-     proxy: str = None,
-     impersonate: str = "chrome110"
- ):
-     """Perform a news search using Google Search."""
-     google = GoogleSearch(
-         timeout=timeout,
-         proxies={"https": proxy, "http": proxy} if proxy else None,
-         verify=True,
-         lang="en",
-         sleep_interval=0.0,
-         impersonate=impersonate
-     )
-
-     try:
-         results = google.news(
-             keywords=keywords,
-             region=region,
-             safesearch=safesearch,
-             max_results=max_results
-         )
-
-         # Convert SearchResult objects to dictionaries for printing
-         formatted_results = []
-         for result in results:
-             result_dict = {
-                 "title": result.title,
-                 "url": result.url,
-                 "description": result.description,
-             }
-             # Add any metadata to the result dictionary
-             for k, v in result.metadata.items():
-                 result_dict[k] = v
-
-             formatted_results.append(result_dict)
-
-         _print_data(formatted_results)
-     except Exception as e:
-         raise e
-
- @app.command()
- @option("--query", "-q", help="Search query", required=True)
- @option("--region", "-r", help="Region for suggestions (ISO country code)", default="all")
- @option("--timeout", "-timeout", help="Timeout value for requests", type=int, default=10)
- @option("--proxy", "-p", help="Proxy URL to use for requests")
- @option("--impersonate", "-i", help="Browser to impersonate", default="chrome110")
- def google_suggestions(
-     query: str,
-     region: str,
-     timeout: int = 10,
-     proxy: str = None,
-     impersonate: str = "chrome110"
- ):
-     """Get search suggestions from Google Search."""
-     google = GoogleSearch(
-         timeout=timeout,
-         proxies={"https": proxy, "http": proxy} if proxy else None,
-         verify=True,
-         lang="en",
-         sleep_interval=0.0,
-         impersonate=impersonate
-     )
-
-     try:
-         results = google.suggestions(query=query, region=region)
-
-         # Format suggestions for printing
-         formatted_results = []
-         for i, suggestion in enumerate(results, 1):
-             formatted_results.append({"position": i, "suggestion": suggestion})
-
-         _print_data(formatted_results)
-     except Exception as e:
-         raise e
-
  @app.command()
  @option("--keywords", "-k", help="Search keywords", required=True)
  @option("--region", "-r", help="Region for search results", default="all")
webscout/search/__init__.py CHANGED
@@ -3,15 +3,14 @@
  from .base import BaseSearch, BaseSearchEngine
  from .duckduckgo_main import DuckDuckGoSearch
  from .yep_main import YepSearch
+ from .bing_main import BingSearch

  # Import new search engines
- from .engines.bing import Bing
  from .engines.brave import Brave
  from .engines.mojeek import Mojeek
  from .engines.yahoo import Yahoo
  from .engines.yandex import Yandex
  from .engines.wikipedia import Wikipedia
- from .engines.bing_news import BingNews
  from .engines.yahoo_news import YahooNews

  # Import result models
@@ -31,15 +30,14 @@ __all__ = [
      # Main search interfaces
      "DuckDuckGoSearch",
      "YepSearch",
+     "BingSearch",

      # Individual engines
-     "Bing",
      "Brave",
      "Mojeek",
      "Yahoo",
      "Yandex",
      "Wikipedia",
-     "BingNews",
      "YahooNews",

      # Result models
webscout/search/bing_main.py ADDED
@@ -0,0 +1,42 @@
+ """Bing unified search interface."""
+
+ from __future__ import annotations
+ from typing import Dict, List, Optional
+ from .base import BaseSearch
+ from .engines.bing.text import BingTextSearch
+ from .engines.bing.images import BingImagesSearch
+ from .engines.bing.news import BingNewsSearch
+ from .engines.bing.suggestions import BingSuggestionsSearch
+
+
+ class BingSearch(BaseSearch):
+     """Unified Bing search interface."""
+
+     def text(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None, unique: bool = True) -> List[Dict[str, str]]:
+         search = BingTextSearch()
+         return search.run(keywords, region, safesearch, max_results, unique=unique)
+
+     def images(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+         search = BingImagesSearch()
+         return search.run(keywords, region, safesearch, max_results)
+
+     def news(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+         search = BingNewsSearch()
+         return search.run(keywords, region, safesearch, max_results)
+
+     def suggestions(self, query: str, region: str = "en-US") -> List[Dict[str, str]]:
+         search = BingSuggestionsSearch()
+         result = search.run(query, region)
+         return [{'suggestion': s} for s in result]
+
+     def answers(self, keywords: str) -> List[Dict[str, str]]:
+         raise NotImplementedError("Answers not implemented for Bing")
+
+     def maps(self, *args, **kwargs) -> List[Dict[str, str]]:
+         raise NotImplementedError("Maps not implemented for Bing")
+
+     def translate(self, keywords: str, from_lang: Optional[str] = None, to_lang: str = "en") -> List[Dict[str, str]]:
+         raise NotImplementedError("Translate not implemented for Bing")
+
+     def videos(self, *args, **kwargs) -> List[Dict[str, str]]:
+         raise NotImplementedError("Videos not implemented for Bing")
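
A usage sketch for the new unified wrapper; the method names and defaults are taken from the class above, and the news result keys come from the parser added further below (news search):

from webscout.search import BingSearch

bing = BingSearch()
for article in bing.news("rust release", region="us", max_results=3):
    print(article["date"], article["source"], article["title"])

print(bing.suggestions("how to parse html", region="en-US"))
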
webscout/search/engines/bing/__init__.py ADDED
@@ -0,0 +1 @@
+ """Bing search engines."""
webscout/search/engines/bing/base.py ADDED
@@ -0,0 +1,33 @@
+ """Base class for Bing search implementations."""
+
+ from __future__ import annotations
+
+ from ....litagent import LitAgent
+ from curl_cffi.requests import Session
+
+
+ class BingBase:
+     """Base class for Bing search engines."""
+
+     def __init__(
+         self,
+         timeout: int = 10,
+         proxies: dict[str, str] | None = None,
+         verify: bool = True,
+         lang: str = "en-US",
+         sleep_interval: float = 0.0,
+         impersonate: str = "chrome110",
+     ):
+         self.timeout = timeout
+         self.proxies = proxies
+         self.verify = verify
+         self.lang = lang
+         self.sleep_interval = sleep_interval
+         self.base_url = "https://www.bing.com"
+         self.session = Session(
+             proxies=proxies,
+             verify=verify,
+             timeout=timeout,
+             impersonate=impersonate,
+         )
+         self.session.headers.update(LitAgent().generate_fingerprint())
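
BingBase centralizes the HTTP setup: a curl_cffi Session configured with proxies, TLS verification, a timeout, and browser impersonation, plus headers from LitAgent's fingerprint generator. A minimal sketch of the constructor options; the module path and the proxy URL are assumptions, and the engine subclasses below inherit this __init__ unchanged:

from webscout.search.engines.bing.base import BingBase  # path inferred from the imports in bing_main.py

client = BingBase(
    timeout=15,
    proxies={"http": "http://127.0.0.1:8080", "https": "http://127.0.0.1:8080"},  # placeholder proxy
    impersonate="chrome110",
)
print(client.base_url)
print(client.session.headers)
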
webscout/search/engines/bing/images.py ADDED
@@ -0,0 +1,105 @@
+ """Bing images search."""
+
+ from __future__ import annotations
+
+ from typing import Dict, List
+ from urllib.parse import urlencode
+ from bs4 import BeautifulSoup
+ from time import sleep
+
+ from .base import BingBase
+
+
+ class BingImagesSearch(BingBase):
+     def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+         keywords = args[0] if args else kwargs.get("keywords")
+         region = args[1] if len(args) > 1 else kwargs.get("region", "us")
+         safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+         max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+
+         if not keywords:
+             raise ValueError("Keywords are mandatory")
+
+         safe_map = {
+             "on": "Strict",
+             "moderate": "Moderate",
+             "off": "Off"
+         }
+         safe = safe_map.get(safesearch.lower(), "Moderate")
+
+         # Bing images URL
+         url = f"{self.base_url}/images/async"
+         params = {
+             'q': keywords,
+             'first': '1',
+             'count': '35', # Fetch more to get max_results
+             'cw': '1177',
+             'ch': '759',
+             'tsc': 'ImageHoverTitle',
+             'layout': 'RowBased_Landscape',
+             't': '0',
+             'IG': '',
+             'SFX': '0',
+             'iid': 'images.1'
+         }
+
+         results = []
+         first = 1
+         sfx = 0
+
+         while len(results) < max_results:
+             params['first'] = str(first)
+             params['SFX'] = str(sfx)
+             full_url = f"{url}?{urlencode(params)}"
+
+             try:
+                 response = self.session.get(full_url, timeout=self.timeout)
+                 response.raise_for_status()
+                 html = response.text
+             except Exception as e:
+                 raise Exception(f"Failed to fetch images: {str(e)}")
+
+             soup = BeautifulSoup(html, 'html.parser')
+             img_tags = soup.select('a.iusc img')
+
+             for img in img_tags:
+                 if len(results) >= max_results:
+                     break
+
+                 title = img.get('alt', '')
+                 src = img.get('src', '')
+                 m_attr = img.parent.get('m', '') if img.parent else ''
+
+                 # Parse m attribute for full image URL
+                 image_url = src
+                 thumbnail = src
+                 if m_attr:
+                     try:
+                         import json
+                         m_data = json.loads(m_attr)
+                         image_url = m_data.get('murl', src)
+                         thumbnail = m_data.get('turl', src)
+                     except:
+                         pass
+
+                 source = ''
+                 if img.parent and img.parent.parent:
+                     source_tag = img.parent.parent.select_one('.iusc .lnk')
+                     if source_tag:
+                         source = source_tag.get_text(strip=True)
+
+                 results.append({
+                     'title': title,
+                     'image': image_url,
+                     'thumbnail': thumbnail,
+                     'url': image_url, # For compatibility
+                     'source': source
+                 })
+
+             first += 35
+             sfx += 1
+
+             if self.sleep_interval:
+                 sleep(self.sleep_interval)
+
+         return results[:max_results]
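
The images engine pages through Bing's /images/async endpoint 35 thumbnails at a time and pulls full-size URLs out of the JSON packed into each anchor's m attribute. A short sketch via the wrapper; the result keys (title, image, thumbnail, url, source) come from the parser above:

from webscout.search import BingSearch

for img in BingSearch().images("aurora borealis", max_results=4):
    print(img["title"], img["image"])
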
webscout/search/engines/bing/news.py ADDED
@@ -0,0 +1,89 @@
+ """Bing news search."""
+
+ from __future__ import annotations
+
+ from typing import Dict, List
+ from urllib.parse import urlencode
+ from bs4 import BeautifulSoup
+ from time import sleep
+
+ from .base import BingBase
+
+
+ class BingNewsSearch(BingBase):
+     def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+         keywords = args[0] if args else kwargs.get("keywords")
+         region = args[1] if len(args) > 1 else kwargs.get("region", "us")
+         safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+         max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+
+         if not keywords:
+             raise ValueError("Keywords are mandatory")
+
+         safe_map = {
+             "on": "Strict",
+             "moderate": "Moderate",
+             "off": "Off"
+         }
+         safe = safe_map.get(safesearch.lower(), "Moderate")
+
+         # Bing news URL
+         url = f"{self.base_url}/news/infinitescrollajax"
+         params = {
+             'q': keywords,
+             'InfiniteScroll': '1',
+             'first': '1',
+             'SFX': '0',
+             'cc': region.lower(),
+             'setlang': self.lang.split('-')[0]
+         }
+
+         results = []
+         first = 1
+         sfx = 0
+
+         while len(results) < max_results:
+             params['first'] = str(first)
+             params['SFX'] = str(sfx)
+             full_url = f"{url}?{urlencode(params)}"
+
+             try:
+                 response = self.session.get(full_url, timeout=self.timeout)
+                 response.raise_for_status()
+                 data = response.json()
+             except Exception as e:
+                 raise Exception(f"Failed to fetch news: {str(e)}")
+
+             html = data.get('html', '')
+             if not html:
+                 break
+
+             soup = BeautifulSoup(html, 'html.parser')
+             news_items = soup.select('div.newsitem')
+
+             for item in news_items:
+                 if len(results) >= max_results:
+                     break
+
+                 title = item.select_one('a.title')
+                 snippet = item.select_one('div.snippet')
+                 source = item.select_one('div.source')
+                 date = item.select_one('span.date')
+
+                 if title:
+                     news_result = {
+                         'title': title.get_text(strip=True),
+                         'url': title.get('href', ''),
+                         'body': snippet.get_text(strip=True) if snippet else '',
+                         'source': source.get_text(strip=True) if source else '',
+                         'date': date.get_text(strip=True) if date else ''
+                     }
+                     results.append(news_result)
+
+             first += 10
+             sfx += 1
+
+             if self.sleep_interval:
+                 sleep(self.sleep_interval)
+
+         return results[:max_results]
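
The news engine calls the infinitescrollajax endpoint, which returns JSON whose html field is parsed for div.newsitem entries; pagination advances first by 10 per request. A direct-use sketch; the positional argument order (keywords, region, safesearch, max_results) mirrors the call in bing_main.py, and the module path is inferred from its imports:

from webscout.search.engines.bing.news import BingNewsSearch

for item in BingNewsSearch().run("python 3.13", "us", "moderate", 5):
    print(item["date"], item["source"], item["title"])
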
webscout/search/engines/bing/suggestions.py ADDED
@@ -0,0 +1,34 @@
+ """Bing suggestions search."""
+
+ from __future__ import annotations
+
+ from typing import List
+ from urllib.parse import urlencode
+
+ from .base import BingBase
+
+
+ class BingSuggestionsSearch(BingBase):
+     def run(self, *args, **kwargs) -> List[str]:
+         query = args[0] if args else kwargs.get("query")
+         region = args[1] if len(args) > 1 else kwargs.get("region", "en-US")
+
+         if not query:
+             raise ValueError("Query is mandatory")
+
+         params = {
+             "query": query,
+             "mkt": region
+         }
+         url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
+
+         try:
+             response = self.session.get(url, timeout=self.timeout)
+             response.raise_for_status()
+             data = response.json()
+             # Bing suggestions API returns [query, [suggestions]]
+             if len(data) > 1 and isinstance(data[1], list):
+                 return data[1]
+             return []
+         except Exception as e:
+             raise Exception(f"Failed to fetch suggestions: {str(e)}")
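
Suggestions use the public osjson.aspx endpoint, which responds with [query, [suggestions]]; the raw engine returns that inner list of strings, while BingSearch.suggestions wraps each string in a {'suggestion': ...} dict. A short sketch showing both shapes; the engine module path is inferred from the imports in bing_main.py:

from webscout.search import BingSearch
from webscout.search.engines.bing.suggestions import BingSuggestionsSearch

print(BingSuggestionsSearch().run("webscout py"))   # list of strings
print(BingSearch().suggestions("webscout py"))      # list of {'suggestion': ...} dicts
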