webscout 4.8__py3-none-any.whl → 4.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic.

@@ -0,0 +1,124 @@
+from bs4 import BeautifulSoup
+import requests
+from typing import Dict, List, Optional, Union
+from concurrent.futures import ThreadPoolExecutor, as_completed
+from urllib.parse import urlparse
+from termcolor import colored
+import time
+import random
+
+class BingS:
+    """Bing search class to get search results from bing.com."""
+
+    _executor: ThreadPoolExecutor = ThreadPoolExecutor(max_workers=10)
+
+    def __init__(
+        self,
+        headers: Optional[Dict[str, str]] = None,
+        proxy: Optional[str] = None,
+        timeout: Optional[int] = 10,
+    ) -> None:
+        """Initialize the BingS object."""
+        self.proxy: Optional[str] = proxy
+        self.headers = headers if headers else {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/111.0.0.0 Safari/537.36 Edg/111.0.1661.62"
+        }
+        self.headers["Referer"] = "https://www.bing.com/"
+        self.client = requests.Session()
+        self.client.headers.update(self.headers)
+        self.client.proxies.update({"http": self.proxy, "https": self.proxy})
+        self.timeout = timeout
+
+    def __enter__(self) -> "BingS":
+        return self
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.client.close()
+
+    def _get_url(
+        self,
+        method: str,
+        url: str,
+        params: Optional[Dict[str, str]] = None,
+        data: Optional[Union[Dict[str, str], bytes]] = None,
+    ) -> bytes:
+        try:
+            resp = self.client.request(method, url, params=params, data=data, timeout=self.timeout)
+        except Exception as ex:
+            raise Exception(f"{url} {type(ex).__name__}: {ex}") from ex
+        if resp.status_code == 200:
+            return resp.content
+        raise Exception(f"{resp.url} returned status code {resp.status_code}. {params=} {data=}")
+
+    def search(
+        self,
+        keywords: str,
+        region: str = "us-EN",  # Bing uses us-EN
+        lang: str = "en",
+        safe: str = "off",
+        timelimit: Optional[str] = None,  # Not directly supported by Bing
+        max_results: Optional[int] = None,
+    ) -> List[Dict[str, str]]:
+        """Bing text search."""
+        assert keywords, "keywords is mandatory"
+
+        results = []
+        start = 1  # Bing uses 1-based indexing for pages
+        while len(results) < (max_results or float('inf')):
+            params = {
+                "q": keywords,
+                "count": 10,  # Number of results per page
+                "mkt": region,
+                "setlang": lang,
+                "safeSearch": safe,
+                "first": start,  # Bing uses 'first' for pagination
+            }
+
+            try:
+                resp_content = self._get_url("GET", "https://www.bing.com/search", params=params)
+                soup = BeautifulSoup(resp_content, "html.parser")
+                result_block = soup.find_all("li", class_="b_algo")
+
+                if not result_block:
+                    break
+
+                for result in result_block:
+                    try:
+                        link = result.find("a", href=True)
+                        if link:
+                            initial_url = link["href"]
+
+                            title = result.find("h2").text if result.find("h2") else ""
+                            description = result.find("p").text.strip() if result.find("p") else ""  # Strip whitespace
+
+                            # Remove 'WEB' prefix if present
+                            if description.startswith("WEB"):
+                                description = description[3:].lstrip()  # Drop 'WEB' and any following space
+
+                            results.append({
+                                "title": title,
+                                "href": initial_url,
+                                "abstract": description,
+                                "index": len(results),
+                                "type": "web",
+                            })
+
+                            if max_results and len(results) >= max_results:
+                                return results
+
+                    except Exception as e:
+                        print(f"Error extracting result: {e}")
+
+            except Exception as e:
+                print(f"Error fetching URL: {e}")
+
+            start += 10
+
+        return results
+
+if __name__ == "__main__":
+    from rich import print
+    searcher = BingS()
+    results = searcher.search("Python development tools", max_results=30)
+    for result in results:
+        print(result)
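
For reference, the BingS class added in this release defines __enter__ and __exit__, so it can be used as a context manager that closes its underlying requests.Session automatically. A minimal usage sketch, assuming the class is importable from the installed webscout package (the exact import path is an assumption based on this diff and may differ in 4.9):

    # Usage sketch; "from webscout import BingS" is an assumed import path.
    from webscout import BingS

    # The with-block closes the HTTP session via __exit__ when it ends.
    with BingS(timeout=10) as searcher:
        results = searcher.search("Python development tools", max_results=5)
        for r in results:
            # Each result is a dict with "title", "href", "abstract", "index", "type".
            print(r["title"], r["href"])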