webscout-7.8-py3-none-any.whl → webscout-7.9-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout might be problematic.

Files changed (41)
  1. webscout/Bard.py +5 -25
  2. webscout/DWEBS.py +476 -476
  3. webscout/Extra/__init__.py +2 -0
  4. webscout/Extra/autocoder/__init__.py +1 -1
  5. webscout/Extra/autocoder/{rawdog.py → autocoder.py} +849 -849
  6. webscout/Extra/tempmail/__init__.py +26 -0
  7. webscout/Extra/tempmail/async_utils.py +141 -0
  8. webscout/Extra/tempmail/base.py +156 -0
  9. webscout/Extra/tempmail/cli.py +187 -0
  10. webscout/Extra/tempmail/mail_tm.py +361 -0
  11. webscout/Extra/tempmail/temp_mail_io.py +292 -0
  12. webscout/Provider/Deepinfra.py +288 -286
  13. webscout/Provider/ElectronHub.py +709 -716
  14. webscout/Provider/ExaChat.py +20 -5
  15. webscout/Provider/Gemini.py +167 -165
  16. webscout/Provider/Groq.py +38 -24
  17. webscout/Provider/LambdaChat.py +2 -1
  18. webscout/Provider/TextPollinationsAI.py +232 -230
  19. webscout/Provider/__init__.py +0 -4
  20. webscout/Provider/copilot.py +427 -427
  21. webscout/Provider/freeaichat.py +8 -1
  22. webscout/Provider/uncovr.py +312 -299
  23. webscout/Provider/yep.py +64 -12
  24. webscout/__init__.py +38 -36
  25. webscout/cli.py +293 -293
  26. webscout/conversation.py +350 -17
  27. webscout/litprinter/__init__.py +59 -667
  28. webscout/optimizers.py +419 -419
  29. webscout/update_checker.py +14 -12
  30. webscout/version.py +1 -1
  31. webscout/webscout_search.py +1282 -1282
  32. webscout/webscout_search_async.py +813 -813
  33. {webscout-7.8.dist-info → webscout-7.9.dist-info}/METADATA +44 -39
  34. {webscout-7.8.dist-info → webscout-7.9.dist-info}/RECORD +38 -35
  35. webscout/Provider/DARKAI.py +0 -225
  36. webscout/Provider/EDITEE.py +0 -192
  37. webscout/litprinter/colors.py +0 -54
  38. {webscout-7.8.dist-info → webscout-7.9.dist-info}/LICENSE.md +0 -0
  39. {webscout-7.8.dist-info → webscout-7.9.dist-info}/WHEEL +0 -0
  40. {webscout-7.8.dist-info → webscout-7.9.dist-info}/entry_points.txt +0 -0
  41. {webscout-7.8.dist-info → webscout-7.9.dist-info}/top_level.txt +0 -0
webscout/DWEBS.py CHANGED
@@ -1,477 +1,477 @@
"""
DWEBS - A Google search library with advanced features
"""
import random
from time import sleep
from webscout.scout import Scout
from requests import get
from urllib.parse import unquote, urlencode
from typing import List, Dict, Optional, Union, Iterator, Any
from concurrent.futures import ThreadPoolExecutor


class SearchResult:
    """Class to represent a search result with metadata."""

    def __init__(self, url: str, title: str, description: str):
        """
        Initialize a search result.

        Args:
            url: The URL of the search result
            title: The title of the search result
            description: The description/snippet of the search result
        """
        self.url = url
        self.title = title
        self.description = description
        # Additional metadata that can be populated
        self.metadata: Dict[str, Any] = {}

    def __repr__(self) -> str:
        """Return string representation of search result."""
        return f"SearchResult(url={self.url}, title={self.title}, description={self.description})"


class GoogleSearch:
    """Google search implementation with configurable parameters and advanced features."""

    _executor: ThreadPoolExecutor = ThreadPoolExecutor()

    def __init__(
        self,
        timeout: int = 10,
        proxies: Optional[Dict[str, str]] = None,
        verify: bool = True,
        lang: str = "en",
        sleep_interval: float = 0.0
    ):
        """
        Initialize GoogleSearch with custom settings.

        Args:
            timeout: Request timeout in seconds
            proxies: Proxy configuration for requests
            verify: Whether to verify SSL certificates
            lang: Search language
            sleep_interval: Sleep time between pagination requests
        """
        self.timeout = timeout
        self.proxies = proxies if proxies else {}
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self.base_url = "https://www.google.com/search"

    def _get_useragent(self) -> str:
        """
        Generate a random user agent string.

        Returns:
            Random user agent string
        """
        lynx_version = f"Lynx/{random.randint(2, 3)}.{random.randint(8, 9)}.{random.randint(0, 2)}"
        libwww_version = f"libwww-FM/{random.randint(2, 3)}.{random.randint(13, 15)}"
        ssl_mm_version = f"SSL-MM/{random.randint(1, 2)}.{random.randint(3, 5)}"
        openssl_version = f"OpenSSL/{random.randint(1, 3)}.{random.randint(0, 4)}.{random.randint(0, 9)}"
        return f"{lynx_version} {libwww_version} {ssl_mm_version} {openssl_version}"

    def _make_request(self, term: str, results: int, start: int = 0, search_type: str = None) -> str:
        """
        Make a request to Google search.

        Args:
            term: Search query
            results: Number of results to request
            start: Start position for pagination
            search_type: Type of search ('', 'nws', 'isch')

        Returns:
            HTML response content
        """
        params = {
            "q": term,
            "num": results + 2, # Request slightly more than needed
            "hl": self.lang,
            "start": start,
        }

        # Add search type if specified
        if search_type:
            params["tbm"] = search_type

        try:
            resp = get(
                url=self.base_url,
                headers={
                    "User-Agent": self._get_useragent(),
                    "Accept-Language": self.lang,
                    "Accept-Encoding": "gzip, deflate, br",
                    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
                },
                params=params,
                proxies=self.proxies if any(self.proxies) else None,
                timeout=self.timeout,
                verify=self.verify,
                cookies={
                    'CONSENT': 'PENDING+987',
                    'SOCS': 'CAESHAgBEhIaAB',
                }
            )
            resp.raise_for_status()
            return resp.text
        except Exception as e:
            raise RuntimeError(f"Search request failed: {str(e)}")

    def _extract_url(self, raw_link: str) -> Optional[str]:
        """
        Extract actual URL from Google redirect URL.

        Args:
            raw_link: Raw link from Google search

        Returns:
            Actual URL or None if invalid
        """
        if not raw_link:
            return None

        if raw_link.startswith("/url?"):
            try:
                link = unquote(raw_link.split("&")[0].replace("/url?q=", ""))
                return link
            except Exception:
                return None
        elif raw_link.startswith("http"):
            return unquote(raw_link)

        return None

    def _is_valid_result(self, link: str, fetched_links: set, unique: bool) -> bool:
        """
        Check if search result is valid.

        Args:
            link: URL to check
            fetched_links: Set of already fetched links
            unique: Whether to filter duplicate links

        Returns:
            Boolean indicating if result is valid
        """
        if any(x in link for x in ["google.", "/search?", "webcache."]):
            return False

        if link in fetched_links and unique:
            return False

        return True

    def _parse_search_results(
        self,
        html: str,
        num_results: int,
        fetched_links: set,
        unique: bool
    ) -> List[SearchResult]:
        """
        Parse search results from HTML.

        Args:
            html: HTML content to parse
            num_results: Maximum number of results to return
            fetched_links: Set of already fetched links
            unique: Filter duplicate links

        Returns:
            List of SearchResult objects
        """
        results = []
        soup = Scout(html, features="html.parser")
        result_blocks = soup.find_all("div", class_="ezO2md")

        if not result_blocks:
            # Try alternative class patterns if the main one doesn't match
            result_blocks = soup.find_all("div", attrs={"class": lambda c: c and "g" in c.split()})

        for result in result_blocks:
            # Find the link - looking for various potential Google result classes
            link_tag = result.find("a", class_=["fuLhoc", "ZWRArf"])
            if not link_tag:
                link_tag = result.find("a")
            if not link_tag:
                continue

            raw_link = link_tag.get("href", "")
            link = self._extract_url(raw_link)

            if not link:
                continue

            if not self._is_valid_result(link, fetched_links, unique):
                continue

            # Get title - it's the text content of the link tag for these results
            title = link_tag.get_text(strip=True)
            if not title:
                continue

            # Get description - it's in a span with class FrIlee or potentially other classes
            description_tag = result.find("span", class_="FrIlee")
            if not description_tag:
                description_tag = result.find(["div", "span"], class_=lambda c: c and any(x in c for x in ["snippet", "description", "VwiC3b"]))

            description = description_tag.get_text(strip=True) if description_tag else ""

            # Create result object
            search_result = SearchResult(link, title, description)

            # Add extra metadata if available
            citation = result.find("cite")
            if citation:
                search_result.metadata["source"] = citation.get_text(strip=True)

            timestamp = result.find("span", class_=lambda c: c and "ZE5qJf" in c)
            if timestamp:
                search_result.metadata["date"] = timestamp.get_text(strip=True)

            fetched_links.add(link)
            results.append(search_result)

            if len(results) >= num_results:
                break

        return results

    def text(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10,
        start_num: int = 0,
        unique: bool = True
    ) -> List[SearchResult]:
        """
        Search Google for web results.

        Args:
            keywords: Search query
            region: Region for search results (ISO country code)
            safesearch: SafeSearch setting ("on", "moderate", "off")
            max_results: Maximum number of results to return
            start_num: Starting position for pagination
            unique: Filter duplicate results

        Returns:
            List of SearchResult objects with search results
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")

        # Map safesearch values to Google's safe parameter
        safe_map = {
            "on": "active",
            "moderate": "moderate",
            "off": "off"
        }
        safe = safe_map.get(safesearch.lower(), "moderate")

        # Keep track of unique results
        fetched_results = []
        fetched_links = set()
        start = start_num

        while len(fetched_results) < max_results:
            response_html = self._make_request(
                term=keywords,
                results=max_results - len(fetched_results),
                start=start
            )

            results = self._parse_search_results(
                html=response_html,
                num_results=max_results - len(fetched_results),
                fetched_links=fetched_links,
                unique=unique
            )

            if not results:
                break

            fetched_results.extend(results)

            if len(fetched_results) >= max_results:
                break

            start += 10
            sleep(self.sleep_interval)

        return fetched_results[:max_results]

    def news(
        self,
        keywords: str,
        region: str = None,
        safesearch: str = "moderate",
        max_results: int = 10
    ) -> List[SearchResult]:
        """
        Search Google News for news results.

        Args:
            keywords: Search query
            region: Region for search results (ISO country code)
            safesearch: SafeSearch setting ("on", "moderate", "off")
            max_results: Maximum number of results to return

        Returns:
            List of SearchResult objects with news results
        """
        if not keywords:
            raise ValueError("Search keywords cannot be empty")

        # Map safesearch values to Google's safe parameter
        safe_map = {
            "on": "active",
            "moderate": "moderate",
            "off": "off"
        }
        safe = safe_map.get(safesearch.lower(), "moderate")

        # Keep track of unique results
        fetched_results = []
        fetched_links = set()

        response_html = self._make_request(
            term=keywords,
            results=max_results,
            search_type="nws"
        )

        results = self._parse_search_results(
            html=response_html,
            num_results=max_results,
            fetched_links=fetched_links,
            unique=True
        )

        return results[:max_results]

    def suggestions(self, query: str, region: str = None) -> List[str]:
        """
        Get search suggestions for a query term.

        Args:
            query: Search query
            region: Region for suggestions (ISO country code)

        Returns:
            List of search suggestions
        """
        if not query:
            raise ValueError("Search query cannot be empty")

        try:
            params = {
                "client": "firefox",
                "q": query,
            }

            # Add region if specified
            if region and region.lower() != "all":
                params["gl"] = region

            url = f"https://www.google.com/complete/search?{urlencode(params)}"

            headers = {
                "User-Agent": self._get_useragent(),
                "Accept": "application/json, text/javascript, */*",
                "Accept-Language": self.lang,
            }

            response = get(
                url=url,
                headers=headers,
                timeout=self.timeout,
                verify=self.verify
            )
            response.raise_for_status()

            # Response format is typically: ["original query", ["suggestion1", "suggestion2", ...]]
            data = response.json()
            if isinstance(data, list) and len(data) > 1 and isinstance(data[1], list):
                return data[1]
            return []

        except Exception as e:
            # Return empty list on error instead of raising exception
            return []


# Legacy function support for backward compatibility
def search(term, num_results=10, lang="en", proxy=None, advanced=False, sleep_interval=0, timeout=5, safe="active", ssl_verify=True, region=None, start_num=0, unique=False):
    """Legacy function for backward compatibility."""
    google_search = GoogleSearch(
        timeout=timeout,
        proxies={"https": proxy, "http": proxy} if proxy else None,
        verify=ssl_verify,
        lang=lang,
        sleep_interval=sleep_interval
    )

    results = google_search.text(
        keywords=term,
        region=region,
        safesearch="on" if safe == "active" else "moderate" if safe == "moderate" else "off",
        max_results=num_results,
        start_num=start_num,
        unique=unique
    )

    # Convert to simple URLs if not advanced mode
    if not advanced:
        return [result.url for result in results]
    return results


if __name__ == "__main__":
    from rich import print
    google = GoogleSearch(
        timeout=10, # Optional: Set custom timeout
        proxies=None, # Optional: Use proxies
        verify=True # Optional: SSL verification
    )

    # Text Search
    print("TEXT SEARCH RESULTS:")
    text_results = google.text(
        keywords="Python programming",
        region="us", # Optional: Region for results
        safesearch="moderate", # Optional: "on", "moderate", "off"
        max_results=3 # Optional: Limit number of results
    )
    for result in text_results:
        print(f"Title: {result.title}")
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")

    # News Search
    print("\nNEWS SEARCH RESULTS:")
    news_results = google.news(
        keywords="artificial intelligence",
        region="us",
        safesearch="moderate",
        max_results=2
    )
    for result in news_results:
        print(f"Title: {result.title}")
        print(f"URL: {result.url}")
        print(f"Description: {result.description}")
        print("---")

    # Search Suggestions
    print("\nSEARCH SUGGESTIONS:")
    suggestions = google.suggestions("how to")
    print(suggestions)
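
For reference, a minimal usage sketch of the backward-compatible search() helper defined at the bottom of DWEBS.py above. This is not part of the diff: the query text, result count, and the assumption that the module imports as webscout.DWEBS (matching the webscout/DWEBS.py path in the file list) are illustrative.

# Sketch: calling the legacy search() wrapper shown in DWEBS.py above.
from webscout.DWEBS import search

# Default mode returns a list of plain URL strings.
urls = search("python web scraping", num_results=5)
for url in urls:
    print(url)

# advanced=True returns SearchResult objects with url, title, and description.
results = search("python web scraping", num_results=5, advanced=True)
for r in results:
    print(r.title, "-", r.url)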