webscout 2025.10.14.1__py3-none-any.whl → 2025.10.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic; see the registry's advisory page for more details.

Files changed (70)
  1. webscout/Extra/YTToolkit/README.md +1 -1
  2. webscout/Extra/tempmail/README.md +3 -3
  3. webscout/Provider/OPENAI/README.md +1 -1
  4. webscout/Provider/TTI/bing.py +4 -4
  5. webscout/__init__.py +1 -1
  6. webscout/cli.py +0 -147
  7. webscout/client.py +4 -5
  8. webscout/litprinter/__init__.py +0 -42
  9. webscout/scout/README.md +59 -8
  10. webscout/scout/core/scout.py +62 -0
  11. webscout/scout/element.py +251 -45
  12. webscout/search/__init__.py +5 -8
  13. webscout/search/bing_main.py +42 -0
  14. webscout/search/engines/bing/__init__.py +1 -0
  15. webscout/search/engines/bing/base.py +33 -0
  16. webscout/search/engines/bing/images.py +108 -0
  17. webscout/search/engines/bing/news.py +91 -0
  18. webscout/search/engines/bing/suggestions.py +34 -0
  19. webscout/search/engines/bing/text.py +106 -0
  20. webscout/search/engines/duckduckgo/maps.py +13 -0
  21. webscout/search/engines/yahoo/__init__.py +41 -0
  22. webscout/search/engines/yahoo/answers.py +16 -0
  23. webscout/search/engines/yahoo/base.py +34 -0
  24. webscout/search/engines/yahoo/images.py +324 -0
  25. webscout/search/engines/yahoo/maps.py +16 -0
  26. webscout/search/engines/yahoo/news.py +258 -0
  27. webscout/search/engines/yahoo/suggestions.py +140 -0
  28. webscout/search/engines/yahoo/text.py +273 -0
  29. webscout/search/engines/yahoo/translate.py +16 -0
  30. webscout/search/engines/yahoo/videos.py +302 -0
  31. webscout/search/engines/yahoo/weather.py +220 -0
  32. webscout/search/http_client.py +1 -1
  33. webscout/search/yahoo_main.py +54 -0
  34. webscout/{auth → server}/__init__.py +2 -23
  35. webscout/server/config.py +84 -0
  36. webscout/{auth → server}/request_processing.py +3 -28
  37. webscout/{auth → server}/routes.py +14 -170
  38. webscout/server/schemas.py +23 -0
  39. webscout/{auth → server}/server.py +11 -43
  40. webscout/server/simple_logger.py +84 -0
  41. webscout/version.py +1 -1
  42. webscout/version.py.bak +1 -1
  43. webscout/zeroart/README.md +17 -9
  44. webscout/zeroart/__init__.py +78 -6
  45. webscout/zeroart/effects.py +51 -1
  46. webscout/zeroart/fonts.py +559 -1
  47. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/METADATA +15 -332
  48. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/RECORD +55 -48
  49. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
  50. webscout/Bing_search.py +0 -417
  51. webscout/DWEBS.py +0 -529
  52. webscout/auth/api_key_manager.py +0 -189
  53. webscout/auth/auth_system.py +0 -85
  54. webscout/auth/config.py +0 -175
  55. webscout/auth/database.py +0 -755
  56. webscout/auth/middleware.py +0 -248
  57. webscout/auth/models.py +0 -185
  58. webscout/auth/rate_limiter.py +0 -254
  59. webscout/auth/schemas.py +0 -103
  60. webscout/auth/simple_logger.py +0 -236
  61. webscout/search/engines/bing.py +0 -84
  62. webscout/search/engines/bing_news.py +0 -52
  63. webscout/search/engines/yahoo.py +0 -65
  64. webscout/search/engines/yahoo_news.py +0 -64
  65. /webscout/{auth → server}/exceptions.py +0 -0
  66. /webscout/{auth → server}/providers.py +0 -0
  67. /webscout/{auth → server}/request_models.py +0 -0
  68. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
  69. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
  70. {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
webscout/scout/element.py CHANGED
@@ -267,7 +267,14 @@ class Tag:
267
267
  def select(self, selector: str) -> List['Tag']:
268
268
  """
269
269
  Select elements using CSS selector.
270
- Enhanced to support more complex selectors.
270
+ Enhanced to support more complex selectors including:
271
+ - Tag selectors: 'p', 'div'
272
+ - Class selectors: '.class', 'p.class'
273
+ - ID selectors: '#id', 'div#id'
274
+ - Attribute selectors: '[attr]', '[attr=value]'
275
+ - Descendant selectors: 'div p'
276
+ - Child selectors: 'div > p'
277
+ - Multiple classes: '.class1.class2'
271
278
 
272
279
  Args:
273
280
  selector (str): CSS selector string
@@ -275,54 +282,248 @@ class Tag:
275
282
  Returns:
276
283
  List[Tag]: List of matching elements
277
284
  """
278
- # More advanced CSS selector parsing
279
- # This is a simplified implementation and might need more robust parsing
280
- parts = re.split(r'\s+', selector.strip())
281
285
  results = []
282
-
283
- def _match_selector(tag, selector_part):
284
- # Support more complex selectors
285
- if selector_part.startswith('.'):
286
- # Class selector
287
- return selector_part[1:] in tag.get('class', [])
288
- elif selector_part.startswith('#'):
289
- # ID selector
290
- return tag.get('id') == selector_part[1:]
291
- elif '[' in selector_part and ']' in selector_part:
292
- # Attribute selector
293
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
294
- if attr_match:
295
- tag_name, attr, value = attr_match.groups()
296
- if tag_name and tag.name != tag_name:
286
+
287
+ def _parse_simple_selector(simple_sel: str) -> dict:
288
+ """Parse a simple selector like 'p.class#id[attr=value]' into components."""
289
+ components = {
290
+ 'tag': None,
291
+ 'id': None,
292
+ 'classes': [],
293
+ 'attrs': {}
294
+ }
295
+
296
+ # Extract tag name (at the start)
297
+ tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
298
+ if tag_match:
299
+ components['tag'] = tag_match.group(1)
300
+ simple_sel = simple_sel[len(tag_match.group(1)):]
301
+
302
+ # Extract ID
303
+ id_matches = re.findall(r'#([\w-]+)', simple_sel)
304
+ if id_matches:
305
+ components['id'] = id_matches[0]
306
+
307
+ # Extract classes
308
+ class_matches = re.findall(r'\.([\w-]+)', simple_sel)
309
+ components['classes'] = class_matches
310
+
311
+ # Extract attributes
312
+ attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
313
+ for attr_expr in attr_matches:
314
+ if '=' in attr_expr:
315
+ attr_name, attr_value = attr_expr.split('=', 1)
316
+ components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
317
+ else:
318
+ components['attrs'][attr_expr.strip()] = None
319
+
320
+ return components
321
+
322
+ def _match_simple_selector(tag: 'Tag', components: dict) -> bool:
323
+ """Check if a tag matches the parsed selector components."""
324
+ # Check tag name
325
+ if components['tag'] and tag.name != components['tag']:
326
+ return False
327
+
328
+ # Check ID
329
+ if components['id'] and tag.get('id') != components['id']:
330
+ return False
331
+
332
+ # Check classes
333
+ tag_classes = tag.get('class', '')
334
+ if isinstance(tag_classes, str):
335
+ tag_classes = tag_classes.split()
336
+ elif not isinstance(tag_classes, list):
337
+ tag_classes = [str(tag_classes)] if tag_classes else []
338
+
339
+ for cls in components['classes']:
340
+ if cls not in tag_classes:
341
+ return False
342
+
343
+ # Check attributes
344
+ for attr_name, attr_value in components['attrs'].items():
345
+ if attr_value is None:
346
+ # Just check attribute exists
347
+ if attr_name not in tag.attrs:
297
348
  return False
298
- if value:
299
- return tag.get(attr) == value.strip("'\"")
300
- return attr in tag.attrs
301
- else:
302
- # Tag selector
303
- return tag.name == selector_part
304
-
305
- def _recursive_select(element, selector_parts):
306
- if not selector_parts:
307
- results.append(element)
308
- return
309
-
310
- current_selector = selector_parts[0]
311
- remaining_selectors = selector_parts[1:]
312
-
313
- if _match_selector(element, current_selector):
314
- if not remaining_selectors:
315
- results.append(element)
316
349
  else:
317
- for child in element.contents:
318
- if isinstance(child, Tag):
319
- _recursive_select(child, remaining_selectors)
320
-
321
- for child in self.contents:
322
- if isinstance(child, Tag):
323
- _recursive_select(child, parts)
324
-
350
+ # Check attribute value
351
+ if tag.get(attr_name) != attr_value:
352
+ return False
353
+
354
+ return True
355
+
356
+ def _find_all_matching(element: 'Tag', components: dict) -> List['Tag']:
357
+ """Recursively find all elements matching the selector components."""
358
+ matches = []
359
+
360
+ # Check current element
361
+ if _match_simple_selector(element, components):
362
+ matches.append(element)
363
+
364
+ # Check children recursively
365
+ for child in element.contents:
366
+ if isinstance(child, Tag):
367
+ matches.extend(_find_all_matching(child, components))
368
+
369
+ return matches
370
+
371
+ # Handle combinators (descendant ' ' and child '>')
372
+ if ' > ' in selector:
373
+ # Child combinator
374
+ parts = [p.strip() for p in selector.split(' > ')]
375
+ return self._select_with_child_combinator(parts)
376
+ elif ' ' in selector.strip():
377
+ # Descendant combinator
378
+ parts = [p.strip() for p in selector.split()]
379
+ return self._select_with_descendant_combinator(parts)
380
+ else:
381
+ # Simple selector
382
+ components = _parse_simple_selector(selector)
383
+ return _find_all_matching(self, components)
384
+
385
+ def _select_with_descendant_combinator(self, parts: List[str]) -> List['Tag']:
386
+ """Handle descendant combinator (space)."""
387
+ if not parts:
388
+ return []
389
+
390
+ if len(parts) == 1:
391
+ components = self._parse_selector_components(parts[0])
392
+ return self._find_all_matching_in_tree(self, components)
393
+
394
+ # Find elements matching the first part
395
+ first_components = self._parse_selector_components(parts[0])
396
+ first_matches = self._find_all_matching_in_tree(self, first_components)
397
+
398
+ # For each match, find descendants matching remaining parts
399
+ results = []
400
+ remaining_selector = ' '.join(parts[1:])
401
+ for match in first_matches:
402
+ descendants = match.select(remaining_selector)
403
+ results.extend(descendants)
404
+
325
405
  return results
406
+
407
+ def _select_with_child_combinator(self, parts: List[str]) -> List['Tag']:
408
+ """Handle child combinator (>)."""
409
+ if not parts:
410
+ return []
411
+
412
+ if len(parts) == 1:
413
+ components = self._parse_selector_components(parts[0])
414
+ return self._find_all_matching_in_tree(self, components)
415
+
416
+ # Find elements matching the first part
417
+ first_components = self._parse_selector_components(parts[0])
418
+ first_matches = self._find_all_matching_in_tree(self, first_components)
419
+
420
+ # For each match, find direct children matching the next part
421
+ if len(parts) == 2:
422
+ # Last part, just check direct children
423
+ next_components = self._parse_selector_components(parts[1])
424
+ results = []
425
+ for match in first_matches:
426
+ for child in match.contents:
427
+ if isinstance(child, Tag) and self._match_selector_components(child, next_components):
428
+ results.append(child)
429
+ return results
430
+ else:
431
+ # More parts, need to continue recursively
432
+ results = []
433
+ next_components = self._parse_selector_components(parts[1])
434
+ remaining_parts = parts[2:]
435
+ for match in first_matches:
436
+ for child in match.contents:
437
+ if isinstance(child, Tag) and self._match_selector_components(child, next_components):
438
+ # Continue with remaining parts
439
+ remaining_selector = ' > '.join(remaining_parts)
440
+ descendants = child.select(remaining_selector)
441
+ results.extend(descendants)
442
+ return results
443
+
444
+ def _parse_selector_components(self, simple_sel: str) -> dict:
445
+ """Parse a simple selector like 'p.class#id[attr=value]' into components."""
446
+ components = {
447
+ 'tag': None,
448
+ 'id': None,
449
+ 'classes': [],
450
+ 'attrs': {}
451
+ }
452
+
453
+ # Extract tag name (at the start)
454
+ tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
455
+ if tag_match:
456
+ components['tag'] = tag_match.group(1)
457
+ simple_sel = simple_sel[len(tag_match.group(1)):]
458
+
459
+ # Extract ID
460
+ id_matches = re.findall(r'#([\w-]+)', simple_sel)
461
+ if id_matches:
462
+ components['id'] = id_matches[0]
463
+
464
+ # Extract classes
465
+ class_matches = re.findall(r'\.([\w-]+)', simple_sel)
466
+ components['classes'] = class_matches
467
+
468
+ # Extract attributes
469
+ attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
470
+ for attr_expr in attr_matches:
471
+ if '=' in attr_expr:
472
+ attr_name, attr_value = attr_expr.split('=', 1)
473
+ components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
474
+ else:
475
+ components['attrs'][attr_expr.strip()] = None
476
+
477
+ return components
478
+
479
+ def _match_selector_components(self, tag: 'Tag', components: dict) -> bool:
480
+ """Check if a tag matches the parsed selector components."""
481
+ # Check tag name
482
+ if components['tag'] and tag.name != components['tag']:
483
+ return False
484
+
485
+ # Check ID
486
+ if components['id'] and tag.get('id') != components['id']:
487
+ return False
488
+
489
+ # Check classes
490
+ tag_classes = tag.get('class', '')
491
+ if isinstance(tag_classes, str):
492
+ tag_classes = tag_classes.split()
493
+ elif not isinstance(tag_classes, list):
494
+ tag_classes = [str(tag_classes)] if tag_classes else []
495
+
496
+ for cls in components['classes']:
497
+ if cls not in tag_classes:
498
+ return False
499
+
500
+ # Check attributes
501
+ for attr_name, attr_value in components['attrs'].items():
502
+ if attr_value is None:
503
+ # Just check attribute exists
504
+ if attr_name not in tag.attrs:
505
+ return False
506
+ else:
507
+ # Check attribute value
508
+ if tag.get(attr_name) != attr_value:
509
+ return False
510
+
511
+ return True
512
+
513
+ def _find_all_matching_in_tree(self, element: 'Tag', components: dict) -> List['Tag']:
514
+ """Recursively find all elements matching the selector components."""
515
+ matches = []
516
+
517
+ # Check current element
518
+ if self._match_selector_components(element, components):
519
+ matches.append(element)
520
+
521
+ # Check children recursively
522
+ for child in element.contents:
523
+ if isinstance(child, Tag):
524
+ matches.extend(self._find_all_matching_in_tree(child, components))
525
+
526
+ return matches
326
527
 
327
528
  def select_one(self, selector: str) -> Optional['Tag']:
328
529
  """
@@ -462,6 +663,11 @@ class Tag:
462
663
  new_child.parent = self
463
664
  self.contents.append(new_child)
464
665
 
666
+ def extend(self, new_children: List[Union['Tag', NavigableString, str]]) -> None:
667
+ """Extend the contents of this tag with a list of new children."""
668
+ for child in new_children:
669
+ self.append(child)
670
+
465
671
  def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
466
672
  """Insert a new child at the given index with error handling."""
467
673
  if isinstance(new_child, str):
@@ -3,16 +3,15 @@
3
3
  from .base import BaseSearch, BaseSearchEngine
4
4
  from .duckduckgo_main import DuckDuckGoSearch
5
5
  from .yep_main import YepSearch
6
+ from .bing_main import BingSearch
7
+ from .yahoo_main import YahooSearch
6
8
 
7
9
  # Import new search engines
8
- from .engines.bing import Bing
9
10
  from .engines.brave import Brave
10
11
  from .engines.mojeek import Mojeek
11
- from .engines.yahoo import Yahoo
12
+
12
13
  from .engines.yandex import Yandex
13
14
  from .engines.wikipedia import Wikipedia
14
- from .engines.bing_news import BingNews
15
- from .engines.yahoo_news import YahooNews
16
15
 
17
16
  # Import result models
18
17
  from .results import (
@@ -31,16 +30,14 @@ __all__ = [
31
30
  # Main search interfaces
32
31
  "DuckDuckGoSearch",
33
32
  "YepSearch",
33
+ "BingSearch",
34
+ "YahooSearch",
34
35
 
35
36
  # Individual engines
36
- "Bing",
37
37
  "Brave",
38
38
  "Mojeek",
39
- "Yahoo",
40
39
  "Yandex",
41
40
  "Wikipedia",
42
- "BingNews",
43
- "YahooNews",
44
41
 
45
42
  # Result models
46
43
  "TextResult",
@@ -0,0 +1,42 @@
1
+ """Bing unified search interface."""
2
+
3
+ from __future__ import annotations
4
+ from typing import Dict, List, Optional
5
+ from .base import BaseSearch
6
+ from .engines.bing.text import BingTextSearch
7
+ from .engines.bing.images import BingImagesSearch
8
+ from .engines.bing.news import BingNewsSearch
9
+ from .engines.bing.suggestions import BingSuggestionsSearch
10
+
11
+
12
+ class BingSearch(BaseSearch):
13
+ """Unified Bing search interface."""
14
+
15
+ def text(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None, unique: bool = True) -> List[Dict[str, str]]:
16
+ search = BingTextSearch()
17
+ return search.run(keywords, region, safesearch, max_results, unique=unique)
18
+
19
+ def images(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
20
+ search = BingImagesSearch()
21
+ return search.run(keywords, region, safesearch, max_results)
22
+
23
+ def news(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
24
+ search = BingNewsSearch()
25
+ return search.run(keywords, region, safesearch, max_results)
26
+
27
+ def suggestions(self, query: str, region: str = "en-US") -> List[Dict[str, str]]:
28
+ search = BingSuggestionsSearch()
29
+ result = search.run(query, region)
30
+ return [{'suggestion': s} for s in result]
31
+
32
+ def answers(self, keywords: str) -> List[Dict[str, str]]:
33
+ raise NotImplementedError("Answers not implemented for Bing")
34
+
35
+ def maps(self, *args, **kwargs) -> List[Dict[str, str]]:
36
+ raise NotImplementedError("Maps not implemented for Bing")
37
+
38
+ def translate(self, keywords: str, from_lang: Optional[str] = None, to_lang: str = "en") -> List[Dict[str, str]]:
39
+ raise NotImplementedError("Translate not implemented for Bing")
40
+
41
+ def videos(self, *args, **kwargs) -> List[Dict[str, str]]:
42
+ raise NotImplementedError("Videos not implemented for Bing")
@@ -0,0 +1 @@
1
+ """Bing search engines."""
@@ -0,0 +1,33 @@
"""Base class for Bing search implementations."""

from __future__ import annotations

from ....litagent import LitAgent
from curl_cffi.requests import Session


class BingBase:
    """Base class for Bing search engines.

    Holds shared request settings and a single curl_cffi Session whose
    headers are seeded with a LitAgent-generated browser fingerprint.
    """

    def __init__(
        self,
        timeout: int = 10,
        proxies: dict[str, str] | None = None,
        verify: bool = True,
        lang: str = "en-US",
        sleep_interval: float = 0.0,
        impersonate: str = "chrome110",
    ):
        # Plain request configuration, kept on the instance for subclasses.
        self.timeout = timeout
        self.proxies = proxies
        self.verify = verify
        self.lang = lang
        self.sleep_interval = sleep_interval
        self.base_url = "https://www.bing.com"
        # One shared session; `impersonate` makes curl_cffi mimic a real
        # browser's TLS/HTTP fingerprint.
        session = Session(
            proxies=proxies,
            verify=verify,
            timeout=timeout,
            impersonate=impersonate,
        )
        session.headers.update(LitAgent().generate_fingerprint())
        self.session = session
@@ -0,0 +1,108 @@
1
+ """Bing images search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List
6
+ from urllib.parse import urlencode
7
+ from time import sleep
8
+
9
+ from .base import BingBase
10
+ from webscout.scout import Scout
11
+
12
+
13
+ class BingImagesSearch(BingBase):
14
+ def run(self, *args, **kwargs) -> List[Dict[str, str]]:
15
+ keywords = args[0] if args else kwargs.get("keywords")
16
+ region = args[1] if len(args) > 1 else kwargs.get("region", "us")
17
+ safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
18
+ max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
+
20
+ if max_results is None:
21
+ max_results = 10
22
+
23
+ if not keywords:
24
+ raise ValueError("Keywords are mandatory")
25
+
26
+ safe_map = {
27
+ "on": "Strict",
28
+ "moderate": "Moderate",
29
+ "off": "Off"
30
+ }
31
+ safe = safe_map.get(safesearch.lower(), "Moderate")
32
+
33
+ # Bing images URL
34
+ url = f"{self.base_url}/images/async"
35
+ params = {
36
+ 'q': keywords,
37
+ 'first': '1',
38
+ 'count': '35', # Fetch more to get max_results
39
+ 'cw': '1177',
40
+ 'ch': '759',
41
+ 'tsc': 'ImageHoverTitle',
42
+ 'layout': 'RowBased_Landscape',
43
+ 't': '0',
44
+ 'IG': '',
45
+ 'SFX': '0',
46
+ 'iid': 'images.1'
47
+ }
48
+
49
+ results = []
50
+ first = 1
51
+ sfx = 0
52
+
53
+ while len(results) < max_results:
54
+ params['first'] = str(first)
55
+ params['SFX'] = str(sfx)
56
+ full_url = f"{url}?{urlencode(params)}"
57
+
58
+ try:
59
+ response = self.session.get(full_url, timeout=self.timeout)
60
+ response.raise_for_status()
61
+ html = response.text
62
+ except Exception as e:
63
+ raise Exception(f"Failed to fetch images: {str(e)}")
64
+
65
+ soup = Scout(html)
66
+ img_tags = soup.select('a.iusc img')
67
+
68
+ for img in img_tags:
69
+ if len(results) >= max_results:
70
+ break
71
+
72
+ title = img.get('alt', '')
73
+ src = img.get('src', '')
74
+ m_attr = img.parent.get('m', '') if img.parent else ''
75
+
76
+ # Parse m attribute for full image URL
77
+ image_url = src
78
+ thumbnail = src
79
+ if m_attr:
80
+ try:
81
+ import json
82
+ m_data = json.loads(m_attr)
83
+ image_url = m_data.get('murl', src)
84
+ thumbnail = m_data.get('turl', src)
85
+ except:
86
+ pass
87
+
88
+ source = ''
89
+ if img.parent and img.parent.parent:
90
+ source_tag = img.parent.parent.select_one('.iusc .lnk')
91
+ if source_tag:
92
+ source = source_tag.get_text(strip=True)
93
+
94
+ results.append({
95
+ 'title': title,
96
+ 'image': image_url,
97
+ 'thumbnail': thumbnail,
98
+ 'url': image_url, # For compatibility
99
+ 'source': source
100
+ })
101
+
102
+ first += 35
103
+ sfx += 1
104
+
105
+ if self.sleep_interval:
106
+ sleep(self.sleep_interval)
107
+
108
+ return results[:max_results]
@@ -0,0 +1,91 @@
1
+ """Bing news search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, List
6
+ from urllib.parse import urlencode
7
+ from time import sleep
8
+
9
+ from .base import BingBase
10
+ from webscout.scout import Scout
11
+
12
+
13
+ class BingNewsSearch(BingBase):
14
+ def run(self, *args, **kwargs) -> List[Dict[str, str]]:
15
+ keywords = args[0] if args else kwargs.get("keywords")
16
+ region = args[1] if len(args) > 1 else kwargs.get("region", "us")
17
+ safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
18
+ max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
19
+
20
+ if max_results is None:
21
+ max_results = 10
22
+
23
+ if not keywords:
24
+ raise ValueError("Keywords are mandatory")
25
+
26
+ safe_map = {
27
+ "on": "Strict",
28
+ "moderate": "Moderate",
29
+ "off": "Off"
30
+ }
31
+ safe = safe_map.get(safesearch.lower(), "Moderate")
32
+
33
+ # Bing news URL
34
+ url = f"{self.base_url}/news/infinitescrollajax"
35
+ params = {
36
+ 'q': keywords,
37
+ 'InfiniteScroll': '1',
38
+ 'first': '1',
39
+ 'SFX': '0',
40
+ 'cc': region.lower(),
41
+ 'setlang': self.lang.split('-')[0]
42
+ }
43
+
44
+ results = []
45
+ first = 1
46
+ sfx = 0
47
+
48
+ while len(results) < max_results:
49
+ params['first'] = str(first)
50
+ params['SFX'] = str(sfx)
51
+ full_url = f"{url}?{urlencode(params)}"
52
+
53
+ try:
54
+ response = self.session.get(full_url, timeout=self.timeout)
55
+ response.raise_for_status()
56
+ html = response.text
57
+ except Exception as e:
58
+ raise Exception(f"Failed to fetch news: {str(e)}")
59
+
60
+ if not html:
61
+ break
62
+
63
+ soup = Scout(html)
64
+ news_items = soup.select('div.newsitem')
65
+
66
+ for item in news_items:
67
+ if len(results) >= max_results:
68
+ break
69
+
70
+ title = item.select_one('a.title')
71
+ snippet = item.select_one('div.snippet')
72
+ source = item.select_one('div.source')
73
+ date = item.select_one('span.date')
74
+
75
+ if title:
76
+ news_result = {
77
+ 'title': title.get_text(strip=True),
78
+ 'url': title.get('href', ''),
79
+ 'body': snippet.get_text(strip=True) if snippet else '',
80
+ 'source': source.get_text(strip=True) if source else '',
81
+ 'date': date.get_text(strip=True) if date else ''
82
+ }
83
+ results.append(news_result)
84
+
85
+ first += 10
86
+ sfx += 1
87
+
88
+ if self.sleep_interval:
89
+ sleep(self.sleep_interval)
90
+
91
+ return results[:max_results]
@@ -0,0 +1,34 @@
1
+ """Bing suggestions search."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import List
6
+ from urllib.parse import urlencode
7
+
8
+ from .base import BingBase
9
+
10
+
11
+ class BingSuggestionsSearch(BingBase):
12
+ def run(self, *args, **kwargs) -> List[str]:
13
+ query = args[0] if args else kwargs.get("query")
14
+ region = args[1] if len(args) > 1 else kwargs.get("region", "en-US")
15
+
16
+ if not query:
17
+ raise ValueError("Query is mandatory")
18
+
19
+ params = {
20
+ "query": query,
21
+ "mkt": region
22
+ }
23
+ url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
24
+
25
+ try:
26
+ response = self.session.get(url, timeout=self.timeout)
27
+ response.raise_for_status()
28
+ data = response.json()
29
+ # Bing suggestions API returns [query, [suggestions]]
30
+ if len(data) > 1 and isinstance(data[1], list):
31
+ return data[1]
32
+ return []
33
+ except Exception as e:
34
+ raise Exception(f"Failed to fetch suggestions: {str(e)}")