webscout 2025.10.14.1__py3-none-any.whl → 2025.10.16__py3-none-any.whl
This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release: this version of webscout has been flagged for review.
- webscout/Extra/YTToolkit/README.md +1 -1
- webscout/Extra/tempmail/README.md +3 -3
- webscout/Provider/OPENAI/README.md +1 -1
- webscout/Provider/TTI/bing.py +4 -4
- webscout/__init__.py +1 -1
- webscout/cli.py +0 -147
- webscout/client.py +4 -5
- webscout/litprinter/__init__.py +0 -42
- webscout/scout/README.md +59 -8
- webscout/scout/core/scout.py +62 -0
- webscout/scout/element.py +251 -45
- webscout/search/__init__.py +5 -8
- webscout/search/bing_main.py +42 -0
- webscout/search/engines/bing/__init__.py +1 -0
- webscout/search/engines/bing/base.py +33 -0
- webscout/search/engines/bing/images.py +108 -0
- webscout/search/engines/bing/news.py +91 -0
- webscout/search/engines/bing/suggestions.py +34 -0
- webscout/search/engines/bing/text.py +106 -0
- webscout/search/engines/duckduckgo/maps.py +13 -0
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +16 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +324 -0
- webscout/search/engines/yahoo/maps.py +16 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +16 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/http_client.py +1 -1
- webscout/search/yahoo_main.py +54 -0
- webscout/{auth → server}/__init__.py +2 -23
- webscout/server/config.py +84 -0
- webscout/{auth → server}/request_processing.py +3 -28
- webscout/{auth → server}/routes.py +14 -170
- webscout/server/schemas.py +23 -0
- webscout/{auth → server}/server.py +11 -43
- webscout/server/simple_logger.py +84 -0
- webscout/version.py +1 -1
- webscout/version.py.bak +1 -1
- webscout/zeroart/README.md +17 -9
- webscout/zeroart/__init__.py +78 -6
- webscout/zeroart/effects.py +51 -1
- webscout/zeroart/fonts.py +559 -1
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/METADATA +15 -332
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/RECORD +55 -48
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
- webscout/Bing_search.py +0 -417
- webscout/DWEBS.py +0 -529
- webscout/auth/api_key_manager.py +0 -189
- webscout/auth/auth_system.py +0 -85
- webscout/auth/config.py +0 -175
- webscout/auth/database.py +0 -755
- webscout/auth/middleware.py +0 -248
- webscout/auth/models.py +0 -185
- webscout/auth/rate_limiter.py +0 -254
- webscout/auth/schemas.py +0 -103
- webscout/auth/simple_logger.py +0 -236
- webscout/search/engines/bing.py +0 -84
- webscout/search/engines/bing_news.py +0 -52
- webscout/search/engines/yahoo.py +0 -65
- webscout/search/engines/yahoo_news.py +0 -64
- /webscout/{auth → server}/exceptions.py +0 -0
- /webscout/{auth → server}/providers.py +0 -0
- /webscout/{auth → server}/request_models.py +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-2025.10.14.1.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
webscout/scout/element.py
CHANGED

```diff
@@ -267,7 +267,14 @@ class Tag:
     def select(self, selector: str) -> List['Tag']:
         """
         Select elements using CSS selector.
-        Enhanced to support more complex selectors
+        Enhanced to support more complex selectors including:
+        - Tag selectors: 'p', 'div'
+        - Class selectors: '.class', 'p.class'
+        - ID selectors: '#id', 'div#id'
+        - Attribute selectors: '[attr]', '[attr=value]'
+        - Descendant selectors: 'div p'
+        - Child selectors: 'div > p'
+        - Multiple classes: '.class1.class2'
 
         Args:
             selector (str): CSS selector string
@@ -275,54 +282,248 @@
         Returns:
             List[Tag]: List of matching elements
         """
-        # More advanced CSS selector parsing
-        # This is a simplified implementation and might need more robust parsing
-        parts = re.split(r'\s+', selector.strip())
         results = []
-
-        def …
+
+        def _parse_simple_selector(simple_sel: str) -> dict:
+            """Parse a simple selector like 'p.class#id[attr=value]' into components."""
+            components = {
+                'tag': None,
+                'id': None,
+                'classes': [],
+                'attrs': {}
+            }
+
+            # Extract tag name (at the start)
+            tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
+            if tag_match:
+                components['tag'] = tag_match.group(1)
+                simple_sel = simple_sel[len(tag_match.group(1)):]
+
+            # Extract ID
+            id_matches = re.findall(r'#([\w-]+)', simple_sel)
+            if id_matches:
+                components['id'] = id_matches[0]
+
+            # Extract classes
+            class_matches = re.findall(r'\.([\w-]+)', simple_sel)
+            components['classes'] = class_matches
+
+            # Extract attributes
+            attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
+            for attr_expr in attr_matches:
+                if '=' in attr_expr:
+                    attr_name, attr_value = attr_expr.split('=', 1)
+                    components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
+                else:
+                    components['attrs'][attr_expr.strip()] = None
+
+            return components
+
+        def _match_simple_selector(tag: 'Tag', components: dict) -> bool:
+            """Check if a tag matches the parsed selector components."""
+            # Check tag name
+            if components['tag'] and tag.name != components['tag']:
+                return False
+
+            # Check ID
+            if components['id'] and tag.get('id') != components['id']:
+                return False
+
+            # Check classes
+            tag_classes = tag.get('class', '')
+            if isinstance(tag_classes, str):
+                tag_classes = tag_classes.split()
+            elif not isinstance(tag_classes, list):
+                tag_classes = [str(tag_classes)] if tag_classes else []
+
+            for cls in components['classes']:
+                if cls not in tag_classes:
+                    return False
+
+            # Check attributes
+            for attr_name, attr_value in components['attrs'].items():
+                if attr_value is None:
+                    # Just check attribute exists
+                    if attr_name not in tag.attrs:
                         return False
-                if value:
-                    return tag.get(attr) == value.strip("'\"")
-                return attr in tag.attrs
-            else:
-                # Tag selector
-                return tag.name == selector_part
-
-        def _recursive_select(element, selector_parts):
-            if not selector_parts:
-                results.append(element)
-                return
-
-            current_selector = selector_parts[0]
-            remaining_selectors = selector_parts[1:]
-
-            if _match_selector(element, current_selector):
-                if not remaining_selectors:
-                    results.append(element)
                 else:
-                    …
+                    # Check attribute value
+                    if tag.get(attr_name) != attr_value:
+                        return False
+
+            return True
+
+        def _find_all_matching(element: 'Tag', components: dict) -> List['Tag']:
+            """Recursively find all elements matching the selector components."""
+            matches = []
+
+            # Check current element
+            if _match_simple_selector(element, components):
+                matches.append(element)
+
+            # Check children recursively
+            for child in element.contents:
+                if isinstance(child, Tag):
+                    matches.extend(_find_all_matching(child, components))
+
+            return matches
+
+        # Handle combinators (descendant ' ' and child '>')
+        if ' > ' in selector:
+            # Child combinator
+            parts = [p.strip() for p in selector.split(' > ')]
+            return self._select_with_child_combinator(parts)
+        elif ' ' in selector.strip():
+            # Descendant combinator
+            parts = [p.strip() for p in selector.split()]
+            return self._select_with_descendant_combinator(parts)
+        else:
+            # Simple selector
+            components = _parse_simple_selector(selector)
+            return _find_all_matching(self, components)
+
+    def _select_with_descendant_combinator(self, parts: List[str]) -> List['Tag']:
+        """Handle descendant combinator (space)."""
+        if not parts:
+            return []
+
+        if len(parts) == 1:
+            components = self._parse_selector_components(parts[0])
+            return self._find_all_matching_in_tree(self, components)
+
+        # Find elements matching the first part
+        first_components = self._parse_selector_components(parts[0])
+        first_matches = self._find_all_matching_in_tree(self, first_components)
+
+        # For each match, find descendants matching remaining parts
+        results = []
+        remaining_selector = ' '.join(parts[1:])
+        for match in first_matches:
+            descendants = match.select(remaining_selector)
+            results.extend(descendants)
+
         return results
+
+    def _select_with_child_combinator(self, parts: List[str]) -> List['Tag']:
+        """Handle child combinator (>)."""
+        if not parts:
+            return []
+
+        if len(parts) == 1:
+            components = self._parse_selector_components(parts[0])
+            return self._find_all_matching_in_tree(self, components)
+
+        # Find elements matching the first part
+        first_components = self._parse_selector_components(parts[0])
+        first_matches = self._find_all_matching_in_tree(self, first_components)
+
+        # For each match, find direct children matching the next part
+        if len(parts) == 2:
+            # Last part, just check direct children
+            next_components = self._parse_selector_components(parts[1])
+            results = []
+            for match in first_matches:
+                for child in match.contents:
+                    if isinstance(child, Tag) and self._match_selector_components(child, next_components):
+                        results.append(child)
+            return results
+        else:
+            # More parts, need to continue recursively
+            results = []
+            next_components = self._parse_selector_components(parts[1])
+            remaining_parts = parts[2:]
+            for match in first_matches:
+                for child in match.contents:
+                    if isinstance(child, Tag) and self._match_selector_components(child, next_components):
+                        # Continue with remaining parts
+                        remaining_selector = ' > '.join(remaining_parts)
+                        descendants = child.select(remaining_selector)
+                        results.extend(descendants)
+            return results
+
+    def _parse_selector_components(self, simple_sel: str) -> dict:
+        """Parse a simple selector like 'p.class#id[attr=value]' into components."""
+        components = {
+            'tag': None,
+            'id': None,
+            'classes': [],
+            'attrs': {}
+        }
+
+        # Extract tag name (at the start)
+        tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
+        if tag_match:
+            components['tag'] = tag_match.group(1)
+            simple_sel = simple_sel[len(tag_match.group(1)):]
+
+        # Extract ID
+        id_matches = re.findall(r'#([\w-]+)', simple_sel)
+        if id_matches:
+            components['id'] = id_matches[0]
+
+        # Extract classes
+        class_matches = re.findall(r'\.([\w-]+)', simple_sel)
+        components['classes'] = class_matches
+
+        # Extract attributes
+        attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
+        for attr_expr in attr_matches:
+            if '=' in attr_expr:
+                attr_name, attr_value = attr_expr.split('=', 1)
+                components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
+            else:
+                components['attrs'][attr_expr.strip()] = None
+
+        return components
+
+    def _match_selector_components(self, tag: 'Tag', components: dict) -> bool:
+        """Check if a tag matches the parsed selector components."""
+        # Check tag name
+        if components['tag'] and tag.name != components['tag']:
+            return False
+
+        # Check ID
+        if components['id'] and tag.get('id') != components['id']:
+            return False
+
+        # Check classes
+        tag_classes = tag.get('class', '')
+        if isinstance(tag_classes, str):
+            tag_classes = tag_classes.split()
+        elif not isinstance(tag_classes, list):
+            tag_classes = [str(tag_classes)] if tag_classes else []
+
+        for cls in components['classes']:
+            if cls not in tag_classes:
+                return False
+
+        # Check attributes
+        for attr_name, attr_value in components['attrs'].items():
+            if attr_value is None:
+                # Just check attribute exists
+                if attr_name not in tag.attrs:
+                    return False
+            else:
+                # Check attribute value
+                if tag.get(attr_name) != attr_value:
+                    return False
+
+        return True
+
+    def _find_all_matching_in_tree(self, element: 'Tag', components: dict) -> List['Tag']:
+        """Recursively find all elements matching the selector components."""
+        matches = []
+
+        # Check current element
+        if self._match_selector_components(element, components):
+            matches.append(element)
+
+        # Check children recursively
+        for child in element.contents:
+            if isinstance(child, Tag):
+                matches.extend(self._find_all_matching_in_tree(child, components))
+
+        return matches
 
     def select_one(self, selector: str) -> Optional['Tag']:
         """
@@ -462,6 +663,11 @@ class Tag:
         new_child.parent = self
         self.contents.append(new_child)
 
+    def extend(self, new_children: List[Union['Tag', NavigableString, str]]) -> None:
+        """Extend the contents of this tag with a list of new children."""
+        for child in new_children:
+            self.append(child)
+
     def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
         """Insert a new child at the given index with error handling."""
         if isinstance(new_child, str):
```
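The rewritten `select` parses each simple selector into a components dict and walks the tree recursively, while descendant and child combinators are delegated to the new `_select_with_*` methods (`extend` simply feeds each item through the existing `append`). A minimal usage sketch, assuming the `Scout` root object exposes the same `select`/`select_one` API this diff itself uses in the Bing engines:

```python
from webscout.scout import Scout

html = """
<div id="main">
  <p class="intro highlight">Hello</p>
  <p class="intro">World</p>
  <a href="/docs" rel="nofollow">Docs</a>
</div>
"""
soup = Scout(html)

# Selector forms the enhanced Tag.select now supports:
print([p.get_text(strip=True) for p in soup.select('p.intro')])  # tag + class
print(soup.select_one('.intro.highlight').get_text(strip=True))  # multiple classes
print(len(soup.select('div#main > p')))                          # child combinator
print(soup.select('[rel=nofollow]')[0].get('href'))              # attribute selector
```

Note that `_find_all_matching` tests the element it starts from before recursing into `contents`, so `tag.select(...)` can return `tag` itself when it matches.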
webscout/search/__init__.py
CHANGED

```diff
@@ -3,16 +3,15 @@
 from .base import BaseSearch, BaseSearchEngine
 from .duckduckgo_main import DuckDuckGoSearch
 from .yep_main import YepSearch
+from .bing_main import BingSearch
+from .yahoo_main import YahooSearch
 
 # Import new search engines
-from .engines.bing import Bing
 from .engines.brave import Brave
 from .engines.mojeek import Mojeek
-
+
 from .engines.yandex import Yandex
 from .engines.wikipedia import Wikipedia
-from .engines.bing_news import BingNews
-from .engines.yahoo_news import YahooNews
 
 # Import result models
 from .results import (
@@ -31,16 +30,14 @@ __all__ = [
     # Main search interfaces
     "DuckDuckGoSearch",
     "YepSearch",
+    "BingSearch",
+    "YahooSearch",
 
     # Individual engines
-    "Bing",
     "Brave",
     "Mojeek",
-    "Yahoo",
     "Yandex",
     "Wikipedia",
-    "BingNews",
-    "YahooNews",
 
     # Result models
     "TextResult",
```
webscout/search/bing_main.py
ADDED

```diff
@@ -0,0 +1,42 @@
+"""Bing unified search interface."""
+
+from __future__ import annotations
+from typing import Dict, List, Optional
+from .base import BaseSearch
+from .engines.bing.text import BingTextSearch
+from .engines.bing.images import BingImagesSearch
+from .engines.bing.news import BingNewsSearch
+from .engines.bing.suggestions import BingSuggestionsSearch
+
+
+class BingSearch(BaseSearch):
+    """Unified Bing search interface."""
+
+    def text(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None, unique: bool = True) -> List[Dict[str, str]]:
+        search = BingTextSearch()
+        return search.run(keywords, region, safesearch, max_results, unique=unique)
+
+    def images(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+        search = BingImagesSearch()
+        return search.run(keywords, region, safesearch, max_results)
+
+    def news(self, keywords: str, region: str = "us", safesearch: str = "moderate", max_results: Optional[int] = None) -> List[Dict[str, str]]:
+        search = BingNewsSearch()
+        return search.run(keywords, region, safesearch, max_results)
+
+    def suggestions(self, query: str, region: str = "en-US") -> List[Dict[str, str]]:
+        search = BingSuggestionsSearch()
+        result = search.run(query, region)
+        return [{'suggestion': s} for s in result]
+
+    def answers(self, keywords: str) -> List[Dict[str, str]]:
+        raise NotImplementedError("Answers not implemented for Bing")
+
+    def maps(self, *args, **kwargs) -> List[Dict[str, str]]:
+        raise NotImplementedError("Maps not implemented for Bing")
+
+    def translate(self, keywords: str, from_lang: Optional[str] = None, to_lang: str = "en") -> List[Dict[str, str]]:
+        raise NotImplementedError("Translate not implemented for Bing")
+
+    def videos(self, *args, **kwargs) -> List[Dict[str, str]]:
+        raise NotImplementedError("Videos not implemented for Bing")
```
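`BingSearch` is a thin façade over the per-vertical engines below; together with the `__init__.py` changes above, the new `BingSearch`/`YahooSearch` pair replaces the removed `Bing`, `Yahoo`, `BingNews`, and `YahooNews` exports. A short usage sketch (live network results will vary):

```python
from webscout.search import BingSearch

bing = BingSearch()

# News results carry 'title', 'url', 'body', 'source', and 'date' keys
# (see news.py below).
for article in bing.news("python", max_results=3):
    print(article["title"], "-", article["url"])

# Suggestions come back wrapped as [{'suggestion': ...}, ...]
print(bing.suggestions("webscout"))

# Verticals Bing does not implement raise immediately.
try:
    bing.maps("coffee near me")
except NotImplementedError as exc:
    print(exc)  # Maps not implemented for Bing
```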
webscout/search/engines/bing/__init__.py
ADDED

```diff
@@ -0,0 +1 @@
+"""Bing search engines."""
```
webscout/search/engines/bing/base.py
ADDED

```diff
@@ -0,0 +1,33 @@
+"""Base class for Bing search implementations."""
+
+from __future__ import annotations
+
+from ....litagent import LitAgent
+from curl_cffi.requests import Session
+
+
+class BingBase:
+    """Base class for Bing search engines."""
+
+    def __init__(
+        self,
+        timeout: int = 10,
+        proxies: dict[str, str] | None = None,
+        verify: bool = True,
+        lang: str = "en-US",
+        sleep_interval: float = 0.0,
+        impersonate: str = "chrome110",
+    ):
+        self.timeout = timeout
+        self.proxies = proxies
+        self.verify = verify
+        self.lang = lang
+        self.sleep_interval = sleep_interval
+        self.base_url = "https://www.bing.com"
+        self.session = Session(
+            proxies=proxies,
+            verify=verify,
+            timeout=timeout,
+            impersonate=impersonate,
+        )
+        self.session.headers.update(LitAgent().generate_fingerprint())
```
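Every Bing vertical inherits this constructor, so request behavior is tuned in one place: the `curl_cffi` session is created with browser TLS impersonation and then overlaid with a `LitAgent` header fingerprint. A sketch of the available knobs, using the images engine defined below (all arguments come straight from `BingBase.__init__`):

```python
from webscout.search.engines.bing.images import BingImagesSearch

search = BingImagesSearch(
    timeout=15,                                  # per-request timeout, seconds
    proxies={"https": "http://127.0.0.1:8080"},  # routed through curl_cffi
    verify=False,                                # skip TLS verification when proxying
    lang="en-US",                                # feeds the setlang parameter in news
    sleep_interval=1.0,                          # pause between paginated requests
    impersonate="chrome110",                     # curl_cffi browser fingerprint
)
results = search.run("sunset", max_results=5)
```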
webscout/search/engines/bing/images.py
ADDED

```diff
@@ -0,0 +1,108 @@
+"""Bing images search."""
+
+from __future__ import annotations
+
+from typing import Dict, List
+from urllib.parse import urlencode
+from time import sleep
+
+from .base import BingBase
+from webscout.scout import Scout
+
+
+class BingImagesSearch(BingBase):
+    def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "us")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+
+        if max_results is None:
+            max_results = 10
+
+        if not keywords:
+            raise ValueError("Keywords are mandatory")
+
+        safe_map = {
+            "on": "Strict",
+            "moderate": "Moderate",
+            "off": "Off"
+        }
+        safe = safe_map.get(safesearch.lower(), "Moderate")
+
+        # Bing images URL
+        url = f"{self.base_url}/images/async"
+        params = {
+            'q': keywords,
+            'first': '1',
+            'count': '35',  # Fetch more to get max_results
+            'cw': '1177',
+            'ch': '759',
+            'tsc': 'ImageHoverTitle',
+            'layout': 'RowBased_Landscape',
+            't': '0',
+            'IG': '',
+            'SFX': '0',
+            'iid': 'images.1'
+        }
+
+        results = []
+        first = 1
+        sfx = 0
+
+        while len(results) < max_results:
+            params['first'] = str(first)
+            params['SFX'] = str(sfx)
+            full_url = f"{url}?{urlencode(params)}"
+
+            try:
+                response = self.session.get(full_url, timeout=self.timeout)
+                response.raise_for_status()
+                html = response.text
+            except Exception as e:
+                raise Exception(f"Failed to fetch images: {str(e)}")
+
+            soup = Scout(html)
+            img_tags = soup.select('a.iusc img')
+
+            for img in img_tags:
+                if len(results) >= max_results:
+                    break
+
+                title = img.get('alt', '')
+                src = img.get('src', '')
+                m_attr = img.parent.get('m', '') if img.parent else ''
+
+                # Parse m attribute for full image URL
+                image_url = src
+                thumbnail = src
+                if m_attr:
+                    try:
+                        import json
+                        m_data = json.loads(m_attr)
+                        image_url = m_data.get('murl', src)
+                        thumbnail = m_data.get('turl', src)
+                    except:
+                        pass
+
+                source = ''
+                if img.parent and img.parent.parent:
+                    source_tag = img.parent.parent.select_one('.iusc .lnk')
+                    if source_tag:
+                        source = source_tag.get_text(strip=True)
+
+                results.append({
+                    'title': title,
+                    'image': image_url,
+                    'thumbnail': thumbnail,
+                    'url': image_url,  # For compatibility
+                    'source': source
+                })
+
+            first += 35
+            sfx += 1
+
+            if self.sleep_interval:
+                sleep(self.sleep_interval)
+
+        return results[:max_results]
```
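The full-resolution URL lives in the JSON `m` attribute of the surrounding `a.iusc` anchor; the loop above falls back to the thumbnail `src` whenever that attribute is missing or unparsable. The same fallback, extracted into a standalone sketch with a made-up payload (the bare `except` is narrowed here for clarity):

```python
import json

def resolve_urls(src: str, m_attr: str) -> tuple[str, str]:
    """(image_url, thumbnail) fallback mirroring BingImagesSearch.run."""
    image_url = thumbnail = src
    if m_attr:
        try:
            m_data = json.loads(m_attr)
            image_url = m_data.get('murl', src)  # full-size image URL
            thumbnail = m_data.get('turl', src)  # thumbnail URL
        except (ValueError, TypeError):
            pass
    return image_url, thumbnail

# Hypothetical payload in the shape Bing embeds in the 'm' attribute
m = json.dumps({"murl": "https://example.com/full.jpg",
                "turl": "https://example.com/thumb.jpg"})
print(resolve_urls("https://tse1.mm.bing.net/th?id=abc", m))    # uses murl/turl
print(resolve_urls("https://tse1.mm.bing.net/th?id=abc", "?"))  # falls back to src
```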
webscout/search/engines/bing/news.py
ADDED

```diff
@@ -0,0 +1,91 @@
+"""Bing news search."""
+
+from __future__ import annotations
+
+from typing import Dict, List
+from urllib.parse import urlencode
+from time import sleep
+
+from .base import BingBase
+from webscout.scout import Scout
+
+
+class BingNewsSearch(BingBase):
+    def run(self, *args, **kwargs) -> List[Dict[str, str]]:
+        keywords = args[0] if args else kwargs.get("keywords")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "us")
+        safesearch = args[2] if len(args) > 2 else kwargs.get("safesearch", "moderate")
+        max_results = args[3] if len(args) > 3 else kwargs.get("max_results", 10)
+
+        if max_results is None:
+            max_results = 10
+
+        if not keywords:
+            raise ValueError("Keywords are mandatory")
+
+        safe_map = {
+            "on": "Strict",
+            "moderate": "Moderate",
+            "off": "Off"
+        }
+        safe = safe_map.get(safesearch.lower(), "Moderate")
+
+        # Bing news URL
+        url = f"{self.base_url}/news/infinitescrollajax"
+        params = {
+            'q': keywords,
+            'InfiniteScroll': '1',
+            'first': '1',
+            'SFX': '0',
+            'cc': region.lower(),
+            'setlang': self.lang.split('-')[0]
+        }
+
+        results = []
+        first = 1
+        sfx = 0
+
+        while len(results) < max_results:
+            params['first'] = str(first)
+            params['SFX'] = str(sfx)
+            full_url = f"{url}?{urlencode(params)}"
+
+            try:
+                response = self.session.get(full_url, timeout=self.timeout)
+                response.raise_for_status()
+                html = response.text
+            except Exception as e:
+                raise Exception(f"Failed to fetch news: {str(e)}")
+
+            if not html:
+                break
+
+            soup = Scout(html)
+            news_items = soup.select('div.newsitem')
+
+            for item in news_items:
+                if len(results) >= max_results:
+                    break
+
+                title = item.select_one('a.title')
+                snippet = item.select_one('div.snippet')
+                source = item.select_one('div.source')
+                date = item.select_one('span.date')
+
+                if title:
+                    news_result = {
+                        'title': title.get_text(strip=True),
+                        'url': title.get('href', ''),
+                        'body': snippet.get_text(strip=True) if snippet else '',
+                        'source': source.get_text(strip=True) if source else '',
+                        'date': date.get_text(strip=True) if date else ''
+                    }
+                    results.append(news_result)
+
+            first += 10
+            sfx += 1
+
+            if self.sleep_interval:
+                sleep(self.sleep_interval)
+
+        return results[:max_results]
```
webscout/search/engines/bing/suggestions.py
ADDED

```diff
@@ -0,0 +1,34 @@
+"""Bing suggestions search."""
+
+from __future__ import annotations
+
+from typing import List
+from urllib.parse import urlencode
+
+from .base import BingBase
+
+
+class BingSuggestionsSearch(BingBase):
+    def run(self, *args, **kwargs) -> List[str]:
+        query = args[0] if args else kwargs.get("query")
+        region = args[1] if len(args) > 1 else kwargs.get("region", "en-US")
+
+        if not query:
+            raise ValueError("Query is mandatory")
+
+        params = {
+            "query": query,
+            "mkt": region
+        }
+        url = f"https://api.bing.com/osjson.aspx?{urlencode(params)}"
+
+        try:
+            response = self.session.get(url, timeout=self.timeout)
+            response.raise_for_status()
+            data = response.json()
+            # Bing suggestions API returns [query, [suggestions]]
+            if len(data) > 1 and isinstance(data[1], list):
+                return data[1]
+            return []
+        except Exception as e:
+            raise Exception(f"Failed to fetch suggestions: {str(e)}")
```
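Unlike the HTML-scraping engines above, suggestions use Bing's OSJSON endpoint, which returns the OpenSearch suggestions format: a JSON array whose first element echoes the query and whose second is the suggestion list, hence the `data[1]` access. A sketch against a canned payload:

```python
import json

# Shape returned by https://api.bing.com/osjson.aspx?query=...&mkt=...
payload = json.loads('["webscout", ["webscout python", "webscout pypi"]]')

query, suggestions = payload[0], payload[1]
print(query)        # the echoed query
print(suggestions)  # ['webscout python', 'webscout pypi']
```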