webscout 2025.10.15__py3-none-any.whl → 2025.10.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic.

Files changed (60)
  1. webscout/Extra/YTToolkit/README.md +1 -1
  2. webscout/Extra/tempmail/README.md +3 -3
  3. webscout/Provider/OPENAI/README.md +1 -1
  4. webscout/Provider/TTI/bing.py +4 -4
  5. webscout/__init__.py +1 -1
  6. webscout/client.py +4 -5
  7. webscout/litprinter/__init__.py +0 -42
  8. webscout/scout/README.md +59 -8
  9. webscout/scout/core/scout.py +62 -0
  10. webscout/scout/element.py +251 -45
  11. webscout/search/__init__.py +3 -4
  12. webscout/search/engines/bing/images.py +5 -2
  13. webscout/search/engines/bing/news.py +6 -4
  14. webscout/search/engines/bing/text.py +5 -2
  15. webscout/search/engines/yahoo/__init__.py +41 -0
  16. webscout/search/engines/yahoo/answers.py +16 -0
  17. webscout/search/engines/yahoo/base.py +34 -0
  18. webscout/search/engines/yahoo/images.py +324 -0
  19. webscout/search/engines/yahoo/maps.py +16 -0
  20. webscout/search/engines/yahoo/news.py +258 -0
  21. webscout/search/engines/yahoo/suggestions.py +140 -0
  22. webscout/search/engines/yahoo/text.py +273 -0
  23. webscout/search/engines/yahoo/translate.py +16 -0
  24. webscout/search/engines/yahoo/videos.py +302 -0
  25. webscout/search/engines/yahoo/weather.py +220 -0
  26. webscout/search/http_client.py +1 -1
  27. webscout/search/yahoo_main.py +54 -0
  28. webscout/{auth → server}/__init__.py +2 -23
  29. webscout/server/config.py +84 -0
  30. webscout/{auth → server}/request_processing.py +3 -28
  31. webscout/{auth → server}/routes.py +6 -148
  32. webscout/server/schemas.py +23 -0
  33. webscout/{auth → server}/server.py +11 -43
  34. webscout/server/simple_logger.py +84 -0
  35. webscout/version.py +1 -1
  36. webscout/version.py.bak +1 -1
  37. webscout/zeroart/README.md +17 -9
  38. webscout/zeroart/__init__.py +78 -6
  39. webscout/zeroart/effects.py +51 -1
  40. webscout/zeroart/fonts.py +559 -1
  41. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/METADATA +10 -52
  42. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/RECORD +49 -45
  43. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/entry_points.txt +1 -1
  44. webscout/auth/api_key_manager.py +0 -189
  45. webscout/auth/auth_system.py +0 -85
  46. webscout/auth/config.py +0 -175
  47. webscout/auth/database.py +0 -755
  48. webscout/auth/middleware.py +0 -248
  49. webscout/auth/models.py +0 -185
  50. webscout/auth/rate_limiter.py +0 -254
  51. webscout/auth/schemas.py +0 -103
  52. webscout/auth/simple_logger.py +0 -236
  53. webscout/search/engines/yahoo.py +0 -65
  54. webscout/search/engines/yahoo_news.py +0 -64
  55. /webscout/{auth → server}/exceptions.py +0 -0
  56. /webscout/{auth → server}/providers.py +0 -0
  57. /webscout/{auth → server}/request_models.py +0 -0
  58. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/WHEEL +0 -0
  59. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/licenses/LICENSE.md +0 -0
  60. {webscout-2025.10.15.dist-info → webscout-2025.10.16.dist-info}/top_level.txt +0 -0
@@ -1,5 +1,5 @@
1
1
  <div align="center">
2
- <a href="https://github.com/OEvortex/Webscout">
2
+ <a href="https://github.com/pyscout/Webscout">
3
3
  <img src="https://img.shields.io/badge/YTToolkit-YouTube%20Toolkit-red?style=for-the-badge&logo=youtube&logoColor=white" alt="YTToolkit Logo">
4
4
  </a>
5
5
  <h1>YTToolkit</h1>
@@ -1,6 +1,6 @@
1
1
  <div align="center">
2
- <a href="https://github.com/OEvortex/Webscout">
3
- <img src="https://img.shields.io/badge/WebScout-TempMail%20Module-blue?style=for-the-badge&logo=mail&logoColor=white" alt="WebScout TempMail">
2
+ <a href="https://github.com/pyscout/Webscout">
3
+ <img src="https://img.shields.io/badge/WebScout-TempMail%20Toolkit-blue?style=for-the-badge&logo=maildotru&logoColor=white" alt="WebScout TempMail Toolkit">
4
4
  </a>
5
5
 
6
6
  <h1>📧 TempMail</h1>
@@ -484,5 +484,5 @@ Please refer to the main Webscout project's contributing guidelines if you plan
484
484
  <a href="https://buymeacoffee.com/oevortex"><img alt="Buy Me A Coffee" src="https://img.shields.io/badge/Buy%20Me%20A%20Coffee-FFDD00?style=for-the-badge&logo=buymeacoffee&logoColor=black"></a>
485
485
  </div>
486
486
  <p>📧 TempMail - Part of the Webscout Toolkit</p>
487
- <a href="https://github.com/OEvortex/Webscout">Back to Main Webscout Project</a>
487
+ <a href="https://github.com/pyscout/Webscout">Back to Main Webscout Project</a>
488
488
  </div>
@@ -1,5 +1,5 @@
1
1
  <div align="center">
2
- <a href="https://github.com/OEvortex/Webscout">
2
+ <a href="https://github.com/pyscout/Webscout">
3
3
  <img src="https://img.shields.io/badge/WebScout-OpenAI%20Compatible%20Providers-4285F4?style=for-the-badge&logo=openai&logoColor=white" alt="WebScout OpenAI Compatible Providers">
4
4
  </a>
5
5
  <br/>
@@ -7,6 +7,7 @@ from webscout.Provider.TTI.utils import ImageData, ImageResponse
7
7
  from webscout.Provider.TTI.base import TTICompatibleProvider, BaseImages
8
8
  from io import BytesIO
9
9
  from webscout.litagent import LitAgent
10
+ from webscout.scout import Scout
10
11
 
11
12
  try:
12
13
  from PIL import Image
@@ -81,9 +82,8 @@ class Images(BaseImages):
81
82
  time.sleep(3)
82
83
  try:
83
84
  poll_resp = session.get(polling_url, headers=headers, timeout=timeout)
84
- from bs4 import BeautifulSoup
85
- soup = BeautifulSoup(poll_resp.text, "html.parser")
86
- imgs = [img["src"].split("?")[0] for img in soup.select(".img_cont .mimg") if img.get("src")]
85
+ scout = Scout(poll_resp.text, features='html.parser')
86
+ imgs = [img["src"].split("?")[0] for img in scout.select(".img_cont .mimg") if img.attrs.get("src")]
87
87
  if imgs:
88
88
  img_url = imgs[0]
89
89
  break
@@ -232,7 +232,7 @@ class BingImageAI(TTICompatibleProvider):
232
232
 
233
233
  if __name__ == "__main__":
234
234
  from rich import print
235
- client = BingImageAI(cookie="1QyBY4Z1eHBW6fbI25kdM5TrlRGWzn5PFySapCOfvvz04zaounFG660EipVJSOXXvcdeXXLwsWHdDI8bNymucF_QnMHSlY1mc0pPI7e9Ar6o-_7e9Ik5QOe1nkJIe5vz22pibioTqx0IfVKwmVbX22A3bFD7ODaSZalKFr-AuxgAaRVod-giTTry6Ei7RVgisF7BHlkMPPwtCeO234ujgug")
235
+ client = BingImageAI(cookie="1Fw9daLSZzVBJXgevTDuc0jHZ60l4m5IiQEwjRCFOwEkpEBDmw3b8CEAALFSwZ1QBu-rATNkfD0i0gfJmVHeFlogqIriGwxNwT9T6fVREgAQD4_qn0VnQYP681NN4K80t6o-eJXnK1MBhdjxTIaok8173LGmLkEWLqHC0k3dYnF7m2kHRhf1dxjEH3WDI56hxiSPZtnggdzrfnuFAmOgCQQ")
236
236
  response = client.images.create(
237
237
  model="gpt4o",
238
238
  prompt="A cat riding a bicycle",
webscout/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # webscout/__init__.py
2
2
 
3
- from .search import DuckDuckGoSearch, YepSearch, BingSearch
3
+ from .search import *
4
4
  from .version import __version__
5
5
  from .Provider import *
6
6
  from .Provider.TTI import *
webscout/client.py CHANGED
@@ -34,20 +34,19 @@ from webscout.Provider.OPENAI import *
34
34
  try:
35
35
  # Use lazy import to avoid module execution issues
36
36
  def run_api(*args, **kwargs):
37
- """Run the Webscout OpenAI-compatible API server (FastAPI backend)."""
38
- from webscout.auth.server import run_api as _run_api
37
+ from webscout.server.server import run_api as _run_api
39
38
  return _run_api(*args, **kwargs)
40
39
 
41
40
  def start_server(**kwargs):
42
41
  """Start the Webscout OpenAI-compatible API server (FastAPI backend)."""
43
- from webscout.auth.server import run_api as _run_api
42
+ from webscout.server.server import run_api as _run_api
44
43
  return _run_api(**kwargs)
45
44
  except ImportError:
46
45
  # Fallback for environments where the backend is not available
47
46
  def run_api(*args, **kwargs):
48
- raise ImportError("webscout.auth.server.run_api is not available in this environment.")
47
+ raise ImportError("webscout.server.server.run_api is not available in this environment.")
49
48
  def start_server(*args, **kwargs):
50
- raise ImportError("webscout.auth.server.start_server is not available in this environment.")
49
+ raise ImportError("webscout.server.server.start_server is not available in this environment.")
51
50
 
52
51
  # ---
53
52
  # API Documentation
@@ -1,45 +1,3 @@
1
- """
2
- >>> from litprinter import litprint
3
- >>> from litprinter import lit
4
- >>> from litprinter import install, uninstall
5
- >>>
6
- >>> litprint("Hello, world!")
7
- LIT -> [__main__.py:1] in () >>> Hello, world!
8
- >>>
9
- >>> def my_function():
10
- ... lit(1, 2, 3)
11
- >>> my_function()
12
- LIT -> [__main__.py:4] in my_function() >>> 1, 2, 3
13
- >>> install()
14
- >>> ic("This is now the builtins.ic()")
15
- LIT -> [__main__.py:7] in () >>> This is now the builtins.ic()
16
- >>> uninstall()
17
-
18
- This module provides enhanced print and logging functionalities for Python,
19
- allowing developers to debug their code with style and precision. It
20
- includes the litprint and lit functions for debugging, log for logging, and
21
- install/uninstall functions for integration into the builtins module.
22
- It also handles colorizing output and provides different styles and customizable
23
- options.
24
-
25
- LITPRINTER is inspired by the icecream package and provides similar functionality
26
- with additional features:
27
- - Variable inspection with expression display
28
- - Return value handling for inline usage
29
- - Support for custom formatters for specific data types
30
- - Execution context tracking
31
- - Rich-like colorized output with multiple themes (JARVIS, RICH, MODERN, NEON, CYBERPUNK)
32
- - Better JSON formatting with indent=2 by default
33
- - Advanced pretty printing for complex data structures with smart truncation
34
- - Clickable file paths in supported terminals and editors (VSCode compatible)
35
- - Enhanced visual formatting with better spacing and separators
36
- - Special formatters for common types (Exception, bytes, set, frozenset, etc.)
37
- - Smart object introspection for custom classes
38
- - Logging capabilities with timestamp and log levels
39
- """
40
-
41
- # Try to import from the standalone litprinter package first
42
- # If it's not installed
43
1
  try:
44
2
  import litprinter
45
3
  # If standalone package is found, re-export all its components
webscout/scout/README.md CHANGED
@@ -43,7 +43,7 @@ pip install webscout
43
43
  Or install the latest version from GitHub:
44
44
 
45
45
  ```bash
46
- pip install git+https://github.com/OEvortex/Webscout.git
46
+ pip install git+https://github.com/pyscout/Webscout.git
47
47
  ```
48
48
 
49
49
  ## 🚀 Quick Start
@@ -147,10 +147,57 @@ Scout provides powerful tools for navigating and manipulating HTML/XML documents
147
147
  - **Document Manipulation**: Modify, replace, or remove elements
148
148
  - **Dynamic Building**: Easily append or insert new nodes
149
149
 
150
+ #### CSS Selector Support
151
+
152
+ Scout includes a comprehensive CSS selector engine that supports all common selector types:
153
+
150
154
  ```python
151
- # CSS selector support
152
- elements = scout.select('div.content > p')
155
+ # Tag selectors
156
+ paragraphs = scout.select('p')
157
+ divs = scout.select('div')
158
+
159
+ # Class selectors
160
+ items = scout.select('.item') # Single class
161
+ cards = scout.select('div.card') # Tag + class
162
+ special = scout.select('.card.special') # Multiple classes
163
+
164
+ # ID selectors
165
+ header = scout.select_one('#header') # Single element by ID
166
+ menu = scout.select('nav#main-menu') # Tag + ID
167
+
168
+ # Attribute selectors
169
+ links = scout.select('a[href]') # Has attribute
170
+ external = scout.select('a[rel="nofollow"]') # Attribute value
171
+ images = scout.select('img[alt]') # Has alt attribute
172
+
173
+ # Descendant selectors (space)
174
+ nested = scout.select('div p') # Any p inside div
175
+ deep = scout.select('article section p') # Deeply nested
176
+
177
+ # Child selectors (>)
178
+ direct = scout.select('ul > li') # Direct children only
179
+ menu_items = scout.select('nav#menu > ul > li') # Multiple levels
180
+
181
+ # Combined selectors
182
+ complex = scout.select('div.container > p.text[lang="en"]')
183
+ links = scout.select('ol#results > li.item a[href]')
184
+
185
+ # Get first match only
186
+ first = scout.select_one('p.intro')
187
+ ```
153
188
 
189
+ **Supported Selector Types:**
190
+ - **Tag**: `p`, `div`, `a`
191
+ - **Class**: `.class`, `div.class`, `.class1.class2`
192
+ - **ID**: `#id`, `div#id`
193
+ - **Attribute**: `[attr]`, `[attr="value"]`
194
+ - **Descendant**: `div p`, `article section p`
195
+ - **Child**: `div > p`, `ul > li`
196
+ - **Combined**: `p.class#id[attr="value"]`
197
+
198
+ #### Element Navigation
199
+
200
+ ```python
154
201
  # Advanced find with attribute matching
155
202
  results = scout.find_all('a', attrs={'class': 'external', 'rel': 'nofollow'})
156
203
 
@@ -340,6 +387,10 @@ cached_data = scout.cache('parsed_data')
340
387
  - `__init__(markup, features='html.parser', from_encoding=None)`: Initialize with HTML content
341
388
  - `find(name, attrs={}, recursive=True, text=None)`: Find first matching element
342
389
  - `find_all(name, attrs={}, recursive=True, text=None, limit=None)`: Find all matching elements
390
+ - `find_next(name, attrs={}, text=None)`: Find next element in document order
391
+ - `find_all_next(name, attrs={}, text=None, limit=None)`: Find all next elements in document order
392
+ - `find_previous(name, attrs={}, text=None)`: Find previous element in document order
393
+ - `find_all_previous(name, attrs={}, text=None, limit=None)`: Find all previous elements in document order
343
394
  - `select(selector)`: Find elements using CSS selector
344
395
  - `get_text(separator=' ', strip=False)`: Extract text from document
345
396
  - `analyze_text()`: Perform text analysis
@@ -358,7 +409,7 @@ cached_data = scout.cache('parsed_data')
358
409
  - `_crawl_page(url, depth=0)`: Crawl a single page (internal method)
359
410
  - `_is_valid_url(url)`: Check if a URL is valid (internal method)
360
411
 
361
- For detailed API documentation, please refer to the [documentation](https://github.com/OEvortex/Webscout/wiki).
412
+ For detailed API documentation, please refer to the [documentation](https://github.com/pyscout/Webscout/wiki).
362
413
 
363
414
  ## 🔧 Dependencies
364
415
 
@@ -393,9 +444,9 @@ This project is licensed under the MIT License - see the LICENSE file for detail
393
444
  <div align="center">
394
445
  <p>Made with ❤️ by the Webscout team</p>
395
446
  <p>
396
- <a href="https://github.com/OEvortex/Webscout">GitHub</a> •
397
- <a href="https://github.com/OEvortex/Webscout/wiki">Documentation</a> •
398
- <a href="https://github.com/OEvortex/Webscout/issues">Report Bug</a> •
399
- <a href="https://github.com/OEvortex/Webscout/issues">Request Feature</a>
447
+ <a href="https://github.com/pyscout/Webscout">GitHub</a> •
448
+ <a href="https://github.com/pyscout/Webscout/wiki">Documentation</a> •
449
+ <a href="https://github.com/pyscout/Webscout/issues">Report Bug</a> •
450
+ <a href="https://github.com/pyscout/Webscout/issues">Request Feature</a>
400
451
  </p>
401
452
  </div>
@@ -454,6 +454,68 @@ class Scout:
454
454
  pass
455
455
  return siblings
456
456
 
457
+ def find_next(self, name=None, attrs={}, text=None, **kwargs) -> Optional[Tag]:
458
+ """
459
+ Find the next element in document order.
460
+
461
+ Args:
462
+ name: Tag name to search for
463
+ attrs: Attributes to match
464
+ text: Text content to match
465
+ **kwargs: Additional attributes
466
+
467
+ Returns:
468
+ Optional[Tag]: Next matching element or None
469
+ """
470
+ return self._soup.find_next(name, attrs, text, **kwargs)
471
+
472
+ def find_all_next(self, name=None, attrs={}, text=None, limit=None, **kwargs) -> List[Tag]:
473
+ """
474
+ Find all next elements in document order.
475
+
476
+ Args:
477
+ name: Tag name to search for
478
+ attrs: Attributes to match
479
+ text: Text content to match
480
+ limit: Maximum number of results
481
+ **kwargs: Additional attributes
482
+
483
+ Returns:
484
+ List[Tag]: List of matching elements
485
+ """
486
+ return self._soup.find_all_next(name, attrs, text, limit, **kwargs)
487
+
488
+ def find_previous(self, name=None, attrs={}, text=None, **kwargs) -> Optional[Tag]:
489
+ """
490
+ Find the previous element in document order.
491
+
492
+ Args:
493
+ name: Tag name to search for
494
+ attrs: Attributes to match
495
+ text: Text content to match
496
+ **kwargs: Additional attributes
497
+
498
+ Returns:
499
+ Optional[Tag]: Previous matching element or None
500
+ """
501
+ return self._soup.find_previous(name, attrs, text, **kwargs)
502
+
503
+ def find_all_previous(self, name=None, attrs={}, text=None, limit=None, **kwargs) -> List[Tag]:
504
+ """
505
+ Find all previous elements in document order.
506
+
507
+ Args:
508
+ name: Tag name to search for
509
+ attrs: Attributes to match
510
+ text: Text content to match
511
+ limit: Maximum number of results
512
+ **kwargs: Additional attributes
513
+
514
+ Returns:
515
+ List[Tag]: List of matching elements
516
+ """
517
+ return self._soup.find_all_previous(name, attrs, text, limit, **kwargs)
518
+
457
519
  def select(self, selector: str) -> List[Tag]:
458
520
  """
459
521
  Select elements using CSS selector.
webscout/scout/element.py CHANGED
@@ -267,7 +267,14 @@ class Tag:
267
267
  def select(self, selector: str) -> List['Tag']:
268
268
  """
269
269
  Select elements using CSS selector.
270
- Enhanced to support more complex selectors.
270
+ Enhanced to support more complex selectors including:
271
+ - Tag selectors: 'p', 'div'
272
+ - Class selectors: '.class', 'p.class'
273
+ - ID selectors: '#id', 'div#id'
274
+ - Attribute selectors: '[attr]', '[attr=value]'
275
+ - Descendant selectors: 'div p'
276
+ - Child selectors: 'div > p'
277
+ - Multiple classes: '.class1.class2'
271
278
 
272
279
  Args:
273
280
  selector (str): CSS selector string
@@ -275,54 +282,248 @@ class Tag:
275
282
  Returns:
276
283
  List[Tag]: List of matching elements
277
284
  """
278
- # More advanced CSS selector parsing
279
- # This is a simplified implementation and might need more robust parsing
280
- parts = re.split(r'\s+', selector.strip())
281
285
  results = []
282
-
283
- def _match_selector(tag, selector_part):
284
- # Support more complex selectors
285
- if selector_part.startswith('.'):
286
- # Class selector
287
- return selector_part[1:] in tag.get('class', [])
288
- elif selector_part.startswith('#'):
289
- # ID selector
290
- return tag.get('id') == selector_part[1:]
291
- elif '[' in selector_part and ']' in selector_part:
292
- # Attribute selector
293
- attr_match = re.match(r'(\w+)\[([^=]+)(?:=(.+))?\]', selector_part)
294
- if attr_match:
295
- tag_name, attr, value = attr_match.groups()
296
- if tag_name and tag.name != tag_name:
286
+
287
+ def _parse_simple_selector(simple_sel: str) -> dict:
288
+ """Parse a simple selector like 'p.class#id[attr=value]' into components."""
289
+ components = {
290
+ 'tag': None,
291
+ 'id': None,
292
+ 'classes': [],
293
+ 'attrs': {}
294
+ }
295
+
296
+ # Extract tag name (at the start)
297
+ tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
298
+ if tag_match:
299
+ components['tag'] = tag_match.group(1)
300
+ simple_sel = simple_sel[len(tag_match.group(1)):]
301
+
302
+ # Extract ID
303
+ id_matches = re.findall(r'#([\w-]+)', simple_sel)
304
+ if id_matches:
305
+ components['id'] = id_matches[0]
306
+
307
+ # Extract classes
308
+ class_matches = re.findall(r'\.([\w-]+)', simple_sel)
309
+ components['classes'] = class_matches
310
+
311
+ # Extract attributes
312
+ attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
313
+ for attr_expr in attr_matches:
314
+ if '=' in attr_expr:
315
+ attr_name, attr_value = attr_expr.split('=', 1)
316
+ components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
317
+ else:
318
+ components['attrs'][attr_expr.strip()] = None
319
+
320
+ return components
321
+
322
+ def _match_simple_selector(tag: 'Tag', components: dict) -> bool:
323
+ """Check if a tag matches the parsed selector components."""
324
+ # Check tag name
325
+ if components['tag'] and tag.name != components['tag']:
326
+ return False
327
+
328
+ # Check ID
329
+ if components['id'] and tag.get('id') != components['id']:
330
+ return False
331
+
332
+ # Check classes
333
+ tag_classes = tag.get('class', '')
334
+ if isinstance(tag_classes, str):
335
+ tag_classes = tag_classes.split()
336
+ elif not isinstance(tag_classes, list):
337
+ tag_classes = [str(tag_classes)] if tag_classes else []
338
+
339
+ for cls in components['classes']:
340
+ if cls not in tag_classes:
341
+ return False
342
+
343
+ # Check attributes
344
+ for attr_name, attr_value in components['attrs'].items():
345
+ if attr_value is None:
346
+ # Just check attribute exists
347
+ if attr_name not in tag.attrs:
297
348
  return False
298
- if value:
299
- return tag.get(attr) == value.strip("'\"")
300
- return attr in tag.attrs
301
- else:
302
- # Tag selector
303
- return tag.name == selector_part
304
-
305
- def _recursive_select(element, selector_parts):
306
- if not selector_parts:
307
- results.append(element)
308
- return
309
-
310
- current_selector = selector_parts[0]
311
- remaining_selectors = selector_parts[1:]
312
-
313
- if _match_selector(element, current_selector):
314
- if not remaining_selectors:
315
- results.append(element)
316
349
  else:
317
- for child in element.contents:
318
- if isinstance(child, Tag):
319
- _recursive_select(child, remaining_selectors)
320
-
321
- for child in self.contents:
322
- if isinstance(child, Tag):
323
- _recursive_select(child, parts)
324
-
350
+ # Check attribute value
351
+ if tag.get(attr_name) != attr_value:
352
+ return False
353
+
354
+ return True
355
+
356
+ def _find_all_matching(element: 'Tag', components: dict) -> List['Tag']:
357
+ """Recursively find all elements matching the selector components."""
358
+ matches = []
359
+
360
+ # Check current element
361
+ if _match_simple_selector(element, components):
362
+ matches.append(element)
363
+
364
+ # Check children recursively
365
+ for child in element.contents:
366
+ if isinstance(child, Tag):
367
+ matches.extend(_find_all_matching(child, components))
368
+
369
+ return matches
370
+
371
+ # Handle combinators (descendant ' ' and child '>')
372
+ if ' > ' in selector:
373
+ # Child combinator
374
+ parts = [p.strip() for p in selector.split(' > ')]
375
+ return self._select_with_child_combinator(parts)
376
+ elif ' ' in selector.strip():
377
+ # Descendant combinator
378
+ parts = [p.strip() for p in selector.split()]
379
+ return self._select_with_descendant_combinator(parts)
380
+ else:
381
+ # Simple selector
382
+ components = _parse_simple_selector(selector)
383
+ return _find_all_matching(self, components)
384
+
385
+ def _select_with_descendant_combinator(self, parts: List[str]) -> List['Tag']:
386
+ """Handle descendant combinator (space)."""
387
+ if not parts:
388
+ return []
389
+
390
+ if len(parts) == 1:
391
+ components = self._parse_selector_components(parts[0])
392
+ return self._find_all_matching_in_tree(self, components)
393
+
394
+ # Find elements matching the first part
395
+ first_components = self._parse_selector_components(parts[0])
396
+ first_matches = self._find_all_matching_in_tree(self, first_components)
397
+
398
+ # For each match, find descendants matching remaining parts
399
+ results = []
400
+ remaining_selector = ' '.join(parts[1:])
401
+ for match in first_matches:
402
+ descendants = match.select(remaining_selector)
403
+ results.extend(descendants)
404
+
325
405
  return results
406
+
407
+ def _select_with_child_combinator(self, parts: List[str]) -> List['Tag']:
408
+ """Handle child combinator (>)."""
409
+ if not parts:
410
+ return []
411
+
412
+ if len(parts) == 1:
413
+ components = self._parse_selector_components(parts[0])
414
+ return self._find_all_matching_in_tree(self, components)
415
+
416
+ # Find elements matching the first part
417
+ first_components = self._parse_selector_components(parts[0])
418
+ first_matches = self._find_all_matching_in_tree(self, first_components)
419
+
420
+ # For each match, find direct children matching the next part
421
+ if len(parts) == 2:
422
+ # Last part, just check direct children
423
+ next_components = self._parse_selector_components(parts[1])
424
+ results = []
425
+ for match in first_matches:
426
+ for child in match.contents:
427
+ if isinstance(child, Tag) and self._match_selector_components(child, next_components):
428
+ results.append(child)
429
+ return results
430
+ else:
431
+ # More parts, need to continue recursively
432
+ results = []
433
+ next_components = self._parse_selector_components(parts[1])
434
+ remaining_parts = parts[2:]
435
+ for match in first_matches:
436
+ for child in match.contents:
437
+ if isinstance(child, Tag) and self._match_selector_components(child, next_components):
438
+ # Continue with remaining parts
439
+ remaining_selector = ' > '.join(remaining_parts)
440
+ descendants = child.select(remaining_selector)
441
+ results.extend(descendants)
442
+ return results
443
+
444
+ def _parse_selector_components(self, simple_sel: str) -> dict:
445
+ """Parse a simple selector like 'p.class#id[attr=value]' into components."""
446
+ components = {
447
+ 'tag': None,
448
+ 'id': None,
449
+ 'classes': [],
450
+ 'attrs': {}
451
+ }
452
+
453
+ # Extract tag name (at the start)
454
+ tag_match = re.match(r'^([a-zA-Z][\w-]*)', simple_sel)
455
+ if tag_match:
456
+ components['tag'] = tag_match.group(1)
457
+ simple_sel = simple_sel[len(tag_match.group(1)):]
458
+
459
+ # Extract ID
460
+ id_matches = re.findall(r'#([\w-]+)', simple_sel)
461
+ if id_matches:
462
+ components['id'] = id_matches[0]
463
+
464
+ # Extract classes
465
+ class_matches = re.findall(r'\.([\w-]+)', simple_sel)
466
+ components['classes'] = class_matches
467
+
468
+ # Extract attributes
469
+ attr_matches = re.findall(r'\[([^\]]+)\]', simple_sel)
470
+ for attr_expr in attr_matches:
471
+ if '=' in attr_expr:
472
+ attr_name, attr_value = attr_expr.split('=', 1)
473
+ components['attrs'][attr_name.strip()] = attr_value.strip('\'"')
474
+ else:
475
+ components['attrs'][attr_expr.strip()] = None
476
+
477
+ return components
478
+
479
+ def _match_selector_components(self, tag: 'Tag', components: dict) -> bool:
480
+ """Check if a tag matches the parsed selector components."""
481
+ # Check tag name
482
+ if components['tag'] and tag.name != components['tag']:
483
+ return False
484
+
485
+ # Check ID
486
+ if components['id'] and tag.get('id') != components['id']:
487
+ return False
488
+
489
+ # Check classes
490
+ tag_classes = tag.get('class', '')
491
+ if isinstance(tag_classes, str):
492
+ tag_classes = tag_classes.split()
493
+ elif not isinstance(tag_classes, list):
494
+ tag_classes = [str(tag_classes)] if tag_classes else []
495
+
496
+ for cls in components['classes']:
497
+ if cls not in tag_classes:
498
+ return False
499
+
500
+ # Check attributes
501
+ for attr_name, attr_value in components['attrs'].items():
502
+ if attr_value is None:
503
+ # Just check attribute exists
504
+ if attr_name not in tag.attrs:
505
+ return False
506
+ else:
507
+ # Check attribute value
508
+ if tag.get(attr_name) != attr_value:
509
+ return False
510
+
511
+ return True
512
+
513
+ def _find_all_matching_in_tree(self, element: 'Tag', components: dict) -> List['Tag']:
514
+ """Recursively find all elements matching the selector components."""
515
+ matches = []
516
+
517
+ # Check current element
518
+ if self._match_selector_components(element, components):
519
+ matches.append(element)
520
+
521
+ # Check children recursively
522
+ for child in element.contents:
523
+ if isinstance(child, Tag):
524
+ matches.extend(self._find_all_matching_in_tree(child, components))
525
+
526
+ return matches
326
527
 
327
528
  def select_one(self, selector: str) -> Optional['Tag']:
328
529
  """
@@ -462,6 +663,11 @@ class Tag:
462
663
  new_child.parent = self
463
664
  self.contents.append(new_child)
464
665
 
666
+ def extend(self, new_children: List[Union['Tag', NavigableString, str]]) -> None:
667
+ """Extend the contents of this tag with a list of new children."""
668
+ for child in new_children:
669
+ self.append(child)
670
+
465
671
  def insert(self, index: int, new_child: Union['Tag', NavigableString, str]) -> None:
466
672
  """Insert a new child at the given index with error handling."""
467
673
  if isinstance(new_child, str):
@@ -4,14 +4,14 @@ from .base import BaseSearch, BaseSearchEngine
4
4
  from .duckduckgo_main import DuckDuckGoSearch
5
5
  from .yep_main import YepSearch
6
6
  from .bing_main import BingSearch
7
+ from .yahoo_main import YahooSearch
7
8
 
8
9
  # Import new search engines
9
10
  from .engines.brave import Brave
10
11
  from .engines.mojeek import Mojeek
11
- from .engines.yahoo import Yahoo
12
+
12
13
  from .engines.yandex import Yandex
13
14
  from .engines.wikipedia import Wikipedia
14
- from .engines.yahoo_news import YahooNews
15
15
 
16
16
  # Import result models
17
17
  from .results import (
@@ -31,14 +31,13 @@ __all__ = [
31
31
  "DuckDuckGoSearch",
32
32
  "YepSearch",
33
33
  "BingSearch",
34
+ "YahooSearch",
34
35
 
35
36
  # Individual engines
36
37
  "Brave",
37
38
  "Mojeek",
38
- "Yahoo",
39
39
  "Yandex",
40
40
  "Wikipedia",
41
- "YahooNews",
42
41
 
43
42
  # Result models
44
43
  "TextResult",