webscout 8.2.8__py3-none-any.whl → 8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (197)
  1. webscout/AIauto.py +34 -16
  2. webscout/AIbase.py +96 -37
  3. webscout/AIutel.py +491 -87
  4. webscout/Bard.py +441 -323
  5. webscout/Extra/GitToolkit/__init__.py +10 -10
  6. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  7. webscout/Litlogger/README.md +10 -0
  8. webscout/Litlogger/__init__.py +7 -59
  9. webscout/Litlogger/formats.py +4 -0
  10. webscout/Litlogger/handlers.py +103 -0
  11. webscout/Litlogger/levels.py +13 -0
  12. webscout/Litlogger/logger.py +92 -0
  13. webscout/Provider/AISEARCH/Perplexity.py +332 -358
  14. webscout/Provider/AISEARCH/felo_search.py +9 -35
  15. webscout/Provider/AISEARCH/genspark_search.py +30 -56
  16. webscout/Provider/AISEARCH/hika_search.py +4 -16
  17. webscout/Provider/AISEARCH/iask_search.py +410 -436
  18. webscout/Provider/AISEARCH/monica_search.py +4 -30
  19. webscout/Provider/AISEARCH/scira_search.py +6 -32
  20. webscout/Provider/AISEARCH/webpilotai_search.py +38 -64
  21. webscout/Provider/Blackboxai.py +155 -35
  22. webscout/Provider/ChatSandbox.py +2 -1
  23. webscout/Provider/Deepinfra.py +339 -339
  24. webscout/Provider/ExaChat.py +358 -358
  25. webscout/Provider/Gemini.py +169 -169
  26. webscout/Provider/GithubChat.py +1 -2
  27. webscout/Provider/Glider.py +3 -3
  28. webscout/Provider/HeckAI.py +172 -82
  29. webscout/Provider/LambdaChat.py +1 -0
  30. webscout/Provider/MCPCore.py +7 -3
  31. webscout/Provider/OPENAI/BLACKBOXAI.py +421 -139
  32. webscout/Provider/OPENAI/Cloudflare.py +38 -21
  33. webscout/Provider/OPENAI/FalconH1.py +457 -0
  34. webscout/Provider/OPENAI/FreeGemini.py +35 -18
  35. webscout/Provider/OPENAI/NEMOTRON.py +34 -34
  36. webscout/Provider/OPENAI/PI.py +427 -0
  37. webscout/Provider/OPENAI/Qwen3.py +304 -0
  38. webscout/Provider/OPENAI/README.md +952 -1253
  39. webscout/Provider/OPENAI/TwoAI.py +374 -0
  40. webscout/Provider/OPENAI/__init__.py +7 -1
  41. webscout/Provider/OPENAI/ai4chat.py +73 -63
  42. webscout/Provider/OPENAI/api.py +869 -644
  43. webscout/Provider/OPENAI/base.py +2 -0
  44. webscout/Provider/OPENAI/c4ai.py +34 -13
  45. webscout/Provider/OPENAI/chatgpt.py +575 -556
  46. webscout/Provider/OPENAI/chatgptclone.py +512 -487
  47. webscout/Provider/OPENAI/chatsandbox.py +11 -6
  48. webscout/Provider/OPENAI/copilot.py +258 -0
  49. webscout/Provider/OPENAI/deepinfra.py +327 -318
  50. webscout/Provider/OPENAI/e2b.py +140 -104
  51. webscout/Provider/OPENAI/exaai.py +420 -411
  52. webscout/Provider/OPENAI/exachat.py +448 -443
  53. webscout/Provider/OPENAI/flowith.py +7 -3
  54. webscout/Provider/OPENAI/freeaichat.py +12 -8
  55. webscout/Provider/OPENAI/glider.py +15 -8
  56. webscout/Provider/OPENAI/groq.py +5 -2
  57. webscout/Provider/OPENAI/heckai.py +311 -307
  58. webscout/Provider/OPENAI/llmchatco.py +9 -7
  59. webscout/Provider/OPENAI/mcpcore.py +18 -9
  60. webscout/Provider/OPENAI/multichat.py +7 -5
  61. webscout/Provider/OPENAI/netwrck.py +16 -11
  62. webscout/Provider/OPENAI/oivscode.py +290 -0
  63. webscout/Provider/OPENAI/opkfc.py +507 -496
  64. webscout/Provider/OPENAI/pydantic_imports.py +172 -0
  65. webscout/Provider/OPENAI/scirachat.py +29 -17
  66. webscout/Provider/OPENAI/sonus.py +308 -303
  67. webscout/Provider/OPENAI/standardinput.py +442 -433
  68. webscout/Provider/OPENAI/textpollinations.py +18 -11
  69. webscout/Provider/OPENAI/toolbaz.py +419 -413
  70. webscout/Provider/OPENAI/typefully.py +17 -10
  71. webscout/Provider/OPENAI/typegpt.py +21 -11
  72. webscout/Provider/OPENAI/uncovrAI.py +477 -462
  73. webscout/Provider/OPENAI/utils.py +90 -79
  74. webscout/Provider/OPENAI/venice.py +435 -425
  75. webscout/Provider/OPENAI/wisecat.py +387 -381
  76. webscout/Provider/OPENAI/writecream.py +166 -163
  77. webscout/Provider/OPENAI/x0gpt.py +26 -37
  78. webscout/Provider/OPENAI/yep.py +384 -356
  79. webscout/Provider/PI.py +2 -1
  80. webscout/Provider/TTI/README.md +55 -101
  81. webscout/Provider/TTI/__init__.py +4 -9
  82. webscout/Provider/TTI/aiarta.py +365 -0
  83. webscout/Provider/TTI/artbit.py +0 -0
  84. webscout/Provider/TTI/base.py +64 -0
  85. webscout/Provider/TTI/fastflux.py +200 -0
  86. webscout/Provider/TTI/magicstudio.py +201 -0
  87. webscout/Provider/TTI/piclumen.py +203 -0
  88. webscout/Provider/TTI/pixelmuse.py +225 -0
  89. webscout/Provider/TTI/pollinations.py +221 -0
  90. webscout/Provider/TTI/utils.py +11 -0
  91. webscout/Provider/TTS/__init__.py +2 -1
  92. webscout/Provider/TTS/base.py +159 -159
  93. webscout/Provider/TTS/openai_fm.py +129 -0
  94. webscout/Provider/TextPollinationsAI.py +308 -308
  95. webscout/Provider/TwoAI.py +239 -44
  96. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  97. webscout/Provider/UNFINISHED/puterjs.py +635 -0
  98. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  99. webscout/Provider/Writecream.py +246 -246
  100. webscout/Provider/__init__.py +2 -2
  101. webscout/Provider/ai4chat.py +33 -8
  102. webscout/Provider/granite.py +41 -6
  103. webscout/Provider/koala.py +169 -169
  104. webscout/Provider/oivscode.py +309 -0
  105. webscout/Provider/samurai.py +3 -2
  106. webscout/Provider/scnet.py +1 -0
  107. webscout/Provider/typegpt.py +3 -3
  108. webscout/Provider/uncovr.py +368 -368
  109. webscout/client.py +70 -0
  110. webscout/litprinter/__init__.py +58 -58
  111. webscout/optimizers.py +419 -419
  112. webscout/scout/README.md +3 -1
  113. webscout/scout/core/crawler.py +134 -64
  114. webscout/scout/core/scout.py +148 -109
  115. webscout/scout/element.py +106 -88
  116. webscout/swiftcli/Readme.md +323 -323
  117. webscout/swiftcli/plugins/manager.py +9 -2
  118. webscout/version.py +1 -1
  119. webscout/zeroart/__init__.py +134 -134
  120. webscout/zeroart/effects.py +100 -100
  121. webscout/zeroart/fonts.py +1238 -1238
  122. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/METADATA +160 -35
  123. webscout-8.3.dist-info/RECORD +290 -0
  124. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/WHEEL +1 -1
  125. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/entry_points.txt +1 -0
  126. webscout/Litlogger/Readme.md +0 -175
  127. webscout/Litlogger/core/__init__.py +0 -6
  128. webscout/Litlogger/core/level.py +0 -23
  129. webscout/Litlogger/core/logger.py +0 -165
  130. webscout/Litlogger/handlers/__init__.py +0 -12
  131. webscout/Litlogger/handlers/console.py +0 -33
  132. webscout/Litlogger/handlers/file.py +0 -143
  133. webscout/Litlogger/handlers/network.py +0 -173
  134. webscout/Litlogger/styles/__init__.py +0 -7
  135. webscout/Litlogger/styles/colors.py +0 -249
  136. webscout/Litlogger/styles/formats.py +0 -458
  137. webscout/Litlogger/styles/text.py +0 -87
  138. webscout/Litlogger/utils/__init__.py +0 -6
  139. webscout/Litlogger/utils/detectors.py +0 -153
  140. webscout/Litlogger/utils/formatters.py +0 -200
  141. webscout/Provider/ChatGPTGratis.py +0 -194
  142. webscout/Provider/TTI/AiForce/README.md +0 -159
  143. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  144. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  145. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  146. webscout/Provider/TTI/FreeAIPlayground/README.md +0 -99
  147. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  148. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  149. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  150. webscout/Provider/TTI/ImgSys/README.md +0 -174
  151. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  152. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  153. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  154. webscout/Provider/TTI/MagicStudio/README.md +0 -101
  155. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  156. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  157. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  158. webscout/Provider/TTI/Nexra/README.md +0 -155
  159. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  160. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  161. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  162. webscout/Provider/TTI/PollinationsAI/README.md +0 -146
  163. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  164. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  165. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  166. webscout/Provider/TTI/aiarta/README.md +0 -134
  167. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  168. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  169. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  170. webscout/Provider/TTI/artbit/README.md +0 -100
  171. webscout/Provider/TTI/artbit/__init__.py +0 -22
  172. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  173. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  174. webscout/Provider/TTI/fastflux/README.md +0 -129
  175. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  176. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  177. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  178. webscout/Provider/TTI/huggingface/README.md +0 -114
  179. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  180. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  181. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  182. webscout/Provider/TTI/piclumen/README.md +0 -161
  183. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  184. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  185. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  186. webscout/Provider/TTI/pixelmuse/README.md +0 -79
  187. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  188. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  189. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  190. webscout/Provider/TTI/talkai/README.md +0 -139
  191. webscout/Provider/TTI/talkai/__init__.py +0 -4
  192. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  193. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  194. webscout/Provider/UNFINISHED/oivscode.py +0 -351
  195. webscout-8.2.8.dist-info/RECORD +0 -334
  196. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/licenses/LICENSE.md +0 -0
  197. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/top_level.txt +0 -0
@@ -1,20 +1,20 @@
1
1
  """
2
2
  Scout Main Module - HTML Parsing and Traversal
3
3
  """
4
- import re
5
- import json
6
4
  import hashlib
5
+ import json
6
+ import re
7
7
  import unicodedata
8
8
  import urllib.parse
9
- from typing import List, Dict, Optional, Any
9
+ from typing import Any, Dict, List, Optional
10
10
 
11
+ from ..element import NavigableString, Tag
11
12
  from ..parsers import ParserRegistry
12
- from ..element import Tag, NavigableString
13
13
  from ..utils import decode_markup
14
- from .text_analyzer import ScoutTextAnalyzer
15
- from .web_analyzer import ScoutWebAnalyzer
16
14
  from .search_result import ScoutSearchResult
15
+ from .text_analyzer import ScoutTextAnalyzer
17
16
  from .text_utils import SentenceTokenizer
17
+ from .web_analyzer import ScoutWebAnalyzer
18
18
 
19
19
 
20
20
  class Scout:
@@ -23,11 +23,11 @@ class Scout:
23
23
  A comprehensive HTML parsing and traversal library.
24
24
  Enhanced with advanced features and intelligent parsing.
25
25
  """
26
-
26
+
27
27
  def __init__(self, markup="", features='html.parser', from_encoding=None, **kwargs):
28
28
  """
29
29
  Initialize Scout with HTML content.
30
-
30
+
31
31
  Args:
32
32
  markup (str): HTML content to parse
33
33
  features (str): Parser to use ('html.parser', 'lxml', 'html5lib', 'lxml-xml')
@@ -38,50 +38,50 @@ class Scout:
38
38
  self.markup = self._preprocess_markup(markup, from_encoding)
39
39
  self.features = features
40
40
  self.from_encoding = from_encoding
41
-
41
+
42
42
  # Get the right parser for the job
43
43
  if features not in ParserRegistry.list_parsers():
44
44
  raise ValueError(
45
45
  f"Invalid parser '{features}'! Choose from: {', '.join(ParserRegistry.list_parsers().keys())}"
46
46
  )
47
-
47
+
48
48
  parser_class = ParserRegistry.get_parser(features)
49
49
  self.parser = parser_class
50
-
50
+
51
51
  # Parse that HTML! 🎯
52
52
  self._soup = self.parser.parse(self.markup)
53
-
53
+
54
54
  # BeautifulSoup-like attributes
55
55
  self.name = self._soup.name if hasattr(self._soup, 'name') else None
56
56
  self.attrs = self._soup.attrs if hasattr(self._soup, 'attrs') else {}
57
-
57
+
58
58
  # Advanced parsing options
59
59
  self._cache = {}
60
-
60
+
61
61
  # Text and web analyzers
62
62
  self.text_analyzer = ScoutTextAnalyzer()
63
63
  self.web_analyzer = ScoutWebAnalyzer()
64
-
64
+
65
65
  def normalize_text(self, text: str, form='NFKD') -> str:
66
66
  """
67
67
  Normalize text using Unicode normalization.
68
-
68
+
69
69
  Args:
70
70
  text (str): Input text
71
71
  form (str, optional): Normalization form
72
-
72
+
73
73
  Returns:
74
74
  str: Normalized text
75
75
  """
76
76
  return unicodedata.normalize(form, text)
77
-
77
+
78
78
  def url_parse(self, url: str) -> Dict[str, str]:
79
79
  """
80
80
  Parse and analyze a URL.
81
-
81
+
82
82
  Args:
83
83
  url (str): URL to parse
84
-
84
+
85
85
  Returns:
86
86
  Dict[str, str]: Parsed URL components
87
87
  """
@@ -94,39 +94,39 @@ class Scout:
94
94
  'query': parsed.query,
95
95
  'fragment': parsed.fragment
96
96
  }
97
-
97
+
98
98
  def analyze_page_structure(self) -> Dict[str, Any]:
99
99
  """
100
100
  Analyze the structure of the parsed page.
101
-
101
+
102
102
  Returns:
103
103
  Dict[str, Any]: Page structure analysis
104
104
  """
105
105
  return self.web_analyzer.analyze_page_structure(self)
106
-
106
+
107
107
  def analyze_text(self, text: Optional[str] = None) -> Dict[str, Any]:
108
108
  """
109
109
  Perform advanced text analysis.
110
-
110
+
111
111
  Args:
112
112
  text (str, optional): Text to analyze. If None, uses page text.
113
-
113
+
114
114
  Returns:
115
115
  Dict[str, Any]: Text analysis results
116
116
  """
117
117
  if text is None:
118
118
  text = self.get_text()
119
-
119
+
120
120
  return {
121
121
  'word_count': self.text_analyzer.count_words(text),
122
122
  'entities': self.text_analyzer.extract_entities(text),
123
123
  'tokens': self.text_analyzer.tokenize(text)
124
124
  }
125
-
125
+
126
126
  def extract_semantic_info(self) -> Dict[str, Any]:
127
127
  """
128
128
  Extract semantic information from the document.
129
-
129
+
130
130
  Returns:
131
131
  Dict[str, Any]: Semantic information
132
132
  """
@@ -146,29 +146,29 @@ class Scout:
146
146
  }
147
147
  }
148
148
  return semantic_info
149
-
149
+
150
150
  def cache(self, key: str, value: Any = None) -> Any:
151
151
  """
152
152
  Manage a cache for parsed content.
153
-
153
+
154
154
  Args:
155
155
  key (str): Cache key
156
156
  value (Any, optional): Value to cache
157
-
157
+
158
158
  Returns:
159
159
  Any: Cached value or None
160
160
  """
161
161
  if value is not None:
162
162
  self._cache[key] = value
163
163
  return self._cache.get(key)
164
-
164
+
165
165
  def hash_content(self, method='md5') -> str:
166
166
  """
167
167
  Generate a hash of the parsed content.
168
-
168
+
169
169
  Args:
170
170
  method (str, optional): Hashing method
171
-
171
+
172
172
  Returns:
173
173
  str: Content hash
174
174
  """
@@ -177,21 +177,21 @@ class Scout:
177
177
  'sha1': hashlib.sha1,
178
178
  'sha256': hashlib.sha256
179
179
  }
180
-
180
+
181
181
  if method not in hash_methods:
182
182
  raise ValueError(f"Unsupported hash method: {method}")
183
-
183
+
184
184
  hasher = hash_methods[method]()
185
185
  hasher.update(str(self._soup).encode('utf-8'))
186
186
  return hasher.hexdigest()
187
-
187
+
188
188
  def extract_links(self, base_url: Optional[str] = None) -> List[Dict[str, str]]:
189
189
  """
190
190
  Extract all links from the document.
191
-
191
+
192
192
  Args:
193
193
  base_url (str, optional): Base URL for resolving relative links
194
-
194
+
195
195
  Returns:
196
196
  List[Dict[str, str]]: List of link dictionaries
197
197
  """
@@ -202,7 +202,7 @@ class Scout:
202
202
  # Resolve relative URLs if base_url is provided
203
203
  if base_url and not href.startswith(('http://', 'https://', '//')):
204
204
  href = f"{base_url.rstrip('/')}/{href.lstrip('/')}"
205
-
205
+
206
206
  links.append({
207
207
  'href': href,
208
208
  'text': link.get_text(strip=True),
@@ -210,11 +210,11 @@ class Scout:
210
210
  'type': link.get('type')
211
211
  })
212
212
  return links
213
-
213
+
214
214
  def extract_metadata(self) -> Dict[str, Any]:
215
215
  """
216
216
  Extract metadata from HTML document.
217
-
217
+
218
218
  Returns:
219
219
  Dict[str, Any]: Extracted metadata
220
220
  """
@@ -225,87 +225,87 @@ class Scout:
225
225
  'og_metadata': {},
226
226
  'twitter_metadata': {}
227
227
  }
228
-
228
+
229
229
  # Open Graph metadata
230
230
  for meta in self.find_all('meta', attrs={'property': re.compile(r'^og:')}):
231
231
  key = meta.attrs('property')[0][3:]
232
232
  metadata['og_metadata'][key] = meta.attrs('content')[0]
233
-
233
+
234
234
  # Twitter Card metadata
235
235
  for meta in self.find_all('meta', attrs={'name': re.compile(r'^twitter:')}):
236
236
  key = meta.attrs('name')[0][8:]
237
237
  metadata['twitter_metadata'][key] = meta.attrs('content')[0]
238
-
238
+
239
239
  return metadata
240
-
240
+
241
241
  def to_json(self, indent=2) -> str:
242
242
  """
243
243
  Convert parsed content to JSON.
244
-
244
+
245
245
  Args:
246
246
  indent (int, optional): JSON indentation
247
-
247
+
248
248
  Returns:
249
249
  str: JSON representation of the document
250
250
  """
251
251
  def _tag_to_dict(tag):
252
252
  if isinstance(tag, NavigableString):
253
253
  return str(tag)
254
-
254
+
255
255
  result = {
256
256
  'name': tag.name,
257
257
  'attrs': tag.attrs,
258
258
  'text': tag.get_text(strip=True)
259
259
  }
260
-
260
+
261
261
  if tag.contents:
262
262
  result['children'] = [_tag_to_dict(child) for child in tag.contents]
263
-
263
+
264
264
  return result
265
-
265
+
266
266
  return json.dumps(_tag_to_dict(self._soup), indent=indent)
267
-
267
+
268
268
  def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs) -> ScoutSearchResult:
269
269
  """
270
270
  Find the first matching element.
271
-
271
+
272
272
  Args:
273
273
  name (str, optional): Tag name to search for
274
274
  attrs (dict, optional): Attributes to match
275
275
  recursive (bool, optional): Search recursively
276
276
  text (str, optional): Text content to match
277
-
277
+
278
278
  Returns:
279
279
  ScoutSearchResult: First matching element
280
280
  """
281
281
  result = self._soup.find(name, attrs, recursive, text, **kwargs)
282
282
  return ScoutSearchResult([result]) if result else ScoutSearchResult([])
283
-
283
+
284
284
  def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs) -> ScoutSearchResult:
285
285
  """
286
286
  Find all matching elements.
287
-
287
+
288
288
  Args:
289
289
  name (str, optional): Tag name to search for
290
290
  attrs (dict, optional): Attributes to match
291
291
  recursive (bool, optional): Search recursively
292
292
  text (str, optional): Text content to match
293
293
  limit (int, optional): Maximum number of results
294
-
294
+
295
295
  Returns:
296
296
  ScoutSearchResult: List of matching elements
297
297
  """
298
298
  results = self._soup.find_all(name, attrs, recursive, text, limit, **kwargs)
299
299
  return ScoutSearchResult(results)
300
-
300
+
301
301
  def find_parent(self, name=None, attrs={}, **kwargs) -> Optional[Tag]:
302
302
  """
303
303
  Find the first parent matching given criteria.
304
-
304
+
305
305
  Args:
306
306
  name (str, optional): Tag name to search for
307
307
  attrs (dict, optional): Attributes to match
308
-
308
+
309
309
  Returns:
310
310
  Tag or None: First matching parent
311
311
  """
@@ -316,16 +316,16 @@ class Scout:
316
316
  return current
317
317
  current = current.parent
318
318
  return None
319
-
319
+
320
320
  def find_parents(self, name=None, attrs={}, limit=None, **kwargs) -> List[Tag]:
321
321
  """
322
322
  Find all parents matching given criteria.
323
-
323
+
324
324
  Args:
325
325
  name (str, optional): Tag name to search for
326
326
  attrs (dict, optional): Attributes to match
327
327
  limit (int, optional): Maximum number of results
328
-
328
+
329
329
  Returns:
330
330
  List[Tag]: List of matching parents
331
331
  """
@@ -337,21 +337,21 @@ class Scout:
337
337
  parents.append(current)
338
338
  current = current.parent
339
339
  return parents
340
-
340
+
341
341
  def find_next_sibling(self, name=None, attrs={}, **kwargs) -> Optional[Tag]:
342
342
  """
343
343
  Find the next sibling matching given criteria.
344
-
344
+
345
345
  Args:
346
346
  name (str, optional): Tag name to search for
347
347
  attrs (dict, optional): Attributes to match
348
-
348
+
349
349
  Returns:
350
350
  Tag or None: First matching next sibling
351
351
  """
352
352
  if not self._soup.parent:
353
353
  return None
354
-
354
+
355
355
  siblings = self._soup.parent.contents
356
356
  try:
357
357
  current_index = siblings.index(self._soup)
@@ -363,22 +363,22 @@ class Scout:
363
363
  except ValueError:
364
364
  pass
365
365
  return None
366
-
366
+
367
367
  def find_next_siblings(self, name=None, attrs={}, limit=None, **kwargs) -> List[Tag]:
368
368
  """
369
369
  Find all next siblings matching given criteria.
370
-
370
+
371
371
  Args:
372
372
  name (str, optional): Tag name to search for
373
373
  attrs (dict, optional): Attributes to match
374
374
  limit (int, optional): Maximum number of results
375
-
375
+
376
376
  Returns:
377
377
  List[Tag]: List of matching next siblings
378
378
  """
379
379
  if not self._soup.parent:
380
380
  return []
381
-
381
+
382
382
  siblings = []
383
383
  siblings_list = self._soup.parent.contents
384
384
  try:
@@ -393,40 +393,79 @@ class Scout:
393
393
  except ValueError:
394
394
  pass
395
395
  return siblings
396
-
396
+
397
+ def find_previous_sibling(self, name=None, attrs={}, **kwargs) -> Optional[Tag]:
398
+ """Find the previous sibling matching given criteria."""
399
+ if not self._soup.parent:
400
+ return None
401
+
402
+ siblings = self._soup.parent.contents
403
+ try:
404
+ current_index = siblings.index(self._soup)
405
+ for sibling in reversed(siblings[:current_index]):
406
+ if isinstance(sibling, Tag):
407
+ if (name is None or sibling.name == name) and all(
408
+ sibling.get(k) == v for k, v in attrs.items()
409
+ ):
410
+ return sibling
411
+ except ValueError:
412
+ pass
413
+ return None
414
+
415
+ def find_previous_siblings(self, name=None, attrs={}, limit=None, **kwargs) -> List[Tag]:
416
+ """Find all previous siblings matching given criteria."""
417
+ if not self._soup.parent:
418
+ return []
419
+
420
+ siblings = []
421
+ siblings_list = self._soup.parent.contents
422
+ try:
423
+ current_index = siblings_list.index(self._soup)
424
+ for sibling in reversed(siblings_list[:current_index]):
425
+ if isinstance(sibling, Tag):
426
+ if (name is None or sibling.name == name) and all(
427
+ sibling.get(k) == v for k, v in attrs.items()
428
+ ):
429
+ siblings.append(sibling)
430
+ if limit and len(siblings) == limit:
431
+ break
432
+ except ValueError:
433
+ pass
434
+ return siblings
435
+
397
436
  def select(self, selector: str) -> List[Tag]:
398
437
  """
399
438
  Select elements using CSS selector.
400
-
439
+
401
440
  Args:
402
441
  selector (str): CSS selector string
403
-
442
+
404
443
  Returns:
405
444
  List[Tag]: List of matching elements
406
445
  """
407
446
  return self._soup.select(selector)
408
-
447
+
409
448
  def select_one(self, selector: str) -> Optional[Tag]:
410
449
  """
411
450
  Select the first element matching the CSS selector.
412
-
451
+
413
452
  Args:
414
453
  selector (str): CSS selector string
415
-
454
+
416
455
  Returns:
417
456
  Tag or None: First matching element
418
457
  """
419
458
  return self._soup.select_one(selector)
420
-
459
+
421
460
  def get_text(self, separator=' ', strip=False, types=None) -> str:
422
461
  """
423
462
  Extract all text from the parsed document.
424
-
463
+
425
464
  Args:
426
465
  separator (str, optional): Text separator
427
466
  strip (bool, optional): Strip whitespace
428
467
  types (list, optional): Types of content to extract
429
-
468
+
430
469
  Returns:
431
470
  str: Extracted text
432
471
  """
@@ -434,113 +473,113 @@ class Scout:
434
473
  text = self._soup.get_text(separator, strip, types)
435
474
  sentences = tokenizer.tokenize(text)
436
475
  return "\n\n".join(sentences)
437
-
476
+
438
477
  def remove_tags(self, tags: List[str]) -> None:
439
478
  """
440
479
  Remove specified tags and their contents from the document.
441
-
480
+
442
481
  Args:
443
482
  tags (List[str]): List of tag names to remove
444
483
  """
445
484
  for tag_name in tags:
446
485
  for tag in self._soup.find_all(tag_name):
447
486
  tag.decompose()
448
-
487
+
449
488
  def prettify(self, formatter='minimal') -> str:
450
489
  """
451
490
  Return a formatted, pretty-printed version of the HTML.
452
-
491
+
453
492
  Args:
454
493
  formatter (str, optional): Formatting style
455
-
494
+
456
495
  Returns:
457
496
  str: Prettified HTML
458
497
  """
459
498
  return self._soup.prettify(formatter)
460
-
499
+
461
500
  def decompose(self, tag: Tag = None) -> None:
462
501
  """
463
502
  Remove a tag and its contents from the document.
464
-
503
+
465
504
  Args:
466
505
  tag (Tag, optional): Tag to remove. If None, removes the root tag.
467
506
  """
468
507
  if tag is None:
469
508
  tag = self._soup
470
509
  tag.decompose()
471
-
510
+
472
511
  def extract(self, tag: Tag = None) -> Tag:
473
512
  """
474
513
  Remove a tag from the document and return it.
475
-
514
+
476
515
  Args:
477
516
  tag (Tag, optional): Tag to extract. If None, extracts the root tag.
478
-
517
+
479
518
  Returns:
480
519
  Tag: Extracted tag
481
520
  """
482
521
  if tag is None:
483
522
  tag = self._soup
484
523
  return tag.extract()
485
-
524
+
486
525
  def clear(self, tag: Tag = None) -> None:
487
526
  """
488
527
  Remove a tag's contents while keeping the tag itself.
489
-
528
+
490
529
  Args:
491
530
  tag (Tag, optional): Tag to clear. If None, clears the root tag.
492
531
  """
493
532
  if tag is None:
494
533
  tag = self._soup
495
534
  tag.clear()
496
-
535
+
497
536
  def replace_with(self, old_tag: Tag, new_tag: Tag) -> None:
498
537
  """
499
538
  Replace one tag with another.
500
-
539
+
501
540
  Args:
502
541
  old_tag (Tag): Tag to replace
503
542
  new_tag (Tag): Replacement tag
504
543
  """
505
544
  old_tag.replace_with(new_tag)
506
-
545
+
507
546
  def encode(self, encoding='utf-8') -> bytes:
508
547
  """
509
548
  Encode the document to a specific encoding.
510
-
549
+
511
550
  Args:
512
551
  encoding (str, optional): Encoding to use
513
-
552
+
514
553
  Returns:
515
554
  bytes: Encoded document
516
555
  """
517
556
  return str(self._soup).encode(encoding)
518
-
557
+
519
558
  def decode(self, encoding='utf-8') -> str:
520
559
  """
521
560
  Decode the document from a specific encoding.
522
-
561
+
523
562
  Args:
524
563
  encoding (str, optional): Encoding to use
525
-
564
+
526
565
  Returns:
527
566
  str: Decoded document
528
567
  """
529
568
  return str(self._soup)
530
-
569
+
531
570
  def __str__(self) -> str:
532
571
  """
533
572
  String representation of the parsed document.
534
-
573
+
535
574
  Returns:
536
575
  str: HTML content
537
576
  """
538
577
  return str(self._soup)
539
-
578
+
540
579
  def __repr__(self) -> str:
541
580
  """
542
581
  Detailed representation of the Scout object.
543
-
582
+
544
583
  Returns:
545
584
  str: Scout object description
546
585
  """
@@ -549,20 +588,20 @@ class Scout:
549
588
  def _preprocess_markup(self, markup: str, encoding: Optional[str] = None) -> str:
550
589
  """
551
590
  Preprocess markup before parsing.
552
-
591
+
553
592
  Args:
554
593
  markup (str): Input markup
555
594
  encoding (str, optional): Encoding to use
556
-
595
+
557
596
  Returns:
558
597
  str: Preprocessed markup
559
598
  """
560
599
  # Decode markup
561
600
  decoded_markup = decode_markup(markup, encoding)
562
-
601
+
563
602
  # Basic HTML cleaning
564
603
  # Remove comments, normalize whitespace, etc.
565
604
  decoded_markup = re.sub(r'<!--.*?-->', '', decoded_markup, flags=re.DOTALL)
566
605
  decoded_markup = re.sub(r'\s+', ' ', decoded_markup)
567
-
606
+
568
607
  return decoded_markup