webscout 8.2.8__py3-none-any.whl → 8.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (197) hide show
  1. webscout/AIauto.py +34 -16
  2. webscout/AIbase.py +96 -37
  3. webscout/AIutel.py +491 -87
  4. webscout/Bard.py +441 -323
  5. webscout/Extra/GitToolkit/__init__.py +10 -10
  6. webscout/Extra/YTToolkit/ytapi/video.py +232 -232
  7. webscout/Litlogger/README.md +10 -0
  8. webscout/Litlogger/__init__.py +7 -59
  9. webscout/Litlogger/formats.py +4 -0
  10. webscout/Litlogger/handlers.py +103 -0
  11. webscout/Litlogger/levels.py +13 -0
  12. webscout/Litlogger/logger.py +92 -0
  13. webscout/Provider/AISEARCH/Perplexity.py +332 -358
  14. webscout/Provider/AISEARCH/felo_search.py +9 -35
  15. webscout/Provider/AISEARCH/genspark_search.py +30 -56
  16. webscout/Provider/AISEARCH/hika_search.py +4 -16
  17. webscout/Provider/AISEARCH/iask_search.py +410 -436
  18. webscout/Provider/AISEARCH/monica_search.py +4 -30
  19. webscout/Provider/AISEARCH/scira_search.py +6 -32
  20. webscout/Provider/AISEARCH/webpilotai_search.py +38 -64
  21. webscout/Provider/Blackboxai.py +155 -35
  22. webscout/Provider/ChatSandbox.py +2 -1
  23. webscout/Provider/Deepinfra.py +339 -339
  24. webscout/Provider/ExaChat.py +358 -358
  25. webscout/Provider/Gemini.py +169 -169
  26. webscout/Provider/GithubChat.py +1 -2
  27. webscout/Provider/Glider.py +3 -3
  28. webscout/Provider/HeckAI.py +172 -82
  29. webscout/Provider/LambdaChat.py +1 -0
  30. webscout/Provider/MCPCore.py +7 -3
  31. webscout/Provider/OPENAI/BLACKBOXAI.py +421 -139
  32. webscout/Provider/OPENAI/Cloudflare.py +38 -21
  33. webscout/Provider/OPENAI/FalconH1.py +457 -0
  34. webscout/Provider/OPENAI/FreeGemini.py +35 -18
  35. webscout/Provider/OPENAI/NEMOTRON.py +34 -34
  36. webscout/Provider/OPENAI/PI.py +427 -0
  37. webscout/Provider/OPENAI/Qwen3.py +304 -0
  38. webscout/Provider/OPENAI/README.md +952 -1253
  39. webscout/Provider/OPENAI/TwoAI.py +374 -0
  40. webscout/Provider/OPENAI/__init__.py +7 -1
  41. webscout/Provider/OPENAI/ai4chat.py +73 -63
  42. webscout/Provider/OPENAI/api.py +869 -644
  43. webscout/Provider/OPENAI/base.py +2 -0
  44. webscout/Provider/OPENAI/c4ai.py +34 -13
  45. webscout/Provider/OPENAI/chatgpt.py +575 -556
  46. webscout/Provider/OPENAI/chatgptclone.py +512 -487
  47. webscout/Provider/OPENAI/chatsandbox.py +11 -6
  48. webscout/Provider/OPENAI/copilot.py +258 -0
  49. webscout/Provider/OPENAI/deepinfra.py +327 -318
  50. webscout/Provider/OPENAI/e2b.py +140 -104
  51. webscout/Provider/OPENAI/exaai.py +420 -411
  52. webscout/Provider/OPENAI/exachat.py +448 -443
  53. webscout/Provider/OPENAI/flowith.py +7 -3
  54. webscout/Provider/OPENAI/freeaichat.py +12 -8
  55. webscout/Provider/OPENAI/glider.py +15 -8
  56. webscout/Provider/OPENAI/groq.py +5 -2
  57. webscout/Provider/OPENAI/heckai.py +311 -307
  58. webscout/Provider/OPENAI/llmchatco.py +9 -7
  59. webscout/Provider/OPENAI/mcpcore.py +18 -9
  60. webscout/Provider/OPENAI/multichat.py +7 -5
  61. webscout/Provider/OPENAI/netwrck.py +16 -11
  62. webscout/Provider/OPENAI/oivscode.py +290 -0
  63. webscout/Provider/OPENAI/opkfc.py +507 -496
  64. webscout/Provider/OPENAI/pydantic_imports.py +172 -0
  65. webscout/Provider/OPENAI/scirachat.py +29 -17
  66. webscout/Provider/OPENAI/sonus.py +308 -303
  67. webscout/Provider/OPENAI/standardinput.py +442 -433
  68. webscout/Provider/OPENAI/textpollinations.py +18 -11
  69. webscout/Provider/OPENAI/toolbaz.py +419 -413
  70. webscout/Provider/OPENAI/typefully.py +17 -10
  71. webscout/Provider/OPENAI/typegpt.py +21 -11
  72. webscout/Provider/OPENAI/uncovrAI.py +477 -462
  73. webscout/Provider/OPENAI/utils.py +90 -79
  74. webscout/Provider/OPENAI/venice.py +435 -425
  75. webscout/Provider/OPENAI/wisecat.py +387 -381
  76. webscout/Provider/OPENAI/writecream.py +166 -163
  77. webscout/Provider/OPENAI/x0gpt.py +26 -37
  78. webscout/Provider/OPENAI/yep.py +384 -356
  79. webscout/Provider/PI.py +2 -1
  80. webscout/Provider/TTI/README.md +55 -101
  81. webscout/Provider/TTI/__init__.py +4 -9
  82. webscout/Provider/TTI/aiarta.py +365 -0
  83. webscout/Provider/TTI/artbit.py +0 -0
  84. webscout/Provider/TTI/base.py +64 -0
  85. webscout/Provider/TTI/fastflux.py +200 -0
  86. webscout/Provider/TTI/magicstudio.py +201 -0
  87. webscout/Provider/TTI/piclumen.py +203 -0
  88. webscout/Provider/TTI/pixelmuse.py +225 -0
  89. webscout/Provider/TTI/pollinations.py +221 -0
  90. webscout/Provider/TTI/utils.py +11 -0
  91. webscout/Provider/TTS/__init__.py +2 -1
  92. webscout/Provider/TTS/base.py +159 -159
  93. webscout/Provider/TTS/openai_fm.py +129 -0
  94. webscout/Provider/TextPollinationsAI.py +308 -308
  95. webscout/Provider/TwoAI.py +239 -44
  96. webscout/Provider/UNFINISHED/Youchat.py +330 -330
  97. webscout/Provider/UNFINISHED/puterjs.py +635 -0
  98. webscout/Provider/UNFINISHED/test_lmarena.py +119 -119
  99. webscout/Provider/Writecream.py +246 -246
  100. webscout/Provider/__init__.py +2 -2
  101. webscout/Provider/ai4chat.py +33 -8
  102. webscout/Provider/granite.py +41 -6
  103. webscout/Provider/koala.py +169 -169
  104. webscout/Provider/oivscode.py +309 -0
  105. webscout/Provider/samurai.py +3 -2
  106. webscout/Provider/scnet.py +1 -0
  107. webscout/Provider/typegpt.py +3 -3
  108. webscout/Provider/uncovr.py +368 -368
  109. webscout/client.py +70 -0
  110. webscout/litprinter/__init__.py +58 -58
  111. webscout/optimizers.py +419 -419
  112. webscout/scout/README.md +3 -1
  113. webscout/scout/core/crawler.py +134 -64
  114. webscout/scout/core/scout.py +148 -109
  115. webscout/scout/element.py +106 -88
  116. webscout/swiftcli/Readme.md +323 -323
  117. webscout/swiftcli/plugins/manager.py +9 -2
  118. webscout/version.py +1 -1
  119. webscout/zeroart/__init__.py +134 -134
  120. webscout/zeroart/effects.py +100 -100
  121. webscout/zeroart/fonts.py +1238 -1238
  122. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/METADATA +160 -35
  123. webscout-8.3.dist-info/RECORD +290 -0
  124. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/WHEEL +1 -1
  125. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/entry_points.txt +1 -0
  126. webscout/Litlogger/Readme.md +0 -175
  127. webscout/Litlogger/core/__init__.py +0 -6
  128. webscout/Litlogger/core/level.py +0 -23
  129. webscout/Litlogger/core/logger.py +0 -165
  130. webscout/Litlogger/handlers/__init__.py +0 -12
  131. webscout/Litlogger/handlers/console.py +0 -33
  132. webscout/Litlogger/handlers/file.py +0 -143
  133. webscout/Litlogger/handlers/network.py +0 -173
  134. webscout/Litlogger/styles/__init__.py +0 -7
  135. webscout/Litlogger/styles/colors.py +0 -249
  136. webscout/Litlogger/styles/formats.py +0 -458
  137. webscout/Litlogger/styles/text.py +0 -87
  138. webscout/Litlogger/utils/__init__.py +0 -6
  139. webscout/Litlogger/utils/detectors.py +0 -153
  140. webscout/Litlogger/utils/formatters.py +0 -200
  141. webscout/Provider/ChatGPTGratis.py +0 -194
  142. webscout/Provider/TTI/AiForce/README.md +0 -159
  143. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  144. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  145. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  146. webscout/Provider/TTI/FreeAIPlayground/README.md +0 -99
  147. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  148. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  149. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  150. webscout/Provider/TTI/ImgSys/README.md +0 -174
  151. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  152. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  153. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  154. webscout/Provider/TTI/MagicStudio/README.md +0 -101
  155. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  156. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  157. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  158. webscout/Provider/TTI/Nexra/README.md +0 -155
  159. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  160. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  161. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  162. webscout/Provider/TTI/PollinationsAI/README.md +0 -146
  163. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  164. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  165. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  166. webscout/Provider/TTI/aiarta/README.md +0 -134
  167. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  168. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  169. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  170. webscout/Provider/TTI/artbit/README.md +0 -100
  171. webscout/Provider/TTI/artbit/__init__.py +0 -22
  172. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  173. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  174. webscout/Provider/TTI/fastflux/README.md +0 -129
  175. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  176. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  177. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  178. webscout/Provider/TTI/huggingface/README.md +0 -114
  179. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  180. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  181. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  182. webscout/Provider/TTI/piclumen/README.md +0 -161
  183. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  184. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  185. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  186. webscout/Provider/TTI/pixelmuse/README.md +0 -79
  187. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  188. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  189. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  190. webscout/Provider/TTI/talkai/README.md +0 -139
  191. webscout/Provider/TTI/talkai/__init__.py +0 -4
  192. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  193. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  194. webscout/Provider/UNFINISHED/oivscode.py +0 -351
  195. webscout-8.2.8.dist-info/RECORD +0 -334
  196. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/licenses/LICENSE.md +0 -0
  197. {webscout-8.2.8.dist-info → webscout-8.3.dist-info}/top_level.txt +0 -0
webscout/AIutel.py CHANGED
@@ -1,9 +1,21 @@
1
- import json
2
- from typing import Union, Optional, Dict, Any, Iterable, Generator, List, Callable, Literal
3
1
  import codecs
2
+ import json
3
+ from typing import (
4
+ Any,
5
+ AsyncGenerator,
6
+ AsyncIterable,
7
+ Callable,
8
+ Dict,
9
+ Generator,
10
+ Iterable,
11
+ List,
12
+ Literal,
13
+ Optional,
14
+ Union,
15
+ )
4
16
 
5
17
  # Expanded encoding types
6
- EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
18
+ EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
7
19
  'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
8
20
  'shift_jis', 'euc-jp', 'euc-kr']
9
21
 
@@ -14,8 +26,40 @@ def _process_chunk(
14
26
  skip_markers: List[str],
15
27
  strip_chars: Optional[str],
16
28
  yield_raw_on_error: bool,
29
+ error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
17
30
  ) -> Union[str, Dict[str, Any], None]:
18
- """Internal helper to sanitize and potentially parse a single chunk."""
31
+ """
32
+ Sanitizes and potentially parses a single chunk of text.
33
+
34
+ This function performs several operations on the input chunk:
35
+ - Removes a specified prefix (`intro_value`).
36
+ - Strips leading/trailing characters (`strip_chars`).
37
+ - Skips chunks matching specific markers (`skip_markers`).
38
+ - Optionally parses the chunk as JSON (`to_json`).
39
+ - Handles JSON parsing errors with an optional callback (`error_handler`).
40
+
41
+ Args:
42
+ chunk (str): The chunk of text to process.
43
+ intro_value (str): The prefix to remove from the chunk.
44
+ to_json (bool): If True, attempts to parse the chunk as JSON.
45
+ skip_markers (List[str]): A list of markers; chunks matching these are skipped.
46
+ strip_chars (Optional[str]): Characters to strip from the beginning and end of the chunk.
47
+ yield_raw_on_error (bool): If True, returns the raw chunk when JSON parsing fails; otherwise, returns None.
48
+ error_handler (Optional[Callable[[Exception, str], Optional[Any]]]): An optional callback function that is called when JSON parsing fails.
49
+ It receives the exception and the sanitized chunk as arguments. It should return a value to yield instead of the raw chunk, or None to ignore.
50
+
51
+
52
+ Args:
53
+ chunk: Chunk of text to process.
54
+ intro_value: Prefix to remove from the chunk.
55
+ to_json: Parse the chunk as JSON if True.
56
+ skip_markers: List of markers to skip.
57
+ strip_chars: Characters to strip from the chunk.
58
+ yield_raw_on_error: Whether to return the raw chunk on parse errors.
59
+ error_handler: Optional callback ``Callable[[Exception, str], Optional[Any]]``
60
+ invoked when JSON parsing fails. The callback should return a value to
61
+ yield instead of the raw chunk, or ``None`` to ignore.
62
+ """
19
63
  if not isinstance(chunk, str):
20
64
  return None
21
65
 
@@ -46,20 +90,44 @@ def _process_chunk(
46
90
  if sanitized_chunk[0] not in '{[' or sanitized_chunk[-1] not in '}]':
47
91
  sanitized_chunk = sanitized_chunk.strip()
48
92
  return json.loads(sanitized_chunk)
49
- except (json.JSONDecodeError, Exception):
93
+ except (json.JSONDecodeError, Exception) as e:
94
+ if error_handler:
95
+ try:
96
+ handled = error_handler(e, sanitized_chunk)
97
+ if handled is not None:
98
+ return handled
99
+ except Exception:
100
+ pass
50
101
  return sanitized_chunk if yield_raw_on_error else None
51
-
102
+
52
103
  return sanitized_chunk
53
104
 
54
105
  def _decode_byte_stream(
55
- byte_iterator: Iterable[bytes],
106
+ byte_iterator: Iterable[bytes],
56
107
  encoding: EncodingType = 'utf-8',
57
108
  errors: str = 'replace',
58
109
  buffer_size: int = 8192
59
110
  ) -> Generator[str, None, None]:
60
111
  """
61
- Realtime byte stream decoder with flexible encoding support.
62
-
112
+ Decodes a byte stream in realtime with flexible encoding support.
113
+
114
+ This function takes an iterator of bytes and decodes it into a stream of strings
115
+ using the specified character encoding. It handles encoding errors gracefully
116
+ and can be tuned for performance with the `buffer_size` parameter.
117
+
118
+ Args:
119
+ byte_iterator (Iterable[bytes]): An iterator that yields chunks of bytes.
120
+ encoding (EncodingType): The character encoding to use for decoding.
121
+ Defaults to 'utf-8'. Supports a wide range of encodings, including:
122
+ 'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
123
+ 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
124
+ 'shift_jis', 'euc-jp', 'euc-kr'.
125
+ errors (str): Specifies how encoding errors should be handled.
126
+ Options are 'strict' (raises an error), 'ignore' (skips the error), and
127
+ 'replace' (replaces the erroneous byte with a replacement character).
128
+ Defaults to 'replace'.
129
+ buffer_size (int): The size of the internal buffer used for decoding.
130
+
63
131
  Args:
64
132
  byte_iterator: Iterator yielding bytes
65
133
  encoding: Character encoding to use
@@ -72,11 +140,11 @@ def _decode_byte_stream(
72
140
  except LookupError:
73
141
  # Fallback to utf-8 if the encoding is not supported
74
142
  decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
75
-
143
+
76
144
  # Process byte stream in realtime
77
145
  buffer = bytearray(buffer_size)
78
146
  buffer_view = memoryview(buffer)
79
-
147
+
80
148
  for chunk_bytes in byte_iterator:
81
149
  if not chunk_bytes:
82
150
  continue
@@ -88,12 +156,12 @@ def _decode_byte_stream(
88
156
  text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
89
157
  else:
90
158
  text = decoder.decode(chunk_bytes, final=False)
91
-
159
+
92
160
  if text:
93
161
  yield text
94
162
  except UnicodeDecodeError:
95
163
  yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
96
-
164
+
97
165
  # Final flush
98
166
  try:
99
167
  final_text = decoder.decode(b'', final=True)
@@ -102,7 +170,63 @@ def _decode_byte_stream(
102
170
  except UnicodeDecodeError:
103
171
  yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
104
172
 
105
- def sanitize_stream(
173
+ async def _decode_byte_stream_async(
174
+ byte_iterator: Iterable[bytes],
175
+ encoding: EncodingType = 'utf-8',
176
+ errors: str = 'replace',
177
+ buffer_size: int = 8192
178
+ ) -> AsyncGenerator[str, None]:
179
+ """
180
+ Asynchronously decodes a byte stream with flexible encoding support.
181
+
182
+ This function is the asynchronous counterpart to `_decode_byte_stream`. It takes
183
+ an asynchronous iterator of bytes and decodes it into a stream of strings using
184
+ the specified character encoding. It handles encoding errors gracefully and can
185
+ be tuned for performance with the `buffer_size` parameter.
186
+
187
+ Args:
188
+ byte_iterator (Iterable[bytes]): An asynchronous iterator that yields chunks of bytes.
189
+ encoding (EncodingType): The character encoding to use for decoding.
190
+ Defaults to 'utf-8'. Supports a wide range of encodings, including:
191
+ 'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
192
+ 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
193
+ 'shift_jis', 'euc-jp', 'euc-kr'.
194
+ errors (str): Specifies how encoding errors should be handled.
195
+ Options are 'strict' (raises an error), 'ignore' (skips the error), and
196
+ 'replace' (replaces the erroneous byte with a replacement character).
197
+ Defaults to 'replace'.
198
+ buffer_size (int): The size of the internal buffer used for decoding.
199
+ """
200
+ try:
201
+ decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
202
+ except LookupError:
203
+ decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
204
+
205
+ buffer = bytearray(buffer_size)
206
+ buffer_view = memoryview(buffer)
207
+
208
+ async for chunk_bytes in byte_iterator:
209
+ if not chunk_bytes:
210
+ continue
211
+ try:
212
+ if len(chunk_bytes) <= buffer_size:
213
+ buffer[:len(chunk_bytes)] = chunk_bytes
214
+ text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
215
+ else:
216
+ text = decoder.decode(chunk_bytes, final=False)
217
+ if text:
218
+ yield text
219
+ except UnicodeDecodeError:
220
+ yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
221
+
222
+ try:
223
+ final_text = decoder.decode(b'', final=True)
224
+ if final_text:
225
+ yield final_text
226
+ except UnicodeDecodeError:
227
+ yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
228
+
229
+ def _sanitize_stream_sync(
106
230
  data: Union[str, Iterable[str], Iterable[bytes]],
107
231
  intro_value: str = "data:",
108
232
  to_json: bool = True,
@@ -115,10 +239,39 @@ def sanitize_stream(
115
239
  encoding: EncodingType = 'utf-8',
116
240
  encoding_errors: str = 'replace',
117
241
  buffer_size: int = 8192,
242
+ line_delimiter: Optional[str] = None,
243
+ error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
118
244
  ) -> Generator[Any, None, None]:
119
245
  """
120
- Robust realtime stream processor that handles string/byte streams with correct marker extraction/skipping.
121
- Now handles split markers, partial chunks, and skips lines containing (not just equal to) skip markers.
246
+ Processes a stream of data (strings or bytes) in real-time, applying various transformations and filtering.
247
+
248
+ This function is designed to handle streaming data, allowing for operations such as
249
+ prefix removal, JSON parsing, skipping lines based on markers, and extracting specific content.
250
+ It also supports custom error handling for JSON parsing failures.
251
+
252
+ Args:
253
+ data: String, iterable of strings, or iterable of bytes to process.
254
+ intro_value: Prefix indicating the start of meaningful data.
255
+ to_json: Parse JSON content if ``True``.
256
+ skip_markers: Lines containing any of these markers are skipped.
257
+ strip_chars: Characters to strip from each line.
258
+ start_marker: Begin processing only after this marker is found.
259
+ end_marker: Stop processing once this marker is found.
260
+ content_extractor: Optional callable to transform parsed content before yielding.
261
+ yield_raw_on_error: Yield raw lines when JSON parsing fails.
262
+ encoding: Byte stream encoding.
263
+ encoding_errors: How to handle encoding errors.
264
+ buffer_size: Buffer size for byte decoding.
265
+ line_delimiter: Delimiter used to split incoming text into lines. ``None``
266
+ uses ``str.splitlines()``.
267
+ error_handler: Callback invoked with ``(Exception, str)`` when JSON
268
+ parsing fails. If the callback returns a value, it is yielded instead of the raw line.
269
+
270
+ Yields:
271
+ Any: Processed data, which can be a string, a dictionary (if `to_json` is True), or the result of `content_extractor`.
272
+
273
+ Raises:
274
+ TypeError: If the input `data` is not a string or an iterable.
122
275
  """
123
276
  effective_skip_markers = skip_markers or []
124
277
  processing_active = start_marker is None
@@ -134,12 +287,21 @@ def sanitize_stream(
134
287
  data = data.strip()
135
288
  if data:
136
289
  processed_item = json.loads(data)
137
- except json.JSONDecodeError:
138
- processed_item = data if yield_raw_on_error else None
290
+ except Exception as e:
291
+ if error_handler:
292
+ try:
293
+ handled = error_handler(e, data)
294
+ if handled is not None:
295
+ processed_item = handled
296
+
297
+ except Exception:
298
+ pass
299
+ if processed_item is None:
300
+ processed_item = data if yield_raw_on_error else None
139
301
  else:
140
302
  processed_item = _process_chunk(
141
- data, intro_value, False, effective_skip_markers,
142
- strip_chars, yield_raw_on_error
303
+ data, intro_value, False, effective_skip_markers,
304
+ strip_chars, yield_raw_on_error, error_handler
143
305
  )
144
306
  if processed_item is not None:
145
307
  if content_extractor:
@@ -168,7 +330,7 @@ def sanitize_stream(
168
330
  # Determine if we're dealing with bytes or strings
169
331
  if isinstance(first_item, bytes):
170
332
  line_iterator = _decode_byte_stream(
171
- stream,
333
+ stream,
172
334
  encoding=encoding,
173
335
  errors=encoding_errors,
174
336
  buffer_size=buffer_size
@@ -206,44 +368,27 @@ def sanitize_stream(
206
368
  processing_active = True
207
369
  # Process chunk if we are in active region
208
370
  if chunk and processing_active:
209
- # Split into lines for skip marker logic
210
- for subline in chunk.splitlines():
211
- # Remove intro_value prefix if present
212
- if intro_value and subline.startswith(intro_value):
213
- subline = subline[len(intro_value):]
214
- # Strip chars if needed
215
- if strip_chars is not None:
216
- subline = subline.strip(strip_chars)
217
- else:
218
- subline = subline.lstrip()
219
- # Skip if matches any skip marker (using 'in')
220
- if any(marker in subline for marker in effective_skip_markers):
371
+ for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
372
+ result = _process_chunk(
373
+ subline,
374
+ intro_value,
375
+ to_json,
376
+ effective_skip_markers,
377
+ strip_chars,
378
+ yield_raw_on_error,
379
+ error_handler,
380
+ )
381
+ if result is None:
221
382
  continue
222
- # Skip empty
223
- if not subline:
224
- continue
225
- # JSON parse if needed
226
- if to_json:
383
+ if content_extractor:
227
384
  try:
228
- if subline and (subline[0] in '{[' and subline[-1] in '}]'):
229
- parsed = json.loads(subline)
230
- result = parsed
231
- else:
232
- result = subline
385
+ final_content = content_extractor(result)
386
+ if final_content is not None:
387
+ yield final_content
233
388
  except Exception:
234
- result = subline if yield_raw_on_error else None
389
+ pass
235
390
  else:
236
- result = subline
237
- if result is not None:
238
- if content_extractor:
239
- try:
240
- final_content = content_extractor(result)
241
- if final_content is not None:
242
- yield final_content
243
- except Exception:
244
- pass
245
- else:
246
- yield result
391
+ yield result
247
392
  if not processing_active:
248
393
  found_start = False
249
394
  if idx == -1:
@@ -253,38 +398,27 @@ def sanitize_stream(
253
398
  chunk = buffer
254
399
  buffer = ""
255
400
  if chunk:
256
- for subline in chunk.splitlines():
257
- if intro_value and subline.startswith(intro_value):
258
- subline = subline[len(intro_value):]
259
- if strip_chars is not None:
260
- subline = subline.strip(strip_chars)
261
- else:
262
- subline = subline.lstrip()
263
- if any(marker in subline for marker in effective_skip_markers):
401
+ for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
402
+ result = _process_chunk(
403
+ subline,
404
+ intro_value,
405
+ to_json,
406
+ effective_skip_markers,
407
+ strip_chars,
408
+ yield_raw_on_error,
409
+ error_handler,
410
+ )
411
+ if result is None:
264
412
  continue
265
- if not subline:
266
- continue
267
- if to_json:
413
+ if content_extractor:
268
414
  try:
269
- if subline and (subline[0] in '{[' and subline[-1] in '}]'):
270
- parsed = json.loads(subline)
271
- result = parsed
272
- else:
273
- result = subline
415
+ final_content = content_extractor(result)
416
+ if final_content is not None:
417
+ yield final_content
274
418
  except Exception:
275
- result = subline if yield_raw_on_error else None
419
+ pass
276
420
  else:
277
- result = subline
278
- if result is not None:
279
- if content_extractor:
280
- try:
281
- final_content = content_extractor(result)
282
- if final_content is not None:
283
- yield final_content
284
- except Exception:
285
- pass
286
- else:
287
- yield result
421
+ yield result
288
422
  break
289
423
  else:
290
424
  break
@@ -293,7 +427,277 @@ def sanitize_stream(
293
427
  print(f"Stream processing error: {str(e)}", file=sys.stderr)
294
428
 
295
429
 
296
- from .conversation import Conversation
297
- from .optimizers import Optimizers
298
- from .Extra.autocoder import AutoCoder
299
- from .prompt_manager import AwesomePrompts
430
+ async def _sanitize_stream_async(
431
+ data: Union[str, Iterable[str], Iterable[bytes]],
432
+ intro_value: str = "data:",
433
+ to_json: bool = True,
434
+ skip_markers: Optional[List[str]] = None,
435
+ strip_chars: Optional[str] = None,
436
+ start_marker: Optional[str] = None,
437
+ end_marker: Optional[str] = None,
438
+ content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
439
+ yield_raw_on_error: bool = True,
440
+ encoding: EncodingType = 'utf-8',
441
+ encoding_errors: str = 'replace',
442
+ buffer_size: int = 8192,
443
+ line_delimiter: Optional[str] = None,
444
+ error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
445
+ ) -> AsyncGenerator[Any, None]:
446
+ """
447
+ Asynchronously processes a stream of data (strings or bytes), applying transformations and filtering.
448
+
449
+ This function is the asynchronous counterpart to `_sanitize_stream_sync`. It handles
450
+ streaming data, allowing for operations such as prefix removal, JSON parsing,
451
+ skipping lines based on markers, and extracting specific content. It also supports
452
+ custom error handling for JSON parsing failures.
453
+
454
+ Args:
455
+ data: String, iterable of strings, or iterable of bytes to process.
456
+ intro_value: Prefix indicating the start of meaningful data.
457
+ to_json: Parse JSON content if ``True``.
458
+ skip_markers: Lines containing any of these markers are skipped.
459
+ strip_chars: Characters to strip from each line.
460
+ start_marker: Begin processing only after this marker is found.
461
+ end_marker: Stop processing once this marker is found.
462
+ content_extractor: Optional callable to transform parsed content before yielding.
463
+ yield_raw_on_error: Yield raw lines when JSON parsing fails.
464
+ encoding: Byte stream encoding.
465
+ encoding_errors: How to handle encoding errors.
466
+ buffer_size: Buffer size for byte decoding.
467
+ line_delimiter: Delimiter used to split incoming text into lines. ``None`` uses ``str.splitlines()``.
468
+ error_handler: Callback invoked with ``(Exception, str)`` when JSON parsing fails. If the callback returns a value, it is yielded in place of the raw line.
469
+ """
470
+ if isinstance(data, str):
471
+ for item in _sanitize_stream_sync(
472
+ data,
473
+ intro_value=intro_value,
474
+ to_json=to_json,
475
+ skip_markers=skip_markers,
476
+ strip_chars=strip_chars,
477
+ start_marker=start_marker,
478
+ end_marker=end_marker,
479
+ content_extractor=content_extractor,
480
+ yield_raw_on_error=yield_raw_on_error,
481
+ encoding=encoding,
482
+ encoding_errors=encoding_errors,
483
+ buffer_size=buffer_size,
484
+ line_delimiter=line_delimiter,
485
+ error_handler=error_handler,
486
+ ):
487
+ yield item
488
+ return
489
+
490
+ if not hasattr(data, "__aiter__"):
491
+ # Fallback to synchronous processing if possible
492
+ for item in _sanitize_stream_sync(
493
+ data,
494
+ intro_value=intro_value,
495
+ to_json=to_json,
496
+ skip_markers=skip_markers,
497
+ strip_chars=strip_chars,
498
+ start_marker=start_marker,
499
+ end_marker=end_marker,
500
+ content_extractor=content_extractor,
501
+ yield_raw_on_error=yield_raw_on_error,
502
+ encoding=encoding,
503
+ encoding_errors=encoding_errors,
504
+ buffer_size=buffer_size,
505
+ line_delimiter=line_delimiter,
506
+ error_handler=error_handler,
507
+ ):
508
+ yield item
509
+ return
510
+
511
+ effective_skip_markers = skip_markers or []
512
+ processing_active = start_marker is None
513
+ buffer = ""
514
+ found_start = False if start_marker else True
515
+
516
+ iterator = data.__aiter__()
517
+ first_item = None
518
+ async for first_item in iterator:
519
+ break
520
+ if first_item is None:
521
+ return
522
+ async def _chain(first, it):
523
+ yield first
524
+ async for x in it:
525
+ yield x
526
+
527
+ stream = _chain(first_item, iterator)
528
+
529
+ if isinstance(first_item, bytes):
530
+ line_iterator = _decode_byte_stream_async(
531
+ stream,
532
+ encoding=encoding,
533
+ errors=encoding_errors,
534
+ buffer_size=buffer_size,
535
+ )
536
+ elif isinstance(first_item, str):
537
+ line_iterator = stream
538
+ else:
539
+ raise TypeError(
540
+ f"Stream must yield strings or bytes, not {type(first_item).__name__}"
541
+ )
542
+
543
+ async for line in line_iterator:
544
+ if not line:
545
+ continue
546
+ buffer += line
547
+ while True:
548
+ if not found_start and start_marker:
549
+ idx = buffer.find(start_marker)
550
+ if idx != -1:
551
+ found_start = True
552
+ buffer = buffer[idx + len(start_marker) :]
553
+ else:
554
+ buffer = buffer[-max(len(start_marker), 256) :]
555
+ break
556
+ if found_start and end_marker:
557
+ idx = buffer.find(end_marker)
558
+ if idx != -1:
559
+ chunk = buffer[:idx]
560
+ buffer = buffer[idx + len(end_marker) :]
561
+ processing_active = False
562
+ else:
563
+ chunk = buffer
564
+ buffer = ""
565
+ processing_active = True
566
+ if chunk and processing_active:
567
+ for subline in (
568
+ chunk.split(line_delimiter)
569
+ if line_delimiter is not None
570
+ else chunk.splitlines()
571
+ ):
572
+ result = _process_chunk(
573
+ subline,
574
+ intro_value,
575
+ to_json,
576
+ effective_skip_markers,
577
+ strip_chars,
578
+ yield_raw_on_error,
579
+ error_handler,
580
+ )
581
+ if result is None:
582
+ continue
583
+ if content_extractor:
584
+ try:
585
+ final_content = content_extractor(result)
586
+ if final_content is not None:
587
+ yield final_content
588
+ except Exception:
589
+ pass
590
+ else:
591
+ yield result
592
+ if not processing_active:
593
+ found_start = False
594
+ if idx == -1:
595
+ break
596
+ elif found_start:
597
+ chunk = buffer
598
+ buffer = ""
599
+ if chunk:
600
+ for subline in (
601
+ chunk.split(line_delimiter)
602
+ if line_delimiter is not None
603
+ else chunk.splitlines()
604
+ ):
605
+ result = _process_chunk(
606
+ subline,
607
+ intro_value,
608
+ to_json,
609
+ effective_skip_markers,
610
+ strip_chars,
611
+ yield_raw_on_error,
612
+ error_handler,
613
+ )
614
+ if result is None:
615
+ continue
616
+ if content_extractor:
617
+ try:
618
+ final_content = content_extractor(result)
619
+ if final_content is not None:
620
+ yield final_content
621
+ except Exception:
622
+ pass
623
+ else:
624
+ yield result
625
+ break
626
+ else:
627
+ break
628
+
629
+
630
def sanitize_stream(
    data: Union[
        str,
        Iterable[str],
        Iterable[bytes],
        AsyncIterable[str],
        AsyncIterable[bytes],
    ],
    intro_value: str = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: EncodingType = "utf-8",
    encoding_errors: str = "replace",
    buffer_size: int = 8192,
    line_delimiter: Optional[str] = None,
    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
) -> Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
    """
    Sanitize a streaming payload of strings or bytes, sync or async.

    Unified front door for the two internal stream processors: it inspects
    *data* and hands every argument through, unchanged, to either
    ``_sanitize_stream_sync`` or ``_sanitize_stream_async``.

    Args:
        data (Union[str, Iterable[str], Iterable[bytes], AsyncIterable[str], AsyncIterable[bytes]]):
            The stream to process — a plain string, a synchronous iterable of
            strings/bytes, or an asynchronous iterable of strings/bytes.
        intro_value (str): Prefix that marks meaningful data lines. Defaults to "data:".
        to_json (bool): When ``True``, attempt JSON parsing of each chunk. Defaults to True.
        skip_markers (Optional[List[str]]): Lines containing any of these markers are dropped. Defaults to None.
        strip_chars (Optional[str]): Characters stripped from each line. Defaults to None.
        start_marker (Optional[str]): Processing begins only after this marker appears. Defaults to None.
        end_marker (Optional[str]): Processing stops once this marker appears. Defaults to None.
        content_extractor (Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]]):
            Optional transform applied to each parsed item before it is yielded. Defaults to None.
        yield_raw_on_error (bool): Yield the raw line when JSON parsing fails. Defaults to True.
        encoding (EncodingType): Encoding used to decode byte streams. Defaults to "utf-8".
        encoding_errors (str): Error policy for decoding (``"strict"``, ``"replace"``, ...). Defaults to "replace".
        buffer_size (int): Decode buffer size for byte streams. Defaults to 8192.
        line_delimiter (Optional[str]): Delimiter used to split text into lines;
            ``None`` means ``str.splitlines()``. Defaults to None.
        error_handler (Optional[Callable[[Exception, str], Optional[Any]]]):
            Called as ``(exception, raw_line)`` on JSON failures; a non-``None``
            return value is yielded instead of the raw line. Defaults to None.

    Returns:
        Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
            A (sync or async) generator producing the sanitized items.
    """
    # Both backends take the exact same parameters in the same order,
    # so bundle them once and pick the processor afterwards.
    forwarded = (
        data, intro_value, to_json, skip_markers, strip_chars,
        start_marker, end_marker, content_extractor, yield_raw_on_error,
        encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
    )
    # Async iterables are the only inputs exposing __aiter__; strings and
    # ordinary iterables all fall through to the synchronous processor.
    processor = (
        _sanitize_stream_async
        if hasattr(data, "__aiter__")
        else _sanitize_stream_sync
    )
    return processor(*forwarded)
698
+
699
+
700
+ from .conversation import Conversation # noqa: E402,F401
701
+ from .Extra.autocoder import AutoCoder # noqa: E402,F401
702
+ from .optimizers import Optimizers # noqa: E402,F401
703
+ from .prompt_manager import AwesomePrompts # noqa: E402,F401