webscout 8.3.3__py3-none-any.whl → 8.3.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (79)
  1. webscout/AIutel.py +53 -800
  2. webscout/Bard.py +2 -22
  3. webscout/Provider/AISEARCH/__init__.py +11 -10
  4. webscout/Provider/AISEARCH/felo_search.py +7 -3
  5. webscout/Provider/AISEARCH/scira_search.py +26 -11
  6. webscout/Provider/AISEARCH/stellar_search.py +53 -8
  7. webscout/Provider/Deepinfra.py +81 -57
  8. webscout/Provider/ExaChat.py +9 -5
  9. webscout/Provider/Flowith.py +1 -1
  10. webscout/Provider/FreeGemini.py +2 -2
  11. webscout/Provider/Gemini.py +3 -10
  12. webscout/Provider/GeminiProxy.py +31 -5
  13. webscout/Provider/LambdaChat.py +39 -31
  14. webscout/Provider/Netwrck.py +5 -8
  15. webscout/Provider/OLLAMA.py +8 -9
  16. webscout/Provider/OPENAI/README.md +1 -1
  17. webscout/Provider/OPENAI/TogetherAI.py +57 -48
  18. webscout/Provider/OPENAI/TwoAI.py +94 -1
  19. webscout/Provider/OPENAI/__init__.py +1 -3
  20. webscout/Provider/OPENAI/autoproxy.py +1 -1
  21. webscout/Provider/OPENAI/copilot.py +73 -26
  22. webscout/Provider/OPENAI/deepinfra.py +60 -24
  23. webscout/Provider/OPENAI/exachat.py +9 -5
  24. webscout/Provider/OPENAI/monochat.py +3 -3
  25. webscout/Provider/OPENAI/netwrck.py +4 -7
  26. webscout/Provider/OPENAI/qodo.py +630 -0
  27. webscout/Provider/OPENAI/scirachat.py +86 -49
  28. webscout/Provider/OPENAI/textpollinations.py +19 -14
  29. webscout/Provider/OPENAI/venice.py +1 -0
  30. webscout/Provider/Perplexitylabs.py +163 -147
  31. webscout/Provider/Qodo.py +478 -0
  32. webscout/Provider/TTI/__init__.py +1 -0
  33. webscout/Provider/TTI/monochat.py +3 -3
  34. webscout/Provider/TTI/together.py +7 -6
  35. webscout/Provider/TTI/venice.py +368 -0
  36. webscout/Provider/TextPollinationsAI.py +19 -14
  37. webscout/Provider/TogetherAI.py +57 -44
  38. webscout/Provider/TwoAI.py +96 -2
  39. webscout/Provider/TypliAI.py +33 -27
  40. webscout/Provider/UNFINISHED/PERPLEXED_search.py +254 -0
  41. webscout/Provider/UNFINISHED/fetch_together_models.py +6 -11
  42. webscout/Provider/Venice.py +1 -0
  43. webscout/Provider/WiseCat.py +18 -20
  44. webscout/Provider/__init__.py +4 -10
  45. webscout/Provider/copilot.py +58 -61
  46. webscout/Provider/freeaichat.py +64 -55
  47. webscout/Provider/monochat.py +275 -0
  48. webscout/Provider/scira_chat.py +115 -21
  49. webscout/Provider/toolbaz.py +5 -10
  50. webscout/Provider/typefully.py +1 -11
  51. webscout/Provider/x0gpt.py +325 -315
  52. webscout/__init__.py +4 -11
  53. webscout/auth/__init__.py +19 -4
  54. webscout/auth/api_key_manager.py +189 -189
  55. webscout/auth/auth_system.py +25 -40
  56. webscout/auth/config.py +105 -6
  57. webscout/auth/database.py +377 -22
  58. webscout/auth/models.py +185 -130
  59. webscout/auth/request_processing.py +175 -11
  60. webscout/auth/routes.py +119 -5
  61. webscout/auth/server.py +9 -2
  62. webscout/auth/simple_logger.py +236 -0
  63. webscout/sanitize.py +1074 -0
  64. webscout/version.py +1 -1
  65. {webscout-8.3.3.dist-info → webscout-8.3.5.dist-info}/METADATA +9 -150
  66. {webscout-8.3.3.dist-info → webscout-8.3.5.dist-info}/RECORD +70 -72
  67. webscout/Provider/AI21.py +0 -177
  68. webscout/Provider/HuggingFaceChat.py +0 -469
  69. webscout/Provider/OPENAI/README_AUTOPROXY.md +0 -238
  70. webscout/Provider/OPENAI/freeaichat.py +0 -363
  71. webscout/Provider/OPENAI/typegpt.py +0 -368
  72. webscout/Provider/OPENAI/uncovrAI.py +0 -477
  73. webscout/Provider/WritingMate.py +0 -273
  74. webscout/Provider/typegpt.py +0 -284
  75. webscout/Provider/uncovr.py +0 -333
  76. {webscout-8.3.3.dist-info → webscout-8.3.5.dist-info}/WHEEL +0 -0
  77. {webscout-8.3.3.dist-info → webscout-8.3.5.dist-info}/entry_points.txt +0 -0
  78. {webscout-8.3.3.dist-info → webscout-8.3.5.dist-info}/licenses/LICENSE.md +0 -0
  79. {webscout-8.3.3.dist-info → webscout-8.3.5.dist-info}/top_level.txt +0 -0
webscout/AIutel.py CHANGED
@@ -1,805 +1,58 @@
1
- import codecs
2
- import json
3
- import re
4
- from typing import (
5
- Any,
6
- AsyncGenerator,
7
- AsyncIterable,
8
- Callable,
9
- Dict,
10
- Generator,
11
- Iterable,
12
- List,
13
- Literal,
14
- Optional,
15
- Union,
16
- )
17
-
18
- # Expanded encoding types
19
- EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
20
- 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
21
- 'shift_jis', 'euc-jp', 'euc-kr']
22
-
23
- def _process_chunk(
24
- chunk: str,
25
- intro_value: str,
26
- to_json: bool,
27
- skip_markers: List[str],
28
- strip_chars: Optional[str],
29
- yield_raw_on_error: bool,
30
- error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
31
- ) -> Union[str, Dict[str, Any], None]:
32
- """
33
- Sanitizes and potentially parses a single chunk of text.
34
-
35
- This function performs several operations on the input chunk:
36
- - Removes a specified prefix (`intro_value`).
37
- - Strips leading/trailing characters (`strip_chars`).
38
- - Skips chunks matching specific markers (`skip_markers`).
39
- - Optionally parses the chunk as JSON (`to_json`).
40
- - Handles JSON parsing errors with an optional callback (`error_handler`).
41
-
42
- Args:
43
- chunk (str): The chunk of text to process.
44
- intro_value (str): The prefix to remove from the chunk.
45
- to_json (bool): If True, attempts to parse the chunk as JSON.
46
- skip_markers (List[str]): A list of markers; chunks matching these are skipped.
47
- strip_chars (Optional[str]): Characters to strip from the beginning and end of the chunk.
48
- yield_raw_on_error (bool): If True, returns the raw chunk when JSON parsing fails; otherwise, returns None.
49
- error_handler (Optional[Callable[[Exception, str], Optional[Any]]]): An optional callback function that is called when JSON parsing fails.
50
- It receives the exception and the sanitized chunk as arguments. It should return a value to yield instead of the raw chunk, or None to ignore.
51
-
52
- """
53
- if not isinstance(chunk, str):
54
- return None
55
-
56
- # Fast path for empty chunks
57
- if not chunk:
58
- return None
59
-
60
- # Use slicing for prefix removal (faster than startswith+slicing)
61
- sanitized_chunk = chunk
62
- if intro_value and len(chunk) >= len(intro_value) and chunk[:len(intro_value)] == intro_value:
63
- sanitized_chunk = chunk[len(intro_value):]
1
+ from .sanitize import * # noqa: E402,F401
2
+ from .conversation import Conversation # noqa: E402,F401
3
+ from .Extra.autocoder import AutoCoder # noqa: E402,F401
4
+ from .optimizers import Optimizers # noqa: E402,F401
5
+ from .prompt_manager import AwesomePrompts # noqa: E402,F401
64
6
 
65
- # Optimize string stripping operations
66
- if strip_chars is not None:
67
- sanitized_chunk = sanitized_chunk.strip(strip_chars)
7
+ # --- Utility Decorators ---
8
+ from typing import Callable
9
+ import time
10
+ import functools
11
+
12
+ def timeIt(func: Callable):
13
+ """
14
+ Decorator to measure execution time of a function (sync or async).
15
+ Prints: - Execution time for '{func.__name__}' : {elapsed:.6f} Seconds.
16
+ """
17
+ import asyncio
18
+ GREEN_BOLD = "\033[1;92m"
19
+ RESET = "\033[0m"
20
+ @functools.wraps(func)
21
+ def sync_wrapper(*args, **kwargs):
22
+ start_time = time.time()
23
+ result = func(*args, **kwargs)
24
+ end_time = time.time()
25
+ print(f"{GREEN_BOLD}- Execution time for '{func.__name__}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
26
+ return result
27
+
28
+ @functools.wraps(func)
29
+ async def async_wrapper(*args, **kwargs):
30
+ start_time = time.time()
31
+ result = await func(*args, **kwargs)
32
+ end_time = time.time()
33
+ print(f"{GREEN_BOLD}- Execution time for '{func.__name__}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
34
+ return result
35
+
36
+ if asyncio.iscoroutinefunction(func):
37
+ return async_wrapper
68
38
  else:
69
- # lstrip() is faster than strip() when we only need leading whitespace removed
70
- sanitized_chunk = sanitized_chunk.lstrip()
71
-
72
- # Skip empty chunks and markers
73
- if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
74
- return None
75
-
76
- # JSON parsing with optimized error handling
77
- if to_json:
78
- try:
79
- # Only strip before JSON parsing if needed
80
- if sanitized_chunk[0] not in '{[' or sanitized_chunk[-1] not in '}]':
81
- sanitized_chunk = sanitized_chunk.strip()
82
- return json.loads(sanitized_chunk)
83
- except (json.JSONDecodeError, Exception) as e:
84
- if error_handler:
85
- try:
86
- handled = error_handler(e, sanitized_chunk)
87
- if handled is not None:
88
- return handled
89
- except Exception:
90
- pass
91
- return sanitized_chunk if yield_raw_on_error else None
92
-
93
- return sanitized_chunk
94
-
95
- def _decode_byte_stream(
96
- byte_iterator: Iterable[bytes],
97
- encoding: EncodingType = 'utf-8',
98
- errors: str = 'replace',
99
- buffer_size: int = 8192
100
- ) -> Generator[str, None, None]:
101
- """
102
- Decodes a byte stream in realtime with flexible encoding support.
103
-
104
- This function takes an iterator of bytes and decodes it into a stream of strings
105
- using the specified character encoding. It handles encoding errors gracefully
106
- and can be tuned for performance with the `buffer_size` parameter.
107
-
108
- Args:
109
- byte_iterator (Iterable[bytes]): An iterator that yields chunks of bytes.
110
- encoding (EncodingType): The character encoding to use for decoding.
111
- Defaults to 'utf-8'. Supports a wide range of encodings, including:
112
- 'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
113
- 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
114
- 'shift_jis', 'euc-jp', 'euc-kr'.
115
- errors (str): Specifies how encoding errors should be handled.
116
- Options are 'strict' (raises an error), 'ignore' (skips the error), and
117
- 'replace' (replaces the erroneous byte with a replacement character).
118
- Defaults to 'replace'.
119
- buffer_size (int): The size of the internal buffer used for decoding.
120
-
121
- """
122
- # Initialize decoder with the specified encoding
123
- try:
124
- decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
125
- except LookupError:
126
- # Fallback to utf-8 if the encoding is not supported
127
- decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
128
-
129
- # Process byte stream in realtime
130
- buffer = bytearray(buffer_size)
131
- buffer_view = memoryview(buffer)
132
-
133
- for chunk_bytes in byte_iterator:
134
- if not chunk_bytes:
135
- continue
136
-
137
- try:
138
- # Use buffer for processing if chunk size is appropriate
139
- if len(chunk_bytes) <= buffer_size:
140
- buffer[:len(chunk_bytes)] = chunk_bytes
141
- text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
142
- else:
143
- text = decoder.decode(chunk_bytes, final=False)
144
-
145
- if text:
146
- yield text
147
- except UnicodeDecodeError:
148
- yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
149
-
150
- # Final flush
151
- try:
152
- final_text = decoder.decode(b'', final=True)
153
- if final_text:
154
- yield final_text
155
- except UnicodeDecodeError:
156
- yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
157
-
158
- async def _decode_byte_stream_async(
159
- byte_iterator: Iterable[bytes],
160
- encoding: EncodingType = 'utf-8',
161
- errors: str = 'replace',
162
- buffer_size: int = 8192
163
- ) -> AsyncGenerator[str, None]:
164
- """
165
- Asynchronously decodes a byte stream with flexible encoding support.
166
-
167
- This function is the asynchronous counterpart to `_decode_byte_stream`. It takes
168
- an asynchronous iterator of bytes and decodes it into a stream of strings using
169
- the specified character encoding. It handles encoding errors gracefully and can
170
- be tuned for performance with the `buffer_size` parameter.
171
-
172
- Args:
173
- byte_iterator (Iterable[bytes]): An asynchronous iterator that yields chunks of bytes.
174
- encoding (EncodingType): The character encoding to use for decoding.
175
- Defaults to 'utf-8'. Supports a wide range of encodings, including:
176
- 'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
177
- 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
178
- 'shift_jis', 'euc-jp', 'euc-kr'.
179
- errors (str): Specifies how encoding errors should be handled.
180
- Options are 'strict' (raises an error), 'ignore' (skips the error), and
181
- 'replace' (replaces the erroneous byte with a replacement character).
182
- Defaults to 'replace'.
183
- buffer_size (int): The size of the internal buffer used for decoding.
184
- """
185
- try:
186
- decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
187
- except LookupError:
188
- decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
189
-
190
- buffer = bytearray(buffer_size)
191
- buffer_view = memoryview(buffer)
192
-
193
- async for chunk_bytes in byte_iterator:
194
- if not chunk_bytes:
195
- continue
196
- try:
197
- if len(chunk_bytes) <= buffer_size:
198
- buffer[:len(chunk_bytes)] = chunk_bytes
199
- text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
200
- else:
201
- text = decoder.decode(chunk_bytes, final=False)
202
- if text:
203
- yield text
204
- except UnicodeDecodeError:
205
- yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
206
-
207
- try:
208
- final_text = decoder.decode(b'', final=True)
209
- if final_text:
210
- yield final_text
211
- except UnicodeDecodeError:
212
- yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
213
-
214
- def _sanitize_stream_sync(
215
- data: Union[str, Iterable[str], Iterable[bytes]],
216
- intro_value: str = "data:",
217
- to_json: bool = True,
218
- skip_markers: Optional[List[str]] = None,
219
- strip_chars: Optional[str] = None,
220
- start_marker: Optional[str] = None,
221
- end_marker: Optional[str] = None,
222
- content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
223
- yield_raw_on_error: bool = True,
224
- encoding: EncodingType = 'utf-8',
225
- encoding_errors: str = 'replace',
226
- buffer_size: int = 8192,
227
- line_delimiter: Optional[str] = None,
228
- error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
229
- ) -> Generator[Any, None, None]:
230
- """
231
- Processes a stream of data (strings or bytes) in real-time, applying various transformations and filtering.
39
+ return sync_wrapper
232
40
 
233
- This function is designed to handle streaming data, allowing for operations such as
234
- prefix removal, JSON parsing, skipping lines based on markers, and extracting specific content.
235
- It also supports custom error handling for JSON parsing failures.
236
-
237
- Args:
238
- data: String, iterable of strings, or iterable of bytes to process.
239
- intro_value: Prefix indicating the start of meaningful data.
240
- to_json: Parse the chunk as JSON if True.
241
- skip_markers: Lines containing any of these markers are skipped.
242
- strip_chars: Characters to strip from each line.
243
- start_marker: Begin processing only after this marker is found.
244
- end_marker: Stop processing once this marker is found.
245
- content_extractor: Optional callable to transform parsed content before yielding.
246
- yield_raw_on_error: Yield raw lines when JSON parsing fails.
247
- encoding: Byte stream encoding.
248
- encoding_errors: How to handle encoding errors.
249
- buffer_size: Buffer size for byte decoding.
250
- line_delimiter: Delimiter used to split incoming text into lines. ``None``
251
- uses ``str.splitlines()``.
252
- error_handler: Callback invoked with ``(Exception, str)`` when JSON
253
- parsing fails. If the callback returns a value, it is yielded instead of the raw line.
254
-
255
- Yields:
256
- Any: Processed data, which can be a string, a dictionary (if `to_json` is True), or the result of `content_extractor`.
257
-
258
- Raises:
259
- TypeError: If the input `data` is not a string or an iterable.
41
+ def retry(retries: int = 3, delay: float = 1) -> Callable:
260
42
  """
261
- effective_skip_markers = skip_markers or []
262
- processing_active = start_marker is None
263
- buffer = ""
264
- found_start = False if start_marker else True
265
- line_iterator: Iterable[str]
266
-
267
- if isinstance(data, str):
268
- # If data is a string, decide whether to split it into lines
269
- # or treat it as an iterable containing a single chunk.
270
- temp_lines: List[str]
271
- if line_delimiter is None: # Default: split by newlines if present
272
- if '\n' in data or '\r' in data:
273
- temp_lines = data.splitlines()
274
- else:
275
- temp_lines = [data] # Treat as a single line/chunk
276
- elif line_delimiter in data: # Custom delimiter found in string
277
- temp_lines = data.split(line_delimiter)
278
- else: # Custom delimiter not found, or string is effectively a single segment
279
- temp_lines = [data]
280
- line_iterator = iter(temp_lines)
281
- elif hasattr(data, '__iter__'): # data is an iterable (but not a string)
282
- _iter = iter(data)
283
- first_item = next(_iter, None)
284
-
285
- if first_item is None: # Iterable was empty
286
- return
287
-
288
- from itertools import chain
289
- # Reconstruct the full iterable including the first_item
290
- stream_input_iterable = chain([first_item], _iter)
291
-
292
- if isinstance(first_item, bytes):
293
- # Ensure stream_input_iterable is typed as Iterable[bytes] for _decode_byte_stream
294
- line_iterator = _decode_byte_stream(
295
- stream_input_iterable, # type: ignore
296
- encoding=encoding,
297
- errors=encoding_errors,
298
- buffer_size=buffer_size
299
- )
300
- elif isinstance(first_item, str):
301
- # Ensure stream_input_iterable is typed as Iterable[str]
302
- line_iterator = stream_input_iterable # type: ignore
303
- else:
304
- raise TypeError(f"Iterable must yield strings or bytes, not {type(first_item).__name__}")
305
- else: # Not a string and not an iterable
306
- raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
307
-
308
- try:
309
- for line in line_iterator:
310
- if not line:
311
- continue
312
- buffer += line
313
- while True:
314
- # Look for start marker if needed
315
- if not found_start and start_marker:
316
- idx = buffer.find(start_marker)
317
- if idx != -1:
318
- found_start = True
319
- buffer = buffer[idx + len(start_marker):]
320
- else:
321
- # Not found, keep buffering
322
- buffer = buffer[-max(len(start_marker), 256):] # avoid unbounded growth
323
- break
324
- # Look for end marker if needed
325
- if found_start and end_marker:
326
- idx = buffer.find(end_marker)
327
- if idx != -1:
328
- chunk = buffer[:idx]
329
- buffer = buffer[idx + len(end_marker):]
330
- processing_active = False
331
- else:
332
- chunk = buffer
333
- buffer = ""
334
- processing_active = True
335
- # Process chunk if we are in active region
336
- if chunk and processing_active:
337
- for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
338
- result = _process_chunk(
339
- subline,
340
- intro_value,
341
- to_json,
342
- effective_skip_markers,
343
- strip_chars,
344
- yield_raw_on_error,
345
- error_handler,
346
- )
347
- if result is None:
348
- continue
349
- if content_extractor:
350
- try:
351
- final_content = content_extractor(result)
352
- if final_content is not None:
353
- yield final_content
354
- except Exception:
355
- pass
356
- else:
357
- yield result
358
- if not processing_active:
359
- found_start = False
360
- if idx == -1:
361
- break
362
- elif found_start:
363
- # No end marker, process all buffered content
364
- chunk = buffer
365
- buffer = ""
366
- if chunk:
367
- for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
368
- result = _process_chunk(
369
- subline,
370
- intro_value,
371
- to_json,
372
- effective_skip_markers,
373
- strip_chars,
374
- yield_raw_on_error,
375
- error_handler,
376
- )
377
- if result is None:
378
- continue
379
- if content_extractor:
380
- try:
381
- final_content = content_extractor(result)
382
- if final_content is not None:
383
- yield final_content
384
- except Exception:
385
- pass
386
- else:
387
- yield result
388
- break
389
- else:
390
- break
391
- except Exception as e:
392
- import sys
393
- print(f"Stream processing error: {str(e)}", file=sys.stderr)
394
-
395
-
396
- async def _sanitize_stream_async(
397
- data: Union[str, Iterable[str], Iterable[bytes]],
398
- intro_value: str = "data:",
399
- to_json: bool = True,
400
- skip_markers: Optional[List[str]] = None,
401
- strip_chars: Optional[str] = None,
402
- start_marker: Optional[str] = None,
403
- end_marker: Optional[str] = None,
404
- content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
405
- yield_raw_on_error: bool = True,
406
- encoding: EncodingType = 'utf-8',
407
- encoding_errors: str = 'replace',
408
- buffer_size: int = 8192,
409
- line_delimiter: Optional[str] = None,
410
- error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
411
- ) -> AsyncGenerator[Any, None]:
412
- """
413
- Asynchronously processes a stream of data (strings or bytes), applying transformations and filtering.
414
-
415
- This function is the asynchronous counterpart to `_sanitize_stream_sync`. It handles
416
- streaming data, allowing for operations such as prefix removal, JSON parsing,
417
- skipping lines based on markers, and extracting specific content. It also supports
418
- custom error handling for JSON parsing failures.
419
-
420
- Args:
421
- data: String, iterable of strings, or iterable of bytes to process.
422
- intro_value: Prefix indicating the start of meaningful data.
423
- to_json: Parse JSON content if ``True``.
424
- skip_markers: Lines containing any of these markers are skipped.
425
- strip_chars: Characters to strip from each line.
426
- start_marker: Begin processing only after this marker is found.
427
- end_marker: Stop processing once this marker is found.
428
- content_extractor: Optional callable to transform parsed content before yielding.
429
- yield_raw_on_error: Yield raw lines when JSON parsing fails.
430
- encoding: Byte stream encoding.
431
- encoding_errors: How to handle encoding errors.
432
- buffer_size: Buffer size for byte decoding.
433
- line_delimiter: Delimiter used to split incoming text into lines. ``None`` uses ``str.splitlines()``.
434
- error_handler: Callback invoked with ``(Exception, str)`` when JSON parsing fails. If the callback returns a value, it is yielded in place of the raw line.
435
- """
436
- if isinstance(data, str):
437
- for item in _sanitize_stream_sync(
438
- data,
439
- intro_value=intro_value,
440
- to_json=to_json,
441
- skip_markers=skip_markers,
442
- strip_chars=strip_chars,
443
- start_marker=start_marker,
444
- end_marker=end_marker,
445
- content_extractor=content_extractor,
446
- yield_raw_on_error=yield_raw_on_error,
447
- encoding=encoding,
448
- encoding_errors=encoding_errors,
449
- buffer_size=buffer_size,
450
- line_delimiter=line_delimiter,
451
- error_handler=error_handler,
452
- ):
453
- yield item
454
- return
455
-
456
- if not hasattr(data, "__aiter__"):
457
- # Fallback to synchronous processing if possible
458
- for item in _sanitize_stream_sync(
459
- data,
460
- intro_value=intro_value,
461
- to_json=to_json,
462
- skip_markers=skip_markers,
463
- strip_chars=strip_chars,
464
- start_marker=start_marker,
465
- end_marker=end_marker,
466
- content_extractor=content_extractor,
467
- yield_raw_on_error=yield_raw_on_error,
468
- encoding=encoding,
469
- encoding_errors=encoding_errors,
470
- buffer_size=buffer_size,
471
- line_delimiter=line_delimiter,
472
- error_handler=error_handler,
473
- ):
474
- yield item
475
- return
476
-
477
- effective_skip_markers = skip_markers or []
478
- processing_active = start_marker is None
479
- buffer = ""
480
- found_start = False if start_marker else True
481
-
482
- iterator = data.__aiter__()
483
- first_item = None
484
- async for first_item in iterator:
485
- break
486
- if first_item is None:
487
- return
488
- async def _chain(first, it):
489
- yield first
490
- async for x in it:
491
- yield x
492
-
493
- stream = _chain(first_item, iterator)
494
-
495
- if isinstance(first_item, bytes):
496
- line_iterator = _decode_byte_stream_async(
497
- stream,
498
- encoding=encoding,
499
- errors=encoding_errors,
500
- buffer_size=buffer_size,
501
- )
502
- elif isinstance(first_item, str):
503
- line_iterator = stream
504
- else:
505
- raise TypeError(
506
- f"Stream must yield strings or bytes, not {type(first_item).__name__}"
507
- )
508
-
509
- async for line in line_iterator:
510
- if not line:
511
- continue
512
- buffer += line
513
- while True:
514
- if not found_start and start_marker:
515
- idx = buffer.find(start_marker)
516
- if idx != -1:
517
- found_start = True
518
- buffer = buffer[idx + len(start_marker) :]
519
- else:
520
- buffer = buffer[-max(len(start_marker), 256) :]
521
- break
522
- if found_start and end_marker:
523
- idx = buffer.find(end_marker)
524
- if idx != -1:
525
- chunk = buffer[:idx]
526
- buffer = buffer[idx + len(end_marker) :]
527
- processing_active = False
528
- else:
529
- chunk = buffer
530
- buffer = ""
531
- processing_active = True
532
- if chunk and processing_active:
533
- for subline in (
534
- chunk.split(line_delimiter)
535
- if line_delimiter is not None
536
- else chunk.splitlines()
537
- ):
538
- result = _process_chunk(
539
- subline,
540
- intro_value,
541
- to_json,
542
- effective_skip_markers,
543
- strip_chars,
544
- yield_raw_on_error,
545
- error_handler,
546
- )
547
- if result is None:
548
- continue
549
- if content_extractor:
550
- try:
551
- final_content = content_extractor(result)
552
- if final_content is not None:
553
- yield final_content
554
- except Exception:
555
- pass
556
- else:
557
- yield result
558
- if not processing_active:
559
- found_start = False
560
- if idx == -1:
561
- break
562
- elif found_start:
563
- chunk = buffer
564
- buffer = ""
565
- if chunk:
566
- for subline in (
567
- chunk.split(line_delimiter)
568
- if line_delimiter is not None
569
- else chunk.splitlines()
570
- ):
571
- result = _process_chunk(
572
- subline,
573
- intro_value,
574
- to_json,
575
- effective_skip_markers,
576
- strip_chars,
577
- yield_raw_on_error,
578
- error_handler,
579
- )
580
- if result is None:
581
- continue
582
- if content_extractor:
583
- try:
584
- final_content = content_extractor(result)
585
- if final_content is not None:
586
- yield final_content
587
- except Exception:
588
- pass
589
- else:
590
- yield result
591
- break
592
- else:
593
- break
594
-
595
-
596
- def sanitize_stream(
597
- data: Union[
598
- str,
599
- bytes,
600
- Iterable[str],
601
- Iterable[bytes],
602
- AsyncIterable[str],
603
- AsyncIterable[bytes],
604
- dict,
605
- list,
606
- int,
607
- float,
608
- bool,
609
- None,
610
- ],
611
- intro_value: str = "data:",
612
- to_json: bool = True,
613
- skip_markers: Optional[List[str]] = None,
614
- strip_chars: Optional[str] = None,
615
- start_marker: Optional[str] = None,
616
- end_marker: Optional[str] = None,
617
- content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
618
- yield_raw_on_error: bool = True,
619
- encoding: EncodingType = "utf-8",
620
- encoding_errors: str = "replace",
621
- buffer_size: int = 8192,
622
- line_delimiter: Optional[str] = None,
623
- error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
624
- object_mode: Literal["as_is", "json", "str"] = "json",
625
- raw: bool = False,
626
- ) -> Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
43
+ Decorator to retry a function on exception.
627
44
  """
628
- Processes streaming data (strings or bytes) in either synchronous or asynchronous mode.
629
- Now supports non-iterable and miscellaneous input types (dict, list, int, float, bool, None).
630
-
631
- Args:
632
- data: The data to be processed. Can be a string, bytes, a synchronous iterable of strings or bytes,
633
- an asynchronous iterable of strings or bytes, or a single object (dict, list, int, float, bool, None).
634
- intro_value (str): Prefix indicating the start of meaningful data. Defaults to "data:".
635
- to_json (bool): Parse JSON content if ``True``. Defaults to True.
636
- skip_markers (Optional[List[str]]): Lines containing any of these markers are skipped. Defaults to None.
637
- strip_chars (Optional[str]): Characters to strip from each line. Defaults to None.
638
- start_marker (Optional[str]): Begin processing only after this marker is found. Defaults to None.
639
- end_marker (Optional[str]): Stop processing once this marker is found. Defaults to None.
640
- content_extractor (Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]]):
641
- Optional callable to transform parsed content before yielding. Defaults to None.
642
- yield_raw_on_error (bool): Yield raw lines when JSON parsing fails. Defaults to True.
643
- encoding (EncodingType): Byte stream encoding. Defaults to "utf-8".
644
- encoding_errors (str): How to handle encoding errors. Defaults to "replace".
645
- buffer_size (int): Buffer size for byte decoding. Defaults to 8192.
646
- line_delimiter (Optional[str]): Delimiter used to split incoming text into lines.
647
- ``None`` uses ``str.splitlines()``. Defaults to None.
648
- error_handler (Optional[Callable[[Exception, str], Optional[Any]]]):
649
- Callback invoked with ``(Exception, str)`` when JSON parsing fails.
650
- If the callback returns a value, it is yielded in place of the raw line. Defaults to None.
651
- object_mode (Literal["as_is", "json", "str"]): How to handle non-string, non-iterable objects.
652
- "json" (default) yields as JSON string, "str" yields as str(obj), "as_is" yields the object as-is.
653
- raw (bool): If True, yields the raw response as returned by the API, chunk by chunk (no splitting or joining).
654
-
655
- Returns:
656
- Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
657
- A generator or an asynchronous generator yielding the processed data, or raw data if raw=True.
658
- """ # --- RAW MODE: yield each chunk exactly as returned by the API ---
659
- if raw:
660
- def _raw_passthrough_sync(source_iter):
661
- for chunk in source_iter:
662
- if isinstance(chunk, (bytes, bytearray)):
663
- # Decode bytes preserving all whitespace and newlines
664
- yield chunk.decode(encoding, encoding_errors)
665
- elif chunk is not None:
666
- # Yield string chunks as-is, preserving all formatting
667
- yield chunk
668
- # Skip None chunks entirely
669
- async def _raw_passthrough_async(source_aiter):
670
- async for chunk in source_aiter:
671
- if isinstance(chunk, (bytes, bytearray)):
672
- # Decode bytes preserving all whitespace and newlines
673
- yield chunk.decode(encoding, encoding_errors)
674
- elif chunk is not None:
675
- # Yield string chunks as-is, preserving all formatting
676
- yield chunk
677
- # Skip None chunks entirely
678
- # Sync iterable (but not str/bytes)
679
- if hasattr(data, "__iter__") and not isinstance(data, (str, bytes)):
680
- return _raw_passthrough_sync(data)
681
- # Async iterable
682
- if hasattr(data, "__aiter__"):
683
- return _raw_passthrough_async(data)
684
- # Single string or bytes
685
- if isinstance(data, (bytes, bytearray)):
686
- def _yield_single():
687
- yield data.decode(encoding, encoding_errors)
688
- return _yield_single()
689
- else:
690
- def _yield_single():
691
- if data is not None:
692
- yield data
693
- return _yield_single()
694
- # --- END RAW MODE ---
695
-
696
- text_attr = getattr(data, "text", None)
697
- content_attr = getattr(data, "content", None)
698
-
699
- # Handle None
700
- if data is None:
701
- def _empty_gen():
702
- if False:
703
- yield None
704
- return _empty_gen()
705
-
706
- # Handle bytes directly
707
- if isinstance(data, bytes):
708
- try:
709
- payload = data.decode(encoding, encoding_errors)
710
- except Exception:
711
- payload = str(data)
712
- return _sanitize_stream_sync(
713
- payload, intro_value, to_json, skip_markers, strip_chars,
714
- start_marker, end_marker, content_extractor, yield_raw_on_error,
715
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
716
- )
717
-
718
- # Handle string directly
719
- if isinstance(data, str):
720
- return _sanitize_stream_sync(
721
- data, intro_value, to_json, skip_markers, strip_chars,
722
- start_marker, end_marker, content_extractor, yield_raw_on_error,
723
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
724
- )
725
-
726
- # Handle dict, list, int, float, bool (non-iterable, non-string/bytes)
727
- if isinstance(data, (dict, list, int, float, bool)):
728
- if object_mode == "as_is":
729
- def _as_is_gen():
730
- yield data
731
- return _as_is_gen()
732
- elif object_mode == "str":
733
- return _sanitize_stream_sync(
734
- str(data), intro_value, to_json, skip_markers, strip_chars,
735
- start_marker, end_marker, content_extractor, yield_raw_on_error,
736
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
737
- )
738
- else: # "json"
739
- try:
740
- json_str = json.dumps(data)
741
- except Exception:
742
- json_str = str(data)
743
- return _sanitize_stream_sync(
744
- json_str, intro_value, to_json, skip_markers, strip_chars,
745
- start_marker, end_marker, content_extractor, yield_raw_on_error,
746
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
747
- )
748
-
749
- # Handle file-like objects (optional, treat as string if .read exists)
750
- if hasattr(data, "read") and callable(data.read):
751
- try:
752
- file_content = data.read()
753
- if isinstance(file_content, bytes):
754
- file_content = file_content.decode(encoding, encoding_errors)
755
- return _sanitize_stream_sync(
756
- file_content, intro_value, to_json, skip_markers, strip_chars,
757
- start_marker, end_marker, content_extractor, yield_raw_on_error,
758
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
759
- )
760
- except Exception:
761
- pass # fallback to next
762
-
763
- # Handle .text or .content attributes
764
- if isinstance(text_attr, str):
765
- payload = text_attr
766
- return _sanitize_stream_sync(
767
- payload, intro_value, to_json, skip_markers, strip_chars,
768
- start_marker, end_marker, content_extractor, yield_raw_on_error,
769
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
770
- )
771
- elif isinstance(content_attr, bytes):
772
- try:
773
- payload = content_attr.decode(encoding, encoding_errors)
774
- except Exception:
775
- payload = str(content_attr)
776
- return _sanitize_stream_sync(
777
- payload, intro_value, to_json, skip_markers, strip_chars,
778
- start_marker, end_marker, content_extractor, yield_raw_on_error,
779
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
780
- )
781
-
782
- # Handle async iterables
783
- if hasattr(data, "__aiter__"):
784
- return _sanitize_stream_async(
785
- data, intro_value, to_json, skip_markers, strip_chars,
786
- start_marker, end_marker, content_extractor, yield_raw_on_error,
787
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
788
- )
789
- # Handle sync iterables (but not strings/bytes)
790
- if hasattr(data, "__iter__"):
791
- return _sanitize_stream_sync(
792
- data, intro_value, to_json, skip_markers, strip_chars,
793
- start_marker, end_marker, content_extractor, yield_raw_on_error,
794
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
795
- )
796
- # Fallback: treat as string
797
- return _sanitize_stream_sync(
798
- str(data), intro_value, to_json, skip_markers, strip_chars,
799
- start_marker, end_marker, content_extractor, yield_raw_on_error,
800
- encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
801
- )
802
- from .conversation import Conversation # noqa: E402,F401
803
- from .Extra.autocoder import AutoCoder # noqa: E402,F401
804
- from .optimizers import Optimizers # noqa: E402,F401
805
- from .prompt_manager import AwesomePrompts # noqa: E402,F401
45
def decorator(func: Callable):
    """Wrap *func* so that it is called again when it raises.

    ``retries`` (total number of attempts) and ``delay`` (seconds to wait
    between attempts) are free variables resolved from the enclosing scope.

    Args:
        func: The callable to protect with retry behaviour.

    Returns:
        A wrapper with the same metadata as *func* (via ``functools.wraps``)
        that returns the first successful result of ``func(*args, **kwargs)``.

    Raises:
        Exception: Whatever *func* raised on its final attempt, re-raised
            unchanged so callers see the original type and traceback.
    """
    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        # Always call func at least once: with ``retries <= 0`` the loop
        # would otherwise never run and there would be no exception to raise.
        attempts = max(1, retries)
        for attempt in range(1, attempts + 1):
            try:
                return func(*args, **kwargs)
            except Exception as exc:
                if attempt == attempts:
                    # Out of attempts: propagate immediately instead of
                    # announcing (and sleeping for) a retry that never happens.
                    raise
                print(f"Attempt {attempt} failed: {exc}. Retrying in {delay} seconds...")
                time.sleep(delay)
    return wrapper
58
+ return decorator