webscout-8.3.4-py3-none-any.whl → webscout-8.3.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of webscout has been flagged as potentially problematic.

Files changed (55)
  1. webscout/AIutel.py +52 -1016
  2. webscout/Provider/AISEARCH/__init__.py +11 -10
  3. webscout/Provider/AISEARCH/felo_search.py +7 -3
  4. webscout/Provider/AISEARCH/scira_search.py +2 -0
  5. webscout/Provider/AISEARCH/stellar_search.py +53 -8
  6. webscout/Provider/Deepinfra.py +7 -1
  7. webscout/Provider/OPENAI/TogetherAI.py +57 -48
  8. webscout/Provider/OPENAI/TwoAI.py +94 -1
  9. webscout/Provider/OPENAI/__init__.py +0 -2
  10. webscout/Provider/OPENAI/deepinfra.py +6 -0
  11. webscout/Provider/OPENAI/scirachat.py +4 -0
  12. webscout/Provider/OPENAI/textpollinations.py +11 -7
  13. webscout/Provider/OPENAI/venice.py +1 -0
  14. webscout/Provider/Perplexitylabs.py +163 -147
  15. webscout/Provider/Qodo.py +30 -6
  16. webscout/Provider/TTI/__init__.py +1 -0
  17. webscout/Provider/TTI/together.py +7 -6
  18. webscout/Provider/TTI/venice.py +368 -0
  19. webscout/Provider/TextPollinationsAI.py +11 -7
  20. webscout/Provider/TogetherAI.py +57 -44
  21. webscout/Provider/TwoAI.py +96 -2
  22. webscout/Provider/TypliAI.py +33 -27
  23. webscout/Provider/UNFINISHED/PERPLEXED_search.py +254 -0
  24. webscout/Provider/UNFINISHED/fetch_together_models.py +6 -11
  25. webscout/Provider/Venice.py +1 -0
  26. webscout/Provider/WiseCat.py +18 -20
  27. webscout/Provider/__init__.py +0 -6
  28. webscout/Provider/scira_chat.py +4 -0
  29. webscout/Provider/toolbaz.py +5 -10
  30. webscout/Provider/typefully.py +1 -11
  31. webscout/__init__.py +3 -15
  32. webscout/auth/__init__.py +19 -4
  33. webscout/auth/api_key_manager.py +189 -189
  34. webscout/auth/auth_system.py +25 -40
  35. webscout/auth/config.py +105 -6
  36. webscout/auth/database.py +377 -22
  37. webscout/auth/models.py +185 -130
  38. webscout/auth/request_processing.py +175 -11
  39. webscout/auth/routes.py +99 -2
  40. webscout/auth/server.py +9 -2
  41. webscout/auth/simple_logger.py +236 -0
  42. webscout/sanitize.py +1074 -0
  43. webscout/version.py +1 -1
  44. {webscout-8.3.4.dist-info → webscout-8.3.5.dist-info}/METADATA +9 -149
  45. {webscout-8.3.4.dist-info → webscout-8.3.5.dist-info}/RECORD +49 -51
  46. webscout/Provider/OPENAI/README_AUTOPROXY.md +0 -238
  47. webscout/Provider/OPENAI/typegpt.py +0 -368
  48. webscout/Provider/OPENAI/uncovrAI.py +0 -477
  49. webscout/Provider/WritingMate.py +0 -273
  50. webscout/Provider/typegpt.py +0 -284
  51. webscout/Provider/uncovr.py +0 -333
  52. {webscout-8.3.4.dist-info → webscout-8.3.5.dist-info}/WHEEL +0 -0
  53. {webscout-8.3.4.dist-info → webscout-8.3.5.dist-info}/entry_points.txt +0 -0
  54. {webscout-8.3.4.dist-info → webscout-8.3.5.dist-info}/licenses/LICENSE.md +0 -0
  55. {webscout-8.3.4.dist-info → webscout-8.3.5.dist-info}/top_level.txt +0 -0
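
The headline change is webscout/AIutel.py, which shrinks from roughly 1,022 lines to 58: the sanitize_stream machinery moves into the new webscout/sanitize.py (+1,074 lines), and AIutel.py re-exports it via a star import while gaining two small utility decorators, as the diff below shows. A minimal compatibility sketch, assuming the moved code keeps the old sanitize_stream signature (not verified against the published wheel):

    # Hedged sketch: this import path should keep working in 8.3.5,
    # since AIutel.py now does `from .sanitize import *`.
    from webscout.AIutel import sanitize_stream

    sse_chunks = ['data: {"text": "hello"}', "data: [DONE]"]
    for item in sanitize_stream(sse_chunks, intro_value="data:", to_json=True,
                                skip_markers=["[DONE]"]):
        print(item)  # -> {'text': 'hello'}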
webscout/AIutel.py CHANGED
@@ -1,1022 +1,58 @@
- import codecs
- import json
- import re
- from typing import (
-     Any,
-     AsyncGenerator,
-     AsyncIterable,
-     Callable,
-     Dict,
-     Generator,
-     Iterable,
-     List,
-     Literal,
-     Optional,
-     Pattern,
-     Union,
- )
-
- # Expanded encoding types
- EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
-                        'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
-                        'shift_jis', 'euc-jp', 'euc-kr']
-
- def _compile_regexes(patterns: Optional[List[Union[str, Pattern[str]]]]) -> Optional[List[Pattern[str]]]:
-     """
-     Compile regex patterns from strings or return compiled patterns as-is.
-
-     Args:
-         patterns: List of regex patterns as strings or compiled Pattern objects.
-
-     Returns:
-         List of compiled Pattern objects, or None if input is None.
-
-     Raises:
-         ValueError: If any pattern is invalid.
-     """
-     if not patterns:
-         return None
-
-     compiled_patterns = []
-     for i, pattern in enumerate(patterns):
-         try:
-             if isinstance(pattern, str):
-                 compiled_patterns.append(re.compile(pattern))
-             elif isinstance(pattern, Pattern):
-                 compiled_patterns.append(pattern)
-             else:
-                 raise ValueError(f"Pattern at index {i} must be a string or compiled regex pattern, got {type(pattern)}")
-         except re.error as e:
-             raise ValueError(f"Invalid regex pattern at index {i}: '{pattern}' - {str(e)}")
-
-     return compiled_patterns
-
- def _process_chunk(
-     chunk: str,
-     intro_value: str,
-     to_json: bool,
-     skip_markers: List[str],
-     strip_chars: Optional[str],
-     yield_raw_on_error: bool,
-     error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-     skip_regexes: Optional[List[Pattern[str]]] = None,
-     extract_regexes: Optional[List[Pattern[str]]] = None,
- ) -> Union[str, Dict[str, Any], None]:
-     """
-     Sanitizes and potentially parses a single chunk of text.
-
-     This function performs several operations on the input chunk:
-     - Removes a specified prefix (`intro_value`).
-     - Strips leading/trailing characters (`strip_chars`).
-     - Skips chunks matching specific markers (`skip_markers`).
-     - Skips chunks matching regex patterns (`skip_regexes`).
-     - Extracts content using regex capturing groups (`extract_regexes`).
-     - Optionally parses the chunk as JSON (`to_json`).
-     - Handles JSON parsing errors with an optional callback (`error_handler`).
-
-     Args:
-         chunk (str): The chunk of text to process.
-         intro_value (str): The prefix to remove from the chunk.
-         to_json (bool): If True, attempts to parse the chunk as JSON.
-         skip_markers (List[str]): A list of markers; chunks matching these are skipped.
-         strip_chars (Optional[str]): Characters to strip from the beginning and end of the chunk.
-         yield_raw_on_error (bool): If True, returns the raw chunk when JSON parsing fails; otherwise, returns None.
-         error_handler (Optional[Callable[[Exception, str], Optional[Any]]]): An optional callback function that is called when JSON parsing fails.
-             It receives the exception and the sanitized chunk as arguments. It should return a value to yield instead of the raw chunk, or None to ignore.
-         skip_regexes (Optional[List[Pattern[str]]]): A list of compiled regex patterns; chunks matching any of these are skipped.
-         extract_regexes (Optional[List[Pattern[str]]]): A list of compiled regex patterns for extracting content using capturing groups.
-
-     """
-     if not isinstance(chunk, str):
-         return None
-
-     # Fast path for empty chunks
-     if not chunk:
-         return None
-
-     # Use slicing for prefix removal (faster than startswith+slicing)
-     sanitized_chunk = chunk
-     if intro_value and len(chunk) >= len(intro_value) and chunk[:len(intro_value)] == intro_value:
-         sanitized_chunk = chunk[len(intro_value):]
-
-     # Optimize string stripping operations
-     if strip_chars is not None:
-         sanitized_chunk = sanitized_chunk.strip(strip_chars)
-     else:
-         # lstrip() is faster than strip() when we only need leading whitespace removed
-         sanitized_chunk = sanitized_chunk.lstrip()
-
-     # Skip empty chunks and markers
-     if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
-         return None
-
-     # Apply regex-based extraction first (if provided)
-     if extract_regexes:
-         for regex in extract_regexes:
-             match = regex.search(sanitized_chunk)
-             if match:
-                 # If there are capturing groups, return the first group or all groups as a tuple
-                 if match.groups():
-                     if len(match.groups()) == 1:
-                         sanitized_chunk = match.group(1)
-                     else:
-                         # Multiple groups - return as tuple converted to string for JSON compatibility
-                         sanitized_chunk = str(match.groups())
-                 else:
-                     # No capturing groups, return the full match
-                     sanitized_chunk = match.group(0)
-                 break # Use first matching extraction regex
-
-     # Apply regex-based skipping (after extraction)
-     if skip_regexes:
-         if any(regex.search(sanitized_chunk) for regex in skip_regexes):
-             return None
-
-     # JSON parsing with optimized error handling
-     if to_json:
-         try:
-             # Only strip before JSON parsing if needed
-             if sanitized_chunk[0] not in '{[' or sanitized_chunk[-1] not in '}]':
-                 sanitized_chunk = sanitized_chunk.strip()
-             return json.loads(sanitized_chunk)
-         except (json.JSONDecodeError, Exception) as e:
-             if error_handler:
-                 try:
-                     handled = error_handler(e, sanitized_chunk)
-                     if handled is not None:
-                         return handled
-                 except Exception:
-                     pass
-             return sanitized_chunk if yield_raw_on_error else None
-
-     return sanitized_chunk
-
- def _decode_byte_stream(
-     byte_iterator: Iterable[bytes],
-     encoding: EncodingType = 'utf-8',
-     errors: str = 'replace',
-     buffer_size: int = 8192
- ) -> Generator[str, None, None]:
-     """
-     Decodes a byte stream in realtime with flexible encoding support.
-
-     This function takes an iterator of bytes and decodes it into a stream of strings
-     using the specified character encoding. It handles encoding errors gracefully
-     and can be tuned for performance with the `buffer_size` parameter.
-
-     Args:
-         byte_iterator (Iterable[bytes]): An iterator that yields chunks of bytes.
-         encoding (EncodingType): The character encoding to use for decoding.
-             Defaults to 'utf-8'. Supports a wide range of encodings, including:
-             'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
-             'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
-             'shift_jis', 'euc-jp', 'euc-kr'.
-         errors (str): Specifies how encoding errors should be handled.
-             Options are 'strict' (raises an error), 'ignore' (skips the error), and
-             'replace' (replaces the erroneous byte with a replacement character).
-             Defaults to 'replace'.
-         buffer_size (int): The size of the internal buffer used for decoding.
-
-     """
-     # Initialize decoder with the specified encoding
-     try:
-         decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
-     except LookupError:
-         # Fallback to utf-8 if the encoding is not supported
-         decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
-
-     # Process byte stream in realtime
-     buffer = bytearray(buffer_size)
-     buffer_view = memoryview(buffer)
-
-     for chunk_bytes in byte_iterator:
-         if not chunk_bytes:
-             continue
-
-         try:
-             # Use buffer for processing if chunk size is appropriate
-             if len(chunk_bytes) <= buffer_size:
-                 buffer[:len(chunk_bytes)] = chunk_bytes
-                 text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
-             else:
-                 text = decoder.decode(chunk_bytes, final=False)
-
-             if text:
-                 yield text
-         except UnicodeDecodeError:
-             yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
-
-     # Final flush
-     try:
-         final_text = decoder.decode(b'', final=True)
-         if final_text:
-             yield final_text
-     except UnicodeDecodeError:
-         yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
-
- async def _decode_byte_stream_async(
-     byte_iterator: Iterable[bytes],
-     encoding: EncodingType = 'utf-8',
-     errors: str = 'replace',
-     buffer_size: int = 8192
- ) -> AsyncGenerator[str, None]:
-     """
-     Asynchronously decodes a byte stream with flexible encoding support.
-
-     This function is the asynchronous counterpart to `_decode_byte_stream`. It takes
-     an asynchronous iterator of bytes and decodes it into a stream of strings using
-     the specified character encoding. It handles encoding errors gracefully and can
-     be tuned for performance with the `buffer_size` parameter.
-
-     Args:
-         byte_iterator (Iterable[bytes]): An asynchronous iterator that yields chunks of bytes.
-         encoding (EncodingType): The character encoding to use for decoding.
-             Defaults to 'utf-8'. Supports a wide range of encodings, including:
-             'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
-             'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
-             'shift_jis', 'euc-jp', 'euc-kr'.
-         errors (str): Specifies how encoding errors should be handled.
-             Options are 'strict' (raises an error), 'ignore' (skips the error), and
-             'replace' (replaces the erroneous byte with a replacement character).
-             Defaults to 'replace'.
-         buffer_size (int): The size of the internal buffer used for decoding.
-     """
-     try:
-         decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
-     except LookupError:
-         decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
-
-     buffer = bytearray(buffer_size)
-     buffer_view = memoryview(buffer)
-
-     async for chunk_bytes in byte_iterator:
-         if not chunk_bytes:
-             continue
-         try:
-             if len(chunk_bytes) <= buffer_size:
-                 buffer[:len(chunk_bytes)] = chunk_bytes
-                 text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
-             else:
-                 text = decoder.decode(chunk_bytes, final=False)
-             if text:
-                 yield text
-         except UnicodeDecodeError:
-             yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
-
-     try:
-         final_text = decoder.decode(b'', final=True)
-         if final_text:
-             yield final_text
-     except UnicodeDecodeError:
-         yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
-
- def _sanitize_stream_sync(
-     data: Union[str, Iterable[str], Iterable[bytes]],
-     intro_value: str = "data:",
-     to_json: bool = True,
-     skip_markers: Optional[List[str]] = None,
-     strip_chars: Optional[str] = None,
-     start_marker: Optional[str] = None,
-     end_marker: Optional[str] = None,
-     content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
-     yield_raw_on_error: bool = True,
-     encoding: EncodingType = 'utf-8',
-     encoding_errors: str = 'replace',
-     buffer_size: int = 8192,
-     line_delimiter: Optional[str] = None,
-     error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-     skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
-     extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
- ) -> Generator[Any, None, None]:
-     """
-     Processes a stream of data (strings or bytes) in real-time, applying various transformations and filtering.
-
-     This function is designed to handle streaming data, allowing for operations such as
-     prefix removal, JSON parsing, skipping lines based on markers, regex-based filtering,
-     and extracting specific content. It also supports custom error handling for JSON parsing failures.
-
-     Args:
-         data: String, iterable of strings, or iterable of bytes to process.
-         intro_value: Prefix indicating the start of meaningful data.
-         to_json: Parse the chunk as JSON if True.
-         skip_markers: Lines containing any of these markers are skipped.
-         strip_chars: Characters to strip from each line.
-         start_marker: Begin processing only after this marker is found.
-         end_marker: Stop processing once this marker is found.
-         content_extractor: Optional callable to transform parsed content before yielding.
-         yield_raw_on_error: Yield raw lines when JSON parsing fails.
-         encoding: Byte stream encoding.
-         encoding_errors: How to handle encoding errors.
-         buffer_size: Buffer size for byte decoding.
-         line_delimiter: Delimiter used to split incoming text into lines. ``None``
-             uses ``str.splitlines()``.
-         error_handler: Callback invoked with ``(Exception, str)`` when JSON
-             parsing fails. If the callback returns a value, it is yielded instead of the raw line.
-         skip_regexes: List of regex patterns (strings or compiled) for skipping lines that match.
-         extract_regexes: List of regex patterns (strings or compiled) for extracting content using capturing groups.
-
-     Yields:
-         Any: Processed data, which can be a string, a dictionary (if `to_json` is True), or the result of `content_extractor`.
-
-     Raises:
-         TypeError: If the input `data` is not a string or an iterable.
-         ValueError: If any regex pattern is invalid.
-     """
-     effective_skip_markers = skip_markers or []
-     # Compile regex patterns
-     compiled_skip_regexes = _compile_regexes(skip_regexes)
-     compiled_extract_regexes = _compile_regexes(extract_regexes)
-
-     processing_active = start_marker is None
-     buffer = ""
-     found_start = False if start_marker else True
-     line_iterator: Iterable[str]
-
-     if isinstance(data, str):
-         # If data is a string, decide whether to split it into lines
-         # or treat it as an iterable containing a single chunk.
-         temp_lines: List[str]
-         if line_delimiter is None: # Default: split by newlines if present
-             if '\n' in data or '\r' in data:
-                 temp_lines = data.splitlines()
-             else:
-                 temp_lines = [data] # Treat as a single line/chunk
-         elif line_delimiter in data: # Custom delimiter found in string
-             temp_lines = data.split(line_delimiter)
-         else: # Custom delimiter not found, or string is effectively a single segment
-             temp_lines = [data]
-         line_iterator = iter(temp_lines)
-     elif hasattr(data, '__iter__'): # data is an iterable (but not a string)
-         _iter = iter(data)
-         first_item = next(_iter, None)
-
-         if first_item is None: # Iterable was empty
-             return
-
-         from itertools import chain
-         # Reconstruct the full iterable including the first_item
-         stream_input_iterable = chain([first_item], _iter)
-
-         if isinstance(first_item, bytes):
-             # Ensure stream_input_iterable is typed as Iterable[bytes] for _decode_byte_stream
-             line_iterator = _decode_byte_stream(
-                 stream_input_iterable, # type: ignore
-                 encoding=encoding,
-                 errors=encoding_errors,
-                 buffer_size=buffer_size
-             )
-         elif isinstance(first_item, str):
-             # Ensure stream_input_iterable is typed as Iterable[str]
-             line_iterator = stream_input_iterable # type: ignore
-         else:
-             raise TypeError(f"Iterable must yield strings or bytes, not {type(first_item).__name__}")
-     else: # Not a string and not an iterable
-         raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
-
-     try:
-         for line in line_iterator:
-             if not line:
-                 continue
-             buffer += line
-             while True:
-                 # Look for start marker if needed
-                 if not found_start and start_marker:
-                     idx = buffer.find(start_marker)
-                     if idx != -1:
-                         found_start = True
-                         buffer = buffer[idx + len(start_marker):]
-                     else:
-                         # Not found, keep buffering
-                         buffer = buffer[-max(len(start_marker), 256):] # avoid unbounded growth
-                         break
-                 # Look for end marker if needed
-                 if found_start and end_marker:
-                     idx = buffer.find(end_marker)
-                     if idx != -1:
-                         chunk = buffer[:idx]
-                         buffer = buffer[idx + len(end_marker):]
-                         processing_active = False
-                     else:
-                         chunk = buffer
-                         buffer = ""
-                         processing_active = True
-                     # Process chunk if we are in active region
-                     if chunk and processing_active:
-                         for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
-                             result = _process_chunk(
-                                 subline,
-                                 intro_value,
-                                 to_json,
-                                 effective_skip_markers,
-                                 strip_chars,
-                                 yield_raw_on_error,
-                                 error_handler,
-                                 compiled_skip_regexes,
-                                 compiled_extract_regexes,
-                             )
-                             if result is None:
-                                 continue
-                             if content_extractor:
-                                 try:
-                                     final_content = content_extractor(result)
-                                     if final_content is not None:
-                                         yield final_content
-                                 except Exception:
-                                     pass
-                             else:
-                                 yield result
-                     if not processing_active:
-                         found_start = False
-                     if idx == -1:
-                         break
-                 elif found_start:
-                     # No end marker, process all buffered content
-                     chunk = buffer
-                     buffer = ""
-                     if chunk:
-                         for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
-                             result = _process_chunk(
-                                 subline,
-                                 intro_value,
-                                 to_json,
-                                 effective_skip_markers,
-                                 strip_chars,
-                                 yield_raw_on_error,
-                                 error_handler,
-                                 compiled_skip_regexes,
-                                 compiled_extract_regexes,
-                             )
-                             if result is None:
-                                 continue
-                             if content_extractor:
-                                 try:
-                                     final_content = content_extractor(result)
-                                     if final_content is not None:
-                                         yield final_content
-                                 except Exception:
-                                     pass
-                             else:
-                                 yield result
-                     break
-                 else:
-                     break
-     except Exception as e:
-         import sys
-         print(f"Stream processing error: {str(e)}", file=sys.stderr)
-
-
- async def _sanitize_stream_async(
-     data: Union[str, Iterable[str], Iterable[bytes]],
-     intro_value: str = "data:",
-     to_json: bool = True,
-     skip_markers: Optional[List[str]] = None,
-     strip_chars: Optional[str] = None,
-     start_marker: Optional[str] = None,
-     end_marker: Optional[str] = None,
-     content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
-     yield_raw_on_error: bool = True,
-     encoding: EncodingType = 'utf-8',
-     encoding_errors: str = 'replace',
-     buffer_size: int = 8192,
-     line_delimiter: Optional[str] = None,
-     error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-     skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
-     extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
- ) -> AsyncGenerator[Any, None]:
-     """
-     Asynchronously processes a stream of data (strings or bytes), applying transformations and filtering.
-
-     This function is the asynchronous counterpart to `_sanitize_stream_sync`. It handles
-     streaming data, allowing for operations such as prefix removal, JSON parsing,
-     skipping lines based on markers, regex-based filtering, and extracting specific content.
-     It also supports custom error handling for JSON parsing failures.
-
-     Args:
-         data: String, iterable of strings, or iterable of bytes to process.
-         intro_value: Prefix indicating the start of meaningful data.
-         to_json: Parse JSON content if ``True``.
-         skip_markers: Lines containing any of these markers are skipped.
-         strip_chars: Characters to strip from each line.
-         start_marker: Begin processing only after this marker is found.
-         end_marker: Stop processing once this marker is found.
-         content_extractor: Optional callable to transform parsed content before yielding.
-         yield_raw_on_error: Yield raw lines when JSON parsing fails.
-         encoding: Byte stream encoding.
-         encoding_errors: How to handle encoding errors.
-         buffer_size: Buffer size for byte decoding.
-         line_delimiter: Delimiter used to split incoming text into lines. ``None`` uses ``str.splitlines()``.
-         error_handler: Callback invoked with ``(Exception, str)`` when JSON parsing fails. If the callback returns a value, it is yielded in place of the raw line.
-         skip_regexes: List of regex patterns (strings or compiled) for skipping lines that match.
-         extract_regexes: List of regex patterns (strings or compiled) for extracting content using capturing groups.
-     """
-     if isinstance(data, str):
-         for item in _sanitize_stream_sync(
-             data,
-             intro_value=intro_value,
-             to_json=to_json,
-             skip_markers=skip_markers,
-             strip_chars=strip_chars,
-             start_marker=start_marker,
-             end_marker=end_marker,
-             content_extractor=content_extractor,
-             yield_raw_on_error=yield_raw_on_error,
-             encoding=encoding,
-             encoding_errors=encoding_errors,
-             buffer_size=buffer_size,
-             line_delimiter=line_delimiter,
-             error_handler=error_handler,
-             skip_regexes=skip_regexes,
-             extract_regexes=extract_regexes,
-         ):
-             yield item
-         return
-
-     if not hasattr(data, "__aiter__"):
-         # Fallback to synchronous processing if possible
-         for item in _sanitize_stream_sync(
-             data,
-             intro_value=intro_value,
-             to_json=to_json,
-             skip_markers=skip_markers,
-             strip_chars=strip_chars,
-             start_marker=start_marker,
-             end_marker=end_marker,
-             content_extractor=content_extractor,
-             yield_raw_on_error=yield_raw_on_error,
-             encoding=encoding,
-             encoding_errors=encoding_errors,
-             buffer_size=buffer_size,
-             line_delimiter=line_delimiter,
-             error_handler=error_handler,
-             skip_regexes=skip_regexes,
-             extract_regexes=extract_regexes,
-         ):
-             yield item
-         return
-
-     effective_skip_markers = skip_markers or []
-     # Compile regex patterns
-     compiled_skip_regexes = _compile_regexes(skip_regexes)
-     compiled_extract_regexes = _compile_regexes(extract_regexes)
-
-     processing_active = start_marker is None
-     buffer = ""
-     found_start = False if start_marker else True
-
-     iterator = data.__aiter__()
-     first_item = None
-     async for first_item in iterator:
-         break
-     if first_item is None:
-         return
-     async def _chain(first, it):
-         yield first
-         async for x in it:
-             yield x
+ from .sanitize import * # noqa: E402,F401
+ from .conversation import Conversation # noqa: E402,F401
+ from .Extra.autocoder import AutoCoder # noqa: E402,F401
+ from .optimizers import Optimizers # noqa: E402,F401
+ from .prompt_manager import AwesomePrompts # noqa: E402,F401

-     stream = _chain(first_item, iterator)
+ # --- Utility Decorators ---
+ from typing import Callable
+ import time
+ import functools

-     if isinstance(first_item, bytes):
-         line_iterator = _decode_byte_stream_async(
-             stream,
-             encoding=encoding,
-             errors=encoding_errors,
-             buffer_size=buffer_size,
-         )
-     elif isinstance(first_item, str):
-         line_iterator = stream
+ def timeIt(func: Callable):
+     """
+     Decorator to measure execution time of a function (sync or async).
+     Prints: - Execution time for '{func.__name__}' : {elapsed:.6f} Seconds.
+     """
+     import asyncio
+     GREEN_BOLD = "\033[1;92m"
+     RESET = "\033[0m"
+     @functools.wraps(func)
+     def sync_wrapper(*args, **kwargs):
+         start_time = time.time()
+         result = func(*args, **kwargs)
+         end_time = time.time()
+         print(f"{GREEN_BOLD}- Execution time for '{func.__name__}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
+         return result
+
+     @functools.wraps(func)
+     async def async_wrapper(*args, **kwargs):
+         start_time = time.time()
+         result = await func(*args, **kwargs)
+         end_time = time.time()
+         print(f"{GREEN_BOLD}- Execution time for '{func.__name__}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
+         return result
+
+     if asyncio.iscoroutinefunction(func):
+         return async_wrapper
      else:
-         raise TypeError(
-             f"Stream must yield strings or bytes, not {type(first_item).__name__}"
-         )
-
-     async for line in line_iterator:
-         if not line:
-             continue
-         buffer += line
-         while True:
-             if not found_start and start_marker:
-                 idx = buffer.find(start_marker)
-                 if idx != -1:
-                     found_start = True
-                     buffer = buffer[idx + len(start_marker) :]
-                 else:
-                     buffer = buffer[-max(len(start_marker), 256) :]
-                     break
-             if found_start and end_marker:
-                 idx = buffer.find(end_marker)
-                 if idx != -1:
-                     chunk = buffer[:idx]
-                     buffer = buffer[idx + len(end_marker) :]
-                     processing_active = False
-                 else:
-                     chunk = buffer
-                     buffer = ""
-                     processing_active = True
-                 if chunk and processing_active:
-                     for subline in (
-                         chunk.split(line_delimiter)
-                         if line_delimiter is not None
-                         else chunk.splitlines()
-                     ):
-                         result = _process_chunk(
-                             subline,
-                             intro_value,
-                             to_json,
-                             effective_skip_markers,
-                             strip_chars,
-                             yield_raw_on_error,
-                             error_handler,
-                             compiled_skip_regexes,
-                             compiled_extract_regexes,
-                         )
-                         if result is None:
-                             continue
-                         if content_extractor:
-                             try:
-                                 final_content = content_extractor(result)
-                                 if final_content is not None:
-                                     yield final_content
-                             except Exception:
-                                 pass
-                         else:
-                             yield result
-                 if not processing_active:
-                     found_start = False
-                 if idx == -1:
-                     break
-             elif found_start:
-                 chunk = buffer
-                 buffer = ""
-                 if chunk:
-                     for subline in (
-                         chunk.split(line_delimiter)
-                         if line_delimiter is not None
-                         else chunk.splitlines()
-                     ):
-                         result = _process_chunk(
-                             subline,
-                             intro_value,
-                             to_json,
-                             effective_skip_markers,
-                             strip_chars,
-                             yield_raw_on_error,
-                             error_handler,
-                             compiled_skip_regexes,
-                             compiled_extract_regexes,
-                         )
-                         if result is None:
-                             continue
-                         if content_extractor:
-                             try:
-                                 final_content = content_extractor(result)
-                                 if final_content is not None:
-                                     yield final_content
-                             except Exception:
-                                 pass
-                         else:
-                             yield result
-                 break
-             else:
-                 break
-
+         return sync_wrapper

- def sanitize_stream(
-     data: Union[
-         str,
-         bytes,
-         Iterable[str],
-         Iterable[bytes],
-         AsyncIterable[str],
-         AsyncIterable[bytes],
-         dict,
-         list,
-         int,
-         float,
-         bool,
-         None,
-     ],
-     intro_value: str = "data:",
-     to_json: bool = True,
-     skip_markers: Optional[List[str]] = None,
-     strip_chars: Optional[str] = None,
-     start_marker: Optional[str] = None,
-     end_marker: Optional[str] = None,
-     content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
-     yield_raw_on_error: bool = True,
-     encoding: EncodingType = "utf-8",
-     encoding_errors: str = "replace",
-     buffer_size: int = 8192,
-     line_delimiter: Optional[str] = None,
-     error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-     skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
-     extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
-     object_mode: Literal["as_is", "json", "str"] = "json",
-     raw: bool = False,
- ) -> Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
+ def retry(retries: int = 3, delay: float = 1) -> Callable:
      """
-     Processes streaming data (strings or bytes) in either synchronous or asynchronous mode.
-     Now supports non-iterable and miscellaneous input types (dict, list, int, float, bool, None).
-     Includes regex-based content filtering and extraction capabilities.
-
-     Args:
-         data: The data to be processed. Can be a string, bytes, a synchronous iterable of strings or bytes,
-             an asynchronous iterable of strings or bytes, or a single object (dict, list, int, float, bool, None).
-         intro_value (str): Prefix indicating the start of meaningful data. Defaults to "data:".
-         to_json (bool): Parse JSON content if ``True``. Defaults to True.
-         skip_markers (Optional[List[str]]): Lines containing any of these markers are skipped. Defaults to None.
-         strip_chars (Optional[str]): Characters to strip from each line. Defaults to None.
-         start_marker (Optional[str]): Begin processing only after this marker is found. Defaults to None.
-         end_marker (Optional[str]): Stop processing once this marker is found. Defaults to None.
-         content_extractor (Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]]):
-             Optional callable to transform parsed content before yielding. Defaults to None.
-         yield_raw_on_error (bool): Yield raw lines when JSON parsing fails. Defaults to True.
-         encoding (EncodingType): Byte stream encoding. Defaults to "utf-8".
-         encoding_errors (str): How to handle encoding errors. Defaults to "replace".
-         buffer_size (int): Buffer size for byte decoding. Defaults to 8192.
-         line_delimiter (Optional[str]): Delimiter used to split incoming text into lines.
-             ``None`` uses ``str.splitlines()``. Defaults to None.
-         error_handler (Optional[Callable[[Exception, str], Optional[Any]]]):
-             Callback invoked with ``(Exception, str)`` when JSON parsing fails.
-             If the callback returns a value, it is yielded in place of the raw line. Defaults to None.
-         skip_regexes (Optional[List[Union[str, Pattern[str]]]]): List of regex patterns (strings or compiled)
-             for skipping lines that match any pattern. Defaults to None.
-         extract_regexes (Optional[List[Union[str, Pattern[str]]]]): List of regex patterns (strings or compiled)
-             for extracting content using capturing groups. If multiple groups are captured, they are returned as a tuple string. Defaults to None.
-         object_mode (Literal["as_is", "json", "str"]): How to handle non-string, non-iterable objects.
-             "json" (default) yields as JSON string, "str" yields as str(obj), "as_is" yields the object as-is.
-         raw (bool): If True, yields the raw response as returned by the API, chunk by chunk (no splitting or joining).
-
-     Returns:
-         Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
-             A generator or an asynchronous generator yielding the processed data, or raw data if raw=True.
-
-     Raises:
-         ValueError: If any regex pattern is invalid.
-     """ # --- RAW MODE: yield each chunk exactly as returned by the API ---
-     if raw:
-         def _raw_passthrough_sync(source_iter):
-             for chunk in source_iter:
-                 if isinstance(chunk, (bytes, bytearray)):
-                     # Decode bytes preserving all whitespace and newlines
-                     yield chunk.decode(encoding, encoding_errors)
-                 elif chunk is not None:
-                     # Yield string chunks as-is, preserving all formatting
-                     yield chunk
-                 # Skip None chunks entirely
-         async def _raw_passthrough_async(source_aiter):
-             async for chunk in source_aiter:
-                 if isinstance(chunk, (bytes, bytearray)):
-                     # Decode bytes preserving all whitespace and newlines
-                     yield chunk.decode(encoding, encoding_errors)
-                 elif chunk is not None:
-                     # Yield string chunks as-is, preserving all formatting
-                     yield chunk
-                 # Skip None chunks entirely
-         # Sync iterable (but not str/bytes)
-         if hasattr(data, "__iter__") and not isinstance(data, (str, bytes)):
-             return _raw_passthrough_sync(data)
-         # Async iterable
-         if hasattr(data, "__aiter__"):
-             return _raw_passthrough_async(data)
-         # Single string or bytes
-         if isinstance(data, (bytes, bytearray)):
-             def _yield_single():
-                 yield data.decode(encoding, encoding_errors)
-             return _yield_single()
-         else:
-             def _yield_single():
-                 if data is not None:
-                     yield data
-             return _yield_single()
-     # --- END RAW MODE ---
-
-     text_attr = getattr(data, "text", None)
-     content_attr = getattr(data, "content", None)
-
-     # Handle None
-     if data is None:
-         def _empty_gen():
-             if False:
-                 yield None
-         return _empty_gen()
-
-     # Handle bytes directly
-     if isinstance(data, bytes):
-         try:
-             payload = data.decode(encoding, encoding_errors)
-         except Exception:
-             payload = str(data)
-         return _sanitize_stream_sync(
-             payload, intro_value, to_json, skip_markers, strip_chars,
-             start_marker, end_marker, content_extractor, yield_raw_on_error,
-             encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-             skip_regexes, extract_regexes,
-         )
-
-     # Handle string directly
-     if isinstance(data, str):
-         return _sanitize_stream_sync(
-             data, intro_value, to_json, skip_markers, strip_chars,
-             start_marker, end_marker, content_extractor, yield_raw_on_error,
-             encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-             skip_regexes, extract_regexes,
-         )
-
-     # Handle dict, list, int, float, bool (non-iterable, non-string/bytes)
-     if isinstance(data, (dict, list, int, float, bool)):
-         if object_mode == "as_is":
-             def _as_is_gen():
-                 yield data
-             return _as_is_gen()
-         elif object_mode == "str":
-             return _sanitize_stream_sync(
-                 str(data), intro_value, to_json, skip_markers, strip_chars,
-                 start_marker, end_marker, content_extractor, yield_raw_on_error,
-                 encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-                 skip_regexes, extract_regexes,
-             )
-         else: # "json"
-             try:
-                 json_str = json.dumps(data)
-             except Exception:
-                 json_str = str(data)
-             return _sanitize_stream_sync(
-                 json_str, intro_value, to_json, skip_markers, strip_chars,
-                 start_marker, end_marker, content_extractor, yield_raw_on_error,
-                 encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-                 skip_regexes, extract_regexes,
-             )
-
-     # Handle file-like objects (optional, treat as string if .read exists)
-     if hasattr(data, "read") and callable(data.read):
-         try:
-             file_content = data.read()
-             if isinstance(file_content, bytes):
-                 file_content = file_content.decode(encoding, encoding_errors)
-             return _sanitize_stream_sync(
-                 file_content, intro_value, to_json, skip_markers, strip_chars,
-                 start_marker, end_marker, content_extractor, yield_raw_on_error,
-                 encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-                 skip_regexes, extract_regexes,
-             )
-         except Exception:
-             pass # fallback to next
-
-     # Handle .text or .content attributes
-     if isinstance(text_attr, str):
-         payload = text_attr
-         return _sanitize_stream_sync(
-             payload, intro_value, to_json, skip_markers, strip_chars,
-             start_marker, end_marker, content_extractor, yield_raw_on_error,
-             encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-             skip_regexes, extract_regexes,
-         )
-     elif isinstance(content_attr, bytes):
-         try:
-             payload = content_attr.decode(encoding, encoding_errors)
-         except Exception:
-             payload = str(content_attr)
-         return _sanitize_stream_sync(
-             payload, intro_value, to_json, skip_markers, strip_chars,
-             start_marker, end_marker, content_extractor, yield_raw_on_error,
-             encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-             skip_regexes, extract_regexes,
-         )
-
-     # Handle async iterables
-     if hasattr(data, "__aiter__"):
-         return _sanitize_stream_async(
-             data, intro_value, to_json, skip_markers, strip_chars,
-             start_marker, end_marker, content_extractor, yield_raw_on_error,
-             encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-             skip_regexes, extract_regexes,
-         )
-     # Handle sync iterables (but not strings/bytes)
-     if hasattr(data, "__iter__"):
-         return _sanitize_stream_sync(
-             data, intro_value, to_json, skip_markers, strip_chars,
-             start_marker, end_marker, content_extractor, yield_raw_on_error,
-             encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-             skip_regexes, extract_regexes,
-         )
-     # Fallback: treat as string
-     return _sanitize_stream_sync(
-         str(data), intro_value, to_json, skip_markers, strip_chars,
-         start_marker, end_marker, content_extractor, yield_raw_on_error,
-         encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-         skip_regexes, extract_regexes,
-     )
-
- # --- Decorator version of sanitize_stream ---
- import functools
- from typing import overload
-
- def _sanitize_stream_decorator(
-     _func=None,
-     *,
-     intro_value: str = "data:",
-     to_json: bool = True,
-     skip_markers: Optional[List[str]] = None,
-     strip_chars: Optional[str] = None,
-     start_marker: Optional[str] = None,
-     end_marker: Optional[str] = None,
-     content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
-     yield_raw_on_error: bool = True,
-     encoding: EncodingType = "utf-8",
-     encoding_errors: str = "replace",
-     buffer_size: int = 8192,
-     line_delimiter: Optional[str] = None,
-     error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-     skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
-     extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
-     object_mode: Literal["as_is", "json", "str"] = "json",
-     raw: bool = False,
- ):
+     Decorator to retry a function on exception.
      """
-     Decorator for sanitize_stream. Can be used as @sanitize_stream or @sanitize_stream(...).
-     All arguments are the same as sanitize_stream().
-     """
-     def decorator(func):
-         if asyncio.iscoroutinefunction(func):
-             @functools.wraps(func)
-             async def async_wrapper(*args, **kwargs):
-                 result = await func(*args, **kwargs)
-                 return sanitize_stream(
-                     result,
-                     intro_value=intro_value,
-                     to_json=to_json,
-                     skip_markers=skip_markers,
-                     strip_chars=strip_chars,
-                     start_marker=start_marker,
-                     end_marker=end_marker,
-                     content_extractor=content_extractor,
-                     yield_raw_on_error=yield_raw_on_error,
-                     encoding=encoding,
-                     encoding_errors=encoding_errors,
-                     buffer_size=buffer_size,
-                     line_delimiter=line_delimiter,
-                     error_handler=error_handler,
-                     skip_regexes=skip_regexes,
-                     extract_regexes=extract_regexes,
-                     object_mode=object_mode,
-                     raw=raw,
-                 )
-             return async_wrapper
-         else:
-             @functools.wraps(func)
-             def sync_wrapper(*args, **kwargs):
-                 result = func(*args, **kwargs)
-                 return sanitize_stream(
-                     result,
-                     intro_value=intro_value,
-                     to_json=to_json,
-                     skip_markers=skip_markers,
-                     strip_chars=strip_chars,
-                     start_marker=start_marker,
-                     end_marker=end_marker,
-                     content_extractor=content_extractor,
-                     yield_raw_on_error=yield_raw_on_error,
-                     encoding=encoding,
-                     encoding_errors=encoding_errors,
-                     buffer_size=buffer_size,
-                     line_delimiter=line_delimiter,
-                     error_handler=error_handler,
-                     skip_regexes=skip_regexes,
-                     extract_regexes=extract_regexes,
-                     object_mode=object_mode,
-                     raw=raw,
-                 )
-             return sync_wrapper
-     if _func is None:
-         return decorator
-     else:
-         return decorator(_func)
-
- # Alias for decorator usage
- LITSTREAM = sanitize_stream
-
- # Decorator aliases
- sanitize_stream_decorator = _sanitize_stream_decorator
- lit_streamer = _sanitize_stream_decorator
-
- # Allow @sanitize_stream and @lit_streamer as decorators
- import asyncio
- sanitize_stream.__decorator__ = _sanitize_stream_decorator
- LITSTREAM.__decorator__ = _sanitize_stream_decorator
- lit_streamer.__decorator__ = _sanitize_stream_decorator
-
- def __getattr__(name):
-     if name == 'sanitize_stream':
-         return sanitize_stream
-     if name == 'LITSTREAM':
-         return LITSTREAM
-     if name == 'sanitize_stream_decorator':
-         return _sanitize_stream_decorator
-     if name == 'lit_streamer':
-         return _sanitize_stream_decorator
-     raise AttributeError(f"module {__name__} has no attribute {name}")
- from .conversation import Conversation # noqa: E402,F401
- from .Extra.autocoder import AutoCoder # noqa: E402,F401
- from .optimizers import Optimizers # noqa: E402,F401
- from .prompt_manager import AwesomePrompts # noqa: E402,F401
+     def decorator(func: Callable):
+         @functools.wraps(func)
+         def wrapper(*args, **kwargs):
+             last_exc = None
+             for attempt in range(retries):
+                 try:
+                     return func(*args, **kwargs)
+                 except Exception as exc:
+                     last_exc = exc
+                     print(f"Attempt {attempt + 1} failed: {exc}. Retrying in {delay} seconds...")
+                     time.sleep(delay)
+             raise last_exc
+         return wrapper
+     return decorator
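
For reference, a hedged usage sketch of the two decorators added above. The decorators themselves are as defined in the 8.3.5 diff; flaky_fetch is a hypothetical stand-in for any unreliable call:

    import random
    from webscout.AIutel import timeIt, retry

    @timeIt                       # prints the elapsed time once the call returns
    @retry(retries=3, delay=0.5)  # re-invokes on any exception, re-raises the last one
    def flaky_fetch() -> str:
        if random.random() < 0.7:  # hypothetical transient failure
            raise ConnectionError("transient failure")
        return "ok"

    print(flaky_fetch())

Note the stacking order: retry wraps the raw function, so timeIt reports the total time across all attempts, including the sleep between retries.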