webscout 8.3.2__py3-none-any.whl → 8.3.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIutel.py +367 -41
- webscout/Bard.py +2 -22
- webscout/Bing_search.py +1 -2
- webscout/Provider/AISEARCH/__init__.py +1 -0
- webscout/Provider/AISEARCH/scira_search.py +24 -11
- webscout/Provider/AISEARCH/stellar_search.py +132 -0
- webscout/Provider/Deepinfra.py +75 -57
- webscout/Provider/ExaChat.py +93 -63
- webscout/Provider/Flowith.py +1 -1
- webscout/Provider/FreeGemini.py +2 -2
- webscout/Provider/Gemini.py +3 -10
- webscout/Provider/GeminiProxy.py +31 -5
- webscout/Provider/HeckAI.py +85 -80
- webscout/Provider/Jadve.py +56 -50
- webscout/Provider/LambdaChat.py +39 -31
- webscout/Provider/MiniMax.py +207 -0
- webscout/Provider/Nemotron.py +41 -13
- webscout/Provider/Netwrck.py +39 -59
- webscout/Provider/OLLAMA.py +8 -9
- webscout/Provider/OPENAI/BLACKBOXAI.py +0 -1
- webscout/Provider/OPENAI/MiniMax.py +298 -0
- webscout/Provider/OPENAI/README.md +31 -30
- webscout/Provider/OPENAI/TogetherAI.py +4 -17
- webscout/Provider/OPENAI/__init__.py +4 -2
- webscout/Provider/OPENAI/autoproxy.py +753 -18
- webscout/Provider/OPENAI/base.py +7 -76
- webscout/Provider/OPENAI/copilot.py +73 -26
- webscout/Provider/OPENAI/deepinfra.py +96 -132
- webscout/Provider/OPENAI/exachat.py +9 -5
- webscout/Provider/OPENAI/flowith.py +179 -166
- webscout/Provider/OPENAI/friendli.py +233 -0
- webscout/Provider/OPENAI/monochat.py +329 -0
- webscout/Provider/OPENAI/netwrck.py +4 -7
- webscout/Provider/OPENAI/pydantic_imports.py +1 -172
- webscout/Provider/OPENAI/qodo.py +630 -0
- webscout/Provider/OPENAI/scirachat.py +82 -49
- webscout/Provider/OPENAI/textpollinations.py +13 -12
- webscout/Provider/OPENAI/toolbaz.py +1 -0
- webscout/Provider/OPENAI/typegpt.py +4 -4
- webscout/Provider/OPENAI/utils.py +19 -42
- webscout/Provider/OPENAI/x0gpt.py +14 -2
- webscout/Provider/OpenGPT.py +54 -32
- webscout/Provider/PI.py +58 -84
- webscout/Provider/Qodo.py +454 -0
- webscout/Provider/StandardInput.py +32 -13
- webscout/Provider/TTI/README.md +9 -9
- webscout/Provider/TTI/__init__.py +2 -1
- webscout/Provider/TTI/aiarta.py +92 -78
- webscout/Provider/TTI/infip.py +212 -0
- webscout/Provider/TTI/monochat.py +220 -0
- webscout/Provider/TeachAnything.py +11 -3
- webscout/Provider/TextPollinationsAI.py +91 -82
- webscout/Provider/TogetherAI.py +32 -48
- webscout/Provider/Venice.py +37 -46
- webscout/Provider/VercelAI.py +27 -24
- webscout/Provider/WiseCat.py +35 -35
- webscout/Provider/WrDoChat.py +22 -26
- webscout/Provider/WritingMate.py +26 -22
- webscout/Provider/__init__.py +6 -6
- webscout/Provider/copilot.py +58 -61
- webscout/Provider/freeaichat.py +64 -55
- webscout/Provider/granite.py +48 -57
- webscout/Provider/koala.py +51 -39
- webscout/Provider/learnfastai.py +49 -64
- webscout/Provider/llmchat.py +79 -93
- webscout/Provider/llmchatco.py +63 -78
- webscout/Provider/monochat.py +275 -0
- webscout/Provider/multichat.py +51 -40
- webscout/Provider/oivscode.py +1 -1
- webscout/Provider/scira_chat.py +257 -104
- webscout/Provider/scnet.py +13 -13
- webscout/Provider/searchchat.py +13 -13
- webscout/Provider/sonus.py +12 -11
- webscout/Provider/toolbaz.py +25 -8
- webscout/Provider/turboseek.py +41 -42
- webscout/Provider/typefully.py +27 -12
- webscout/Provider/typegpt.py +43 -48
- webscout/Provider/uncovr.py +55 -90
- webscout/Provider/x0gpt.py +325 -299
- webscout/Provider/yep.py +79 -96
- webscout/__init__.py +7 -2
- webscout/auth/__init__.py +12 -1
- webscout/auth/providers.py +27 -5
- webscout/auth/routes.py +146 -105
- webscout/auth/server.py +367 -312
- webscout/client.py +121 -116
- webscout/litagent/Readme.md +68 -55
- webscout/litagent/agent.py +99 -9
- webscout/version.py +1 -1
- {webscout-8.3.2.dist-info → webscout-8.3.4.dist-info}/METADATA +102 -91
- {webscout-8.3.2.dist-info → webscout-8.3.4.dist-info}/RECORD +95 -107
- webscout/Provider/AI21.py +0 -177
- webscout/Provider/HuggingFaceChat.py +0 -469
- webscout/Provider/OPENAI/freeaichat.py +0 -363
- webscout/Provider/TTI/fastflux.py +0 -233
- webscout/Provider/Writecream.py +0 -246
- webscout/auth/static/favicon.svg +0 -11
- webscout/auth/swagger_ui.py +0 -203
- webscout/auth/templates/components/authentication.html +0 -237
- webscout/auth/templates/components/base.html +0 -103
- webscout/auth/templates/components/endpoints.html +0 -750
- webscout/auth/templates/components/examples.html +0 -491
- webscout/auth/templates/components/footer.html +0 -75
- webscout/auth/templates/components/header.html +0 -27
- webscout/auth/templates/components/models.html +0 -286
- webscout/auth/templates/components/navigation.html +0 -70
- webscout/auth/templates/static/api.js +0 -455
- webscout/auth/templates/static/icons.js +0 -168
- webscout/auth/templates/static/main.js +0 -784
- webscout/auth/templates/static/particles.js +0 -201
- webscout/auth/templates/static/styles.css +0 -3353
- webscout/auth/templates/static/ui.js +0 -374
- webscout/auth/templates/swagger_ui.html +0 -170
- {webscout-8.3.2.dist-info → webscout-8.3.4.dist-info}/WHEEL +0 -0
- {webscout-8.3.2.dist-info → webscout-8.3.4.dist-info}/entry_points.txt +0 -0
- {webscout-8.3.2.dist-info → webscout-8.3.4.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.3.2.dist-info → webscout-8.3.4.dist-info}/top_level.txt +0 -0
webscout/AIutel.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import codecs
|
|
2
2
|
import json
|
|
3
|
+
import re
|
|
3
4
|
from typing import (
|
|
4
5
|
Any,
|
|
5
6
|
AsyncGenerator,
|
|
@@ -11,6 +12,7 @@ from typing import (
|
|
|
11
12
|
List,
|
|
12
13
|
Literal,
|
|
13
14
|
Optional,
|
|
15
|
+
Pattern,
|
|
14
16
|
Union,
|
|
15
17
|
)
|
|
16
18
|
|
|
@@ -19,6 +21,36 @@ EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252',
|
|
|
19
21
|
'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
|
|
20
22
|
'shift_jis', 'euc-jp', 'euc-kr']
|
|
21
23
|
|
|
24
|
+
def _compile_regexes(patterns: Optional[List[Union[str, Pattern[str]]]]) -> Optional[List[Pattern[str]]]:
    """
    Compile regex patterns from strings, passing already-compiled patterns through.

    Args:
        patterns: List of regex patterns as strings or compiled Pattern objects.

    Returns:
        List of compiled Pattern objects in input order, or None if ``patterns``
        is None or empty.

    Raises:
        ValueError: If an entry is neither a string nor a compiled pattern, or if
            a string is not a valid regular expression. In the latter case the
            underlying ``re.error`` is attached as ``__cause__``.
    """
    if not patterns:
        return None

    compiled_patterns: List[Pattern[str]] = []
    for i, pattern in enumerate(patterns):
        if isinstance(pattern, re.Pattern):
            # Already compiled: keep the caller's object (and its flags) untouched.
            compiled_patterns.append(pattern)
        elif isinstance(pattern, str):
            # Only the compile step can raise re.error, so only it is guarded.
            try:
                compiled_patterns.append(re.compile(pattern))
            except re.error as e:
                raise ValueError(f"Invalid regex pattern at index {i}: '{pattern}' - {str(e)}") from e
        else:
            raise ValueError(f"Pattern at index {i} must be a string or compiled regex pattern, got {type(pattern)}")

    return compiled_patterns
|
|
53
|
+
|
|
22
54
|
def _process_chunk(
|
|
23
55
|
chunk: str,
|
|
24
56
|
intro_value: str,
|
|
@@ -27,6 +59,8 @@ def _process_chunk(
|
|
|
27
59
|
strip_chars: Optional[str],
|
|
28
60
|
yield_raw_on_error: bool,
|
|
29
61
|
error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
|
|
62
|
+
skip_regexes: Optional[List[Pattern[str]]] = None,
|
|
63
|
+
extract_regexes: Optional[List[Pattern[str]]] = None,
|
|
30
64
|
) -> Union[str, Dict[str, Any], None]:
|
|
31
65
|
"""
|
|
32
66
|
Sanitizes and potentially parses a single chunk of text.
|
|
@@ -35,6 +69,8 @@ def _process_chunk(
|
|
|
35
69
|
- Removes a specified prefix (`intro_value`).
|
|
36
70
|
- Strips leading/trailing characters (`strip_chars`).
|
|
37
71
|
- Skips chunks matching specific markers (`skip_markers`).
|
|
72
|
+
- Skips chunks matching regex patterns (`skip_regexes`).
|
|
73
|
+
- Extracts content using regex capturing groups (`extract_regexes`).
|
|
38
74
|
- Optionally parses the chunk as JSON (`to_json`).
|
|
39
75
|
- Handles JSON parsing errors with an optional callback (`error_handler`).
|
|
40
76
|
|
|
@@ -47,18 +83,9 @@ def _process_chunk(
|
|
|
47
83
|
yield_raw_on_error (bool): If True, returns the raw chunk when JSON parsing fails; otherwise, returns None.
|
|
48
84
|
error_handler (Optional[Callable[[Exception, str], Optional[Any]]]): An optional callback function that is called when JSON parsing fails.
|
|
49
85
|
It receives the exception and the sanitized chunk as arguments. It should return a value to yield instead of the raw chunk, or None to ignore.
|
|
86
|
+
skip_regexes (Optional[List[Pattern[str]]]): A list of compiled regex patterns; chunks matching any of these are skipped.
|
|
87
|
+
extract_regexes (Optional[List[Pattern[str]]]): A list of compiled regex patterns for extracting content using capturing groups.
|
|
50
88
|
|
|
51
|
-
|
|
52
|
-
Args:
|
|
53
|
-
chunk: Chunk of text to process.
|
|
54
|
-
intro_value: Prefix to remove from the chunk.
|
|
55
|
-
to_json: Parse the chunk as JSON if True.
|
|
56
|
-
skip_markers: List of markers to skip.
|
|
57
|
-
strip_chars: Characters to strip from the chunk.
|
|
58
|
-
yield_raw_on_error: Whether to return the raw chunk on parse errors.
|
|
59
|
-
error_handler: Optional callback ``Callable[[Exception, str], Optional[Any]]``
|
|
60
|
-
invoked when JSON parsing fails. The callback should return a value to
|
|
61
|
-
yield instead of the raw chunk, or ``None`` to ignore.
|
|
62
89
|
"""
|
|
63
90
|
if not isinstance(chunk, str):
|
|
64
91
|
return None
|
|
@@ -83,6 +110,28 @@ def _process_chunk(
|
|
|
83
110
|
if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
|
|
84
111
|
return None
|
|
85
112
|
|
|
113
|
+
# Apply regex-based extraction first (if provided)
|
|
114
|
+
if extract_regexes:
|
|
115
|
+
for regex in extract_regexes:
|
|
116
|
+
match = regex.search(sanitized_chunk)
|
|
117
|
+
if match:
|
|
118
|
+
# If there are capturing groups, return the first group or all groups as a tuple
|
|
119
|
+
if match.groups():
|
|
120
|
+
if len(match.groups()) == 1:
|
|
121
|
+
sanitized_chunk = match.group(1)
|
|
122
|
+
else:
|
|
123
|
+
# Multiple groups - return as tuple converted to string for JSON compatibility
|
|
124
|
+
sanitized_chunk = str(match.groups())
|
|
125
|
+
else:
|
|
126
|
+
# No capturing groups, return the full match
|
|
127
|
+
sanitized_chunk = match.group(0)
|
|
128
|
+
break # Use first matching extraction regex
|
|
129
|
+
|
|
130
|
+
# Apply regex-based skipping (after extraction)
|
|
131
|
+
if skip_regexes:
|
|
132
|
+
if any(regex.search(sanitized_chunk) for regex in skip_regexes):
|
|
133
|
+
return None
|
|
134
|
+
|
|
86
135
|
# JSON parsing with optimized error handling
|
|
87
136
|
if to_json:
|
|
88
137
|
try:
|
|
@@ -128,11 +177,6 @@ def _decode_byte_stream(
|
|
|
128
177
|
Defaults to 'replace'.
|
|
129
178
|
buffer_size (int): The size of the internal buffer used for decoding.
|
|
130
179
|
|
|
131
|
-
Args:
|
|
132
|
-
byte_iterator: Iterator yielding bytes
|
|
133
|
-
encoding: Character encoding to use
|
|
134
|
-
errors: How to handle encoding errors ('strict', 'ignore', 'replace')
|
|
135
|
-
buffer_size: Size of internal buffer for performance tuning
|
|
136
180
|
"""
|
|
137
181
|
# Initialize decoder with the specified encoding
|
|
138
182
|
try:
|
|
@@ -241,18 +285,20 @@ def _sanitize_stream_sync(
|
|
|
241
285
|
buffer_size: int = 8192,
|
|
242
286
|
line_delimiter: Optional[str] = None,
|
|
243
287
|
error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
|
|
288
|
+
skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
|
|
289
|
+
extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
|
|
244
290
|
) -> Generator[Any, None, None]:
|
|
245
291
|
"""
|
|
246
292
|
Processes a stream of data (strings or bytes) in real-time, applying various transformations and filtering.
|
|
247
293
|
|
|
248
294
|
This function is designed to handle streaming data, allowing for operations such as
|
|
249
|
-
prefix removal, JSON parsing, skipping lines based on markers,
|
|
250
|
-
It also supports custom error handling for JSON parsing failures.
|
|
295
|
+
prefix removal, JSON parsing, skipping lines based on markers, regex-based filtering,
|
|
296
|
+
and extracting specific content. It also supports custom error handling for JSON parsing failures.
|
|
251
297
|
|
|
252
298
|
Args:
|
|
253
299
|
data: String, iterable of strings, or iterable of bytes to process.
|
|
254
300
|
intro_value: Prefix indicating the start of meaningful data.
|
|
255
|
-
to_json: Parse JSON
|
|
301
|
+
to_json: Parse the chunk as JSON if True.
|
|
256
302
|
skip_markers: Lines containing any of these markers are skipped.
|
|
257
303
|
strip_chars: Characters to strip from each line.
|
|
258
304
|
start_marker: Begin processing only after this marker is found.
|
|
@@ -266,14 +312,21 @@ def _sanitize_stream_sync(
|
|
|
266
312
|
uses ``str.splitlines()``.
|
|
267
313
|
error_handler: Callback invoked with ``(Exception, str)`` when JSON
|
|
268
314
|
parsing fails. If the callback returns a value, it is yielded instead of the raw line.
|
|
315
|
+
skip_regexes: List of regex patterns (strings or compiled) for skipping lines that match.
|
|
316
|
+
extract_regexes: List of regex patterns (strings or compiled) for extracting content using capturing groups.
|
|
269
317
|
|
|
270
318
|
Yields:
|
|
271
319
|
Any: Processed data, which can be a string, a dictionary (if `to_json` is True), or the result of `content_extractor`.
|
|
272
320
|
|
|
273
321
|
Raises:
|
|
274
322
|
TypeError: If the input `data` is not a string or an iterable.
|
|
323
|
+
ValueError: If any regex pattern is invalid.
|
|
275
324
|
"""
|
|
276
325
|
effective_skip_markers = skip_markers or []
|
|
326
|
+
# Compile regex patterns
|
|
327
|
+
compiled_skip_regexes = _compile_regexes(skip_regexes)
|
|
328
|
+
compiled_extract_regexes = _compile_regexes(extract_regexes)
|
|
329
|
+
|
|
277
330
|
processing_active = start_marker is None
|
|
278
331
|
buffer = ""
|
|
279
332
|
found_start = False if start_marker else True
|
|
@@ -358,6 +411,8 @@ def _sanitize_stream_sync(
|
|
|
358
411
|
strip_chars,
|
|
359
412
|
yield_raw_on_error,
|
|
360
413
|
error_handler,
|
|
414
|
+
compiled_skip_regexes,
|
|
415
|
+
compiled_extract_regexes,
|
|
361
416
|
)
|
|
362
417
|
if result is None:
|
|
363
418
|
continue
|
|
@@ -388,6 +443,8 @@ def _sanitize_stream_sync(
|
|
|
388
443
|
strip_chars,
|
|
389
444
|
yield_raw_on_error,
|
|
390
445
|
error_handler,
|
|
446
|
+
compiled_skip_regexes,
|
|
447
|
+
compiled_extract_regexes,
|
|
391
448
|
)
|
|
392
449
|
if result is None:
|
|
393
450
|
continue
|
|
@@ -423,14 +480,16 @@ async def _sanitize_stream_async(
|
|
|
423
480
|
buffer_size: int = 8192,
|
|
424
481
|
line_delimiter: Optional[str] = None,
|
|
425
482
|
error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
|
|
483
|
+
skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
|
|
484
|
+
extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
|
|
426
485
|
) -> AsyncGenerator[Any, None]:
|
|
427
486
|
"""
|
|
428
487
|
Asynchronously processes a stream of data (strings or bytes), applying transformations and filtering.
|
|
429
488
|
|
|
430
489
|
This function is the asynchronous counterpart to `_sanitize_stream_sync`. It handles
|
|
431
490
|
streaming data, allowing for operations such as prefix removal, JSON parsing,
|
|
432
|
-
skipping lines based on markers, and extracting specific content.
|
|
433
|
-
custom error handling for JSON parsing failures.
|
|
491
|
+
skipping lines based on markers, regex-based filtering, and extracting specific content.
|
|
492
|
+
It also supports custom error handling for JSON parsing failures.
|
|
434
493
|
|
|
435
494
|
Args:
|
|
436
495
|
data: String, iterable of strings, or iterable of bytes to process.
|
|
@@ -447,6 +506,8 @@ async def _sanitize_stream_async(
|
|
|
447
506
|
buffer_size: Buffer size for byte decoding.
|
|
448
507
|
line_delimiter: Delimiter used to split incoming text into lines. ``None`` uses ``str.splitlines()``.
|
|
449
508
|
error_handler: Callback invoked with ``(Exception, str)`` when JSON parsing fails. If the callback returns a value, it is yielded in place of the raw line.
|
|
509
|
+
skip_regexes: List of regex patterns (strings or compiled) for skipping lines that match.
|
|
510
|
+
extract_regexes: List of regex patterns (strings or compiled) for extracting content using capturing groups.
|
|
450
511
|
"""
|
|
451
512
|
if isinstance(data, str):
|
|
452
513
|
for item in _sanitize_stream_sync(
|
|
@@ -464,6 +525,8 @@ async def _sanitize_stream_async(
|
|
|
464
525
|
buffer_size=buffer_size,
|
|
465
526
|
line_delimiter=line_delimiter,
|
|
466
527
|
error_handler=error_handler,
|
|
528
|
+
skip_regexes=skip_regexes,
|
|
529
|
+
extract_regexes=extract_regexes,
|
|
467
530
|
):
|
|
468
531
|
yield item
|
|
469
532
|
return
|
|
@@ -485,11 +548,17 @@ async def _sanitize_stream_async(
|
|
|
485
548
|
buffer_size=buffer_size,
|
|
486
549
|
line_delimiter=line_delimiter,
|
|
487
550
|
error_handler=error_handler,
|
|
551
|
+
skip_regexes=skip_regexes,
|
|
552
|
+
extract_regexes=extract_regexes,
|
|
488
553
|
):
|
|
489
554
|
yield item
|
|
490
555
|
return
|
|
491
556
|
|
|
492
557
|
effective_skip_markers = skip_markers or []
|
|
558
|
+
# Compile regex patterns
|
|
559
|
+
compiled_skip_regexes = _compile_regexes(skip_regexes)
|
|
560
|
+
compiled_extract_regexes = _compile_regexes(extract_regexes)
|
|
561
|
+
|
|
493
562
|
processing_active = start_marker is None
|
|
494
563
|
buffer = ""
|
|
495
564
|
found_start = False if start_marker else True
|
|
@@ -558,6 +627,8 @@ async def _sanitize_stream_async(
|
|
|
558
627
|
strip_chars,
|
|
559
628
|
yield_raw_on_error,
|
|
560
629
|
error_handler,
|
|
630
|
+
compiled_skip_regexes,
|
|
631
|
+
compiled_extract_regexes,
|
|
561
632
|
)
|
|
562
633
|
if result is None:
|
|
563
634
|
continue
|
|
@@ -591,6 +662,8 @@ async def _sanitize_stream_async(
|
|
|
591
662
|
strip_chars,
|
|
592
663
|
yield_raw_on_error,
|
|
593
664
|
error_handler,
|
|
665
|
+
compiled_skip_regexes,
|
|
666
|
+
compiled_extract_regexes,
|
|
594
667
|
)
|
|
595
668
|
if result is None:
|
|
596
669
|
continue
|
|
@@ -611,10 +684,17 @@ async def _sanitize_stream_async(
|
|
|
611
684
|
def sanitize_stream(
|
|
612
685
|
data: Union[
|
|
613
686
|
str,
|
|
687
|
+
bytes,
|
|
614
688
|
Iterable[str],
|
|
615
689
|
Iterable[bytes],
|
|
616
690
|
AsyncIterable[str],
|
|
617
691
|
AsyncIterable[bytes],
|
|
692
|
+
dict,
|
|
693
|
+
list,
|
|
694
|
+
int,
|
|
695
|
+
float,
|
|
696
|
+
bool,
|
|
697
|
+
None,
|
|
618
698
|
],
|
|
619
699
|
intro_value: str = "data:",
|
|
620
700
|
to_json: bool = True,
|
|
@@ -629,19 +709,19 @@ def sanitize_stream(
|
|
|
629
709
|
buffer_size: int = 8192,
|
|
630
710
|
line_delimiter: Optional[str] = None,
|
|
631
711
|
error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
|
|
712
|
+
skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
|
|
713
|
+
extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
|
|
714
|
+
object_mode: Literal["as_is", "json", "str"] = "json",
|
|
715
|
+
raw: bool = False,
|
|
632
716
|
) -> Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
|
|
633
717
|
"""
|
|
634
718
|
Processes streaming data (strings or bytes) in either synchronous or asynchronous mode.
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
asynchronous data streams. It automatically detects the type of input data and
|
|
638
|
-
dispatches it to the appropriate processing function (`_sanitize_stream_sync` or
|
|
639
|
-
`_sanitize_stream_async`).
|
|
719
|
+
Now supports non-iterable and miscellaneous input types (dict, list, int, float, bool, None).
|
|
720
|
+
Includes regex-based content filtering and extraction capabilities.
|
|
640
721
|
|
|
641
722
|
Args:
|
|
642
|
-
data
|
|
643
|
-
|
|
644
|
-
or an asynchronous iterable of strings or bytes.
|
|
723
|
+
data: The data to be processed. Can be a string, bytes, a synchronous iterable of strings or bytes,
|
|
724
|
+
an asynchronous iterable of strings or bytes, or a single object (dict, list, int, float, bool, None).
|
|
645
725
|
intro_value (str): Prefix indicating the start of meaningful data. Defaults to "data:".
|
|
646
726
|
to_json (bool): Parse JSON content if ``True``. Defaults to True.
|
|
647
727
|
skip_markers (Optional[List[str]]): Lines containing any of these markers are skipped. Defaults to None.
|
|
@@ -659,37 +739,283 @@ def sanitize_stream(
|
|
|
659
739
|
error_handler (Optional[Callable[[Exception, str], Optional[Any]]]):
|
|
660
740
|
Callback invoked with ``(Exception, str)`` when JSON parsing fails.
|
|
661
741
|
If the callback returns a value, it is yielded in place of the raw line. Defaults to None.
|
|
742
|
+
skip_regexes (Optional[List[Union[str, Pattern[str]]]]): List of regex patterns (strings or compiled)
|
|
743
|
+
for skipping lines that match any pattern. Defaults to None.
|
|
744
|
+
extract_regexes (Optional[List[Union[str, Pattern[str]]]]): List of regex patterns (strings or compiled)
|
|
745
|
+
for extracting content using capturing groups. If multiple groups are captured, they are returned as a tuple string. Defaults to None.
|
|
746
|
+
object_mode (Literal["as_is", "json", "str"]): How to handle non-string, non-iterable objects.
|
|
747
|
+
"json" (default) yields as JSON string, "str" yields as str(obj), "as_is" yields the object as-is.
|
|
748
|
+
raw (bool): If True, yields the raw response as returned by the API, chunk by chunk (no splitting or joining).
|
|
662
749
|
|
|
663
750
|
Returns:
|
|
664
751
|
Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
|
|
665
|
-
A generator or an asynchronous generator yielding the processed data.
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
752
|
+
A generator or an asynchronous generator yielding the processed data, or raw data if raw=True.
|
|
753
|
+
|
|
754
|
+
Raises:
|
|
755
|
+
ValueError: If any regex pattern is invalid.
|
|
756
|
+
""" # --- RAW MODE: yield each chunk exactly as returned by the API ---
|
|
757
|
+
if raw:
|
|
758
|
+
def _raw_passthrough_sync(source_iter):
|
|
759
|
+
for chunk in source_iter:
|
|
760
|
+
if isinstance(chunk, (bytes, bytearray)):
|
|
761
|
+
# Decode bytes preserving all whitespace and newlines
|
|
762
|
+
yield chunk.decode(encoding, encoding_errors)
|
|
763
|
+
elif chunk is not None:
|
|
764
|
+
# Yield string chunks as-is, preserving all formatting
|
|
765
|
+
yield chunk
|
|
766
|
+
# Skip None chunks entirely
|
|
767
|
+
async def _raw_passthrough_async(source_aiter):
|
|
768
|
+
async for chunk in source_aiter:
|
|
769
|
+
if isinstance(chunk, (bytes, bytearray)):
|
|
770
|
+
# Decode bytes preserving all whitespace and newlines
|
|
771
|
+
yield chunk.decode(encoding, encoding_errors)
|
|
772
|
+
elif chunk is not None:
|
|
773
|
+
# Yield string chunks as-is, preserving all formatting
|
|
774
|
+
yield chunk
|
|
775
|
+
# Skip None chunks entirely
|
|
776
|
+
# Sync iterable (but not str/bytes)
|
|
777
|
+
if hasattr(data, "__iter__") and not isinstance(data, (str, bytes)):
|
|
778
|
+
return _raw_passthrough_sync(data)
|
|
779
|
+
# Async iterable
|
|
780
|
+
if hasattr(data, "__aiter__"):
|
|
781
|
+
return _raw_passthrough_async(data)
|
|
782
|
+
# Single string or bytes
|
|
783
|
+
if isinstance(data, (bytes, bytearray)):
|
|
784
|
+
def _yield_single():
|
|
785
|
+
yield data.decode(encoding, encoding_errors)
|
|
786
|
+
return _yield_single()
|
|
787
|
+
else:
|
|
788
|
+
def _yield_single():
|
|
789
|
+
if data is not None:
|
|
790
|
+
yield data
|
|
791
|
+
return _yield_single()
|
|
792
|
+
# --- END RAW MODE ---
|
|
669
793
|
|
|
670
794
|
text_attr = getattr(data, "text", None)
|
|
671
795
|
content_attr = getattr(data, "content", None)
|
|
672
796
|
|
|
797
|
+
# Handle None
|
|
798
|
+
if data is None:
|
|
799
|
+
def _empty_gen():
|
|
800
|
+
if False:
|
|
801
|
+
yield None
|
|
802
|
+
return _empty_gen()
|
|
803
|
+
|
|
804
|
+
# Handle bytes directly
|
|
805
|
+
if isinstance(data, bytes):
|
|
806
|
+
try:
|
|
807
|
+
payload = data.decode(encoding, encoding_errors)
|
|
808
|
+
except Exception:
|
|
809
|
+
payload = str(data)
|
|
810
|
+
return _sanitize_stream_sync(
|
|
811
|
+
payload, intro_value, to_json, skip_markers, strip_chars,
|
|
812
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
813
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
814
|
+
skip_regexes, extract_regexes,
|
|
815
|
+
)
|
|
816
|
+
|
|
817
|
+
# Handle string directly
|
|
818
|
+
if isinstance(data, str):
|
|
819
|
+
return _sanitize_stream_sync(
|
|
820
|
+
data, intro_value, to_json, skip_markers, strip_chars,
|
|
821
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
822
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
823
|
+
skip_regexes, extract_regexes,
|
|
824
|
+
)
|
|
825
|
+
|
|
826
|
+
# Handle dict, list, int, float, bool (non-iterable, non-string/bytes)
|
|
827
|
+
if isinstance(data, (dict, list, int, float, bool)):
|
|
828
|
+
if object_mode == "as_is":
|
|
829
|
+
def _as_is_gen():
|
|
830
|
+
yield data
|
|
831
|
+
return _as_is_gen()
|
|
832
|
+
elif object_mode == "str":
|
|
833
|
+
return _sanitize_stream_sync(
|
|
834
|
+
str(data), intro_value, to_json, skip_markers, strip_chars,
|
|
835
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
836
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
837
|
+
skip_regexes, extract_regexes,
|
|
838
|
+
)
|
|
839
|
+
else: # "json"
|
|
840
|
+
try:
|
|
841
|
+
json_str = json.dumps(data)
|
|
842
|
+
except Exception:
|
|
843
|
+
json_str = str(data)
|
|
844
|
+
return _sanitize_stream_sync(
|
|
845
|
+
json_str, intro_value, to_json, skip_markers, strip_chars,
|
|
846
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
847
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
848
|
+
skip_regexes, extract_regexes,
|
|
849
|
+
)
|
|
850
|
+
|
|
851
|
+
# Handle file-like objects (optional, treat as string if .read exists)
|
|
852
|
+
if hasattr(data, "read") and callable(data.read):
|
|
853
|
+
try:
|
|
854
|
+
file_content = data.read()
|
|
855
|
+
if isinstance(file_content, bytes):
|
|
856
|
+
file_content = file_content.decode(encoding, encoding_errors)
|
|
857
|
+
return _sanitize_stream_sync(
|
|
858
|
+
file_content, intro_value, to_json, skip_markers, strip_chars,
|
|
859
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
860
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
861
|
+
skip_regexes, extract_regexes,
|
|
862
|
+
)
|
|
863
|
+
except Exception:
|
|
864
|
+
pass # fallback to next
|
|
865
|
+
|
|
866
|
+
# Handle .text or .content attributes
|
|
673
867
|
if isinstance(text_attr, str):
|
|
674
868
|
payload = text_attr
|
|
869
|
+
return _sanitize_stream_sync(
|
|
870
|
+
payload, intro_value, to_json, skip_markers, strip_chars,
|
|
871
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
872
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
873
|
+
skip_regexes, extract_regexes,
|
|
874
|
+
)
|
|
675
875
|
elif isinstance(content_attr, bytes):
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
876
|
+
try:
|
|
877
|
+
payload = content_attr.decode(encoding, encoding_errors)
|
|
878
|
+
except Exception:
|
|
879
|
+
payload = str(content_attr)
|
|
880
|
+
return _sanitize_stream_sync(
|
|
881
|
+
payload, intro_value, to_json, skip_markers, strip_chars,
|
|
882
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
883
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
884
|
+
skip_regexes, extract_regexes,
|
|
885
|
+
)
|
|
680
886
|
|
|
681
|
-
#
|
|
682
|
-
if hasattr(
|
|
887
|
+
# Handle async iterables
|
|
888
|
+
if hasattr(data, "__aiter__"):
|
|
683
889
|
return _sanitize_stream_async(
|
|
684
|
-
|
|
890
|
+
data, intro_value, to_json, skip_markers, strip_chars,
|
|
891
|
+
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
892
|
+
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
893
|
+
skip_regexes, extract_regexes,
|
|
894
|
+
)
|
|
895
|
+
# Handle sync iterables (but not strings/bytes)
|
|
896
|
+
if hasattr(data, "__iter__"):
|
|
897
|
+
return _sanitize_stream_sync(
|
|
898
|
+
data, intro_value, to_json, skip_markers, strip_chars,
|
|
685
899
|
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
686
900
|
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
901
|
+
skip_regexes, extract_regexes,
|
|
687
902
|
)
|
|
903
|
+
# Fallback: treat as string
|
|
688
904
|
return _sanitize_stream_sync(
|
|
689
|
-
|
|
905
|
+
str(data), intro_value, to_json, skip_markers, strip_chars,
|
|
690
906
|
start_marker, end_marker, content_extractor, yield_raw_on_error,
|
|
691
907
|
encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
|
|
908
|
+
skip_regexes, extract_regexes,
|
|
692
909
|
)
|
|
910
|
+
|
|
911
|
+
# --- Decorator version of sanitize_stream ---
|
|
912
|
+
import functools
|
|
913
|
+
from typing import overload
|
|
914
|
+
|
|
915
|
+
def _sanitize_stream_decorator(
    _func=None,
    *,
    intro_value: str = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: EncodingType = "utf-8",
    encoding_errors: str = "replace",
    buffer_size: int = 8192,
    line_delimiter: Optional[str] = None,
    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
    skip_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
    extract_regexes: Optional[List[Union[str, Pattern[str]]]] = None,
    object_mode: Literal["as_is", "json", "str"] = "json",
    raw: bool = False,
):
    """
    Decorator form of ``sanitize_stream``.

    Wraps a function so that its return value is passed through
    ``sanitize_stream`` with the options given here. Works both bare
    (``@_sanitize_stream_decorator``) and with keyword options
    (``@_sanitize_stream_decorator(to_json=False, ...)``).

    Args:
        _func: The function being decorated when used bare; ``None`` when the
            decorator is called with keyword options.
        intro_value, to_json, skip_markers, strip_chars, start_marker,
        end_marker, content_extractor, yield_raw_on_error, encoding,
        encoding_errors, buffer_size, line_delimiter, error_handler,
        skip_regexes, extract_regexes, object_mode, raw:
            Forwarded unchanged, by keyword, to ``sanitize_stream``.

    Returns:
        The wrapped function when used bare, otherwise a decorator that
        produces it. Coroutine functions get an ``async`` wrapper that awaits
        the original and returns the ``sanitize_stream`` result; every other
        callable gets a plain wrapper.
    """
    # Imported locally so this definition does not rely on module-level imports
    # that appear further down the file than the decorator itself.
    import asyncio
    import functools

    # Collected once; both wrappers forward exactly the same option set.
    stream_options = dict(
        intro_value=intro_value,
        to_json=to_json,
        skip_markers=skip_markers,
        strip_chars=strip_chars,
        start_marker=start_marker,
        end_marker=end_marker,
        content_extractor=content_extractor,
        yield_raw_on_error=yield_raw_on_error,
        encoding=encoding,
        encoding_errors=encoding_errors,
        buffer_size=buffer_size,
        line_delimiter=line_delimiter,
        error_handler=error_handler,
        skip_regexes=skip_regexes,
        extract_regexes=extract_regexes,
        object_mode=object_mode,
        raw=raw,
    )

    def decorator(func):
        if asyncio.iscoroutinefunction(func):
            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs):
                result = await func(*args, **kwargs)
                return sanitize_stream(result, **stream_options)
            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs):
            result = func(*args, **kwargs)
            return sanitize_stream(result, **stream_options)
        return sync_wrapper

    # Bare usage passes the function positionally; parameterised usage does not.
    if _func is None:
        return decorator
    return decorator(_func)
|
|
995
|
+
|
|
996
|
+
# Alias for decorator usage
|
|
997
|
+
LITSTREAM = sanitize_stream
|
|
998
|
+
|
|
999
|
+
# Decorator aliases
|
|
1000
|
+
sanitize_stream_decorator = _sanitize_stream_decorator
|
|
1001
|
+
lit_streamer = _sanitize_stream_decorator
|
|
1002
|
+
|
|
1003
|
+
# Allow @sanitize_stream and @lit_streamer as decorators
|
|
1004
|
+
import asyncio
|
|
1005
|
+
sanitize_stream.__decorator__ = _sanitize_stream_decorator
|
|
1006
|
+
LITSTREAM.__decorator__ = _sanitize_stream_decorator
|
|
1007
|
+
lit_streamer.__decorator__ = _sanitize_stream_decorator
|
|
1008
|
+
|
|
1009
|
+
def __getattr__(name):
|
|
1010
|
+
if name == 'sanitize_stream':
|
|
1011
|
+
return sanitize_stream
|
|
1012
|
+
if name == 'LITSTREAM':
|
|
1013
|
+
return LITSTREAM
|
|
1014
|
+
if name == 'sanitize_stream_decorator':
|
|
1015
|
+
return _sanitize_stream_decorator
|
|
1016
|
+
if name == 'lit_streamer':
|
|
1017
|
+
return _sanitize_stream_decorator
|
|
1018
|
+
raise AttributeError(f"module {__name__} has no attribute {name}")
|
|
693
1019
|
from .conversation import Conversation # noqa: E402,F401
|
|
694
1020
|
from .Extra.autocoder import AutoCoder # noqa: E402,F401
|
|
695
1021
|
from .optimizers import Optimizers # noqa: E402,F401
|
webscout/Bard.py
CHANGED
|
@@ -81,21 +81,11 @@ class Model(Enum):
|
|
|
81
81
|
model_header (dict): Additional headers required for the model.
|
|
82
82
|
advanced_only (bool): Whether the model is available only for advanced users.
|
|
83
83
|
"""
|
|
84
|
-
#
|
|
84
|
+
# Only the specified models
|
|
85
85
|
UNSPECIFIED = ("unspecified", {}, False)
|
|
86
|
-
G_2_0_FLASH = (
|
|
87
|
-
"gemini-2.0-flash",
|
|
88
|
-
{"x-goog-ext-525001261-jspb": '[1,null,null,null,"f299729663a2343f"]'},
|
|
89
|
-
False,
|
|
90
|
-
)
|
|
91
|
-
G_2_0_FLASH_THINKING = (
|
|
92
|
-
"gemini-2.0-flash-thinking",
|
|
93
|
-
{"x-goog-ext-525001261-jspb": '[null,null,null,null,"7ca48d02d802f20a"]'},
|
|
94
|
-
False,
|
|
95
|
-
)
|
|
96
86
|
G_2_5_FLASH = (
|
|
97
87
|
"gemini-2.5-flash",
|
|
98
|
-
{"x-goog-ext-525001261-jspb": '[1,null,null,null,"
|
|
88
|
+
{"x-goog-ext-525001261-jspb": '[1,null,null,null,"71c2d248d3b102ff"]'},
|
|
99
89
|
False,
|
|
100
90
|
)
|
|
101
91
|
G_2_5_PRO = (
|
|
@@ -103,16 +93,6 @@ class Model(Enum):
|
|
|
103
93
|
{"x-goog-ext-525001261-jspb": '[1,null,null,null,"2525e3954d185b3c"]'},
|
|
104
94
|
False,
|
|
105
95
|
)
|
|
106
|
-
G_2_0_EXP_ADVANCED = (
|
|
107
|
-
"gemini-2.0-exp-advanced",
|
|
108
|
-
{"x-goog-ext-525001261-jspb": '[null,null,null,null,"b1e46a6037e6aa9f"]'},
|
|
109
|
-
True,
|
|
110
|
-
)
|
|
111
|
-
G_2_5_EXP_ADVANCED = (
|
|
112
|
-
"gemini-2.5-exp-advanced",
|
|
113
|
-
{"x-goog-ext-525001261-jspb": '[null,null,null,null,"203e6bb81620bcfe"]'},
|
|
114
|
-
True,
|
|
115
|
-
)
|
|
116
96
|
|
|
117
97
|
def __init__(self, name, header, advanced_only):
|
|
118
98
|
"""
|
webscout/Bing_search.py
CHANGED
|
@@ -175,8 +175,7 @@ class BingSearch:
|
|
|
175
175
|
url = self._first_page(keywords)['url']
|
|
176
176
|
urls_to_fetch = [url]
|
|
177
177
|
while len(fetched_results) < max_results and urls_to_fetch:
|
|
178
|
-
|
|
179
|
-
html_pages = list(executor.map(fetch_page, urls_to_fetch))
|
|
178
|
+
html_pages = list(self._executor.map(fetch_page, urls_to_fetch))
|
|
180
179
|
urls_to_fetch = []
|
|
181
180
|
for html in html_pages:
|
|
182
181
|
soup = BeautifulSoup(html, "html.parser")
|