webscout 8.2.2__py3-none-any.whl → 8.2.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of webscout might be problematic. Click here for more details.
- webscout/AIauto.py +112 -22
- webscout/AIbase.py +144 -7
- webscout/AIutel.py +249 -131
- webscout/Bard.py +579 -206
- webscout/DWEBS.py +78 -35
- webscout/__init__.py +0 -1
- webscout/cli.py +256 -0
- webscout/conversation.py +307 -436
- webscout/exceptions.py +23 -0
- webscout/prompt_manager.py +56 -42
- webscout/version.py +1 -1
- webscout/webscout_search.py +65 -47
- webscout/webscout_search_async.py +81 -126
- webscout/yep_search.py +93 -43
- {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/METADATA +172 -52
- webscout-8.2.7.dist-info/RECORD +26 -0
- {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/WHEEL +1 -1
- webscout-8.2.7.dist-info/entry_points.txt +3 -0
- webscout-8.2.7.dist-info/top_level.txt +1 -0
- inferno/__init__.py +0 -6
- inferno/__main__.py +0 -9
- inferno/cli.py +0 -6
- webscout/Extra/GitToolkit/__init__.py +0 -10
- webscout/Extra/GitToolkit/gitapi/__init__.py +0 -12
- webscout/Extra/GitToolkit/gitapi/repository.py +0 -195
- webscout/Extra/GitToolkit/gitapi/user.py +0 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +0 -62
- webscout/Extra/YTToolkit/YTdownloader.py +0 -957
- webscout/Extra/YTToolkit/__init__.py +0 -3
- webscout/Extra/YTToolkit/transcriber.py +0 -476
- webscout/Extra/YTToolkit/ytapi/__init__.py +0 -6
- webscout/Extra/YTToolkit/ytapi/channel.py +0 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +0 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +0 -45
- webscout/Extra/YTToolkit/ytapi/https.py +0 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +0 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +0 -59
- webscout/Extra/YTToolkit/ytapi/pool.py +0 -8
- webscout/Extra/YTToolkit/ytapi/query.py +0 -40
- webscout/Extra/YTToolkit/ytapi/stream.py +0 -63
- webscout/Extra/YTToolkit/ytapi/utils.py +0 -62
- webscout/Extra/YTToolkit/ytapi/video.py +0 -232
- webscout/Extra/__init__.py +0 -7
- webscout/Extra/autocoder/__init__.py +0 -9
- webscout/Extra/autocoder/autocoder.py +0 -849
- webscout/Extra/autocoder/autocoder_utiles.py +0 -332
- webscout/Extra/gguf.py +0 -682
- webscout/Extra/tempmail/__init__.py +0 -28
- webscout/Extra/tempmail/async_utils.py +0 -141
- webscout/Extra/tempmail/base.py +0 -161
- webscout/Extra/tempmail/cli.py +0 -187
- webscout/Extra/tempmail/emailnator.py +0 -84
- webscout/Extra/tempmail/mail_tm.py +0 -361
- webscout/Extra/tempmail/temp_mail_io.py +0 -292
- webscout/Extra/weather.py +0 -194
- webscout/Extra/weather_ascii.py +0 -76
- webscout/LLM.py +0 -442
- webscout/Litlogger/__init__.py +0 -67
- webscout/Litlogger/core/__init__.py +0 -6
- webscout/Litlogger/core/level.py +0 -23
- webscout/Litlogger/core/logger.py +0 -165
- webscout/Litlogger/handlers/__init__.py +0 -12
- webscout/Litlogger/handlers/console.py +0 -33
- webscout/Litlogger/handlers/file.py +0 -143
- webscout/Litlogger/handlers/network.py +0 -173
- webscout/Litlogger/styles/__init__.py +0 -7
- webscout/Litlogger/styles/colors.py +0 -249
- webscout/Litlogger/styles/formats.py +0 -458
- webscout/Litlogger/styles/text.py +0 -87
- webscout/Litlogger/utils/__init__.py +0 -6
- webscout/Litlogger/utils/detectors.py +0 -153
- webscout/Litlogger/utils/formatters.py +0 -200
- webscout/Local/__init__.py +0 -12
- webscout/Local/__main__.py +0 -9
- webscout/Local/api.py +0 -576
- webscout/Local/cli.py +0 -516
- webscout/Local/config.py +0 -75
- webscout/Local/llm.py +0 -287
- webscout/Local/model_manager.py +0 -253
- webscout/Local/server.py +0 -721
- webscout/Local/utils.py +0 -93
- webscout/Provider/AI21.py +0 -177
- webscout/Provider/AISEARCH/DeepFind.py +0 -250
- webscout/Provider/AISEARCH/ISou.py +0 -256
- webscout/Provider/AISEARCH/Perplexity.py +0 -359
- webscout/Provider/AISEARCH/__init__.py +0 -10
- webscout/Provider/AISEARCH/felo_search.py +0 -228
- webscout/Provider/AISEARCH/genspark_search.py +0 -208
- webscout/Provider/AISEARCH/hika_search.py +0 -194
- webscout/Provider/AISEARCH/iask_search.py +0 -436
- webscout/Provider/AISEARCH/monica_search.py +0 -246
- webscout/Provider/AISEARCH/scira_search.py +0 -324
- webscout/Provider/AISEARCH/webpilotai_search.py +0 -281
- webscout/Provider/Aitopia.py +0 -292
- webscout/Provider/AllenAI.py +0 -413
- webscout/Provider/Andi.py +0 -228
- webscout/Provider/Blackboxai.py +0 -229
- webscout/Provider/C4ai.py +0 -432
- webscout/Provider/ChatGPTClone.py +0 -226
- webscout/Provider/ChatGPTES.py +0 -237
- webscout/Provider/ChatGPTGratis.py +0 -194
- webscout/Provider/Chatify.py +0 -175
- webscout/Provider/Cloudflare.py +0 -273
- webscout/Provider/Cohere.py +0 -208
- webscout/Provider/DeepSeek.py +0 -196
- webscout/Provider/Deepinfra.py +0 -297
- webscout/Provider/ElectronHub.py +0 -709
- webscout/Provider/ExaAI.py +0 -261
- webscout/Provider/ExaChat.py +0 -342
- webscout/Provider/Free2GPT.py +0 -241
- webscout/Provider/GPTWeb.py +0 -193
- webscout/Provider/Gemini.py +0 -169
- webscout/Provider/GithubChat.py +0 -367
- webscout/Provider/Glider.py +0 -211
- webscout/Provider/Groq.py +0 -670
- webscout/Provider/HF_space/__init__.py +0 -0
- webscout/Provider/HF_space/qwen_qwen2.py +0 -206
- webscout/Provider/HeckAI.py +0 -233
- webscout/Provider/HuggingFaceChat.py +0 -462
- webscout/Provider/Hunyuan.py +0 -272
- webscout/Provider/Jadve.py +0 -266
- webscout/Provider/Koboldai.py +0 -381
- webscout/Provider/LambdaChat.py +0 -392
- webscout/Provider/Llama.py +0 -200
- webscout/Provider/Llama3.py +0 -204
- webscout/Provider/Marcus.py +0 -148
- webscout/Provider/Netwrck.py +0 -228
- webscout/Provider/OLLAMA.py +0 -396
- webscout/Provider/OPENAI/__init__.py +0 -25
- webscout/Provider/OPENAI/base.py +0 -46
- webscout/Provider/OPENAI/c4ai.py +0 -367
- webscout/Provider/OPENAI/chatgpt.py +0 -549
- webscout/Provider/OPENAI/chatgptclone.py +0 -460
- webscout/Provider/OPENAI/deepinfra.py +0 -272
- webscout/Provider/OPENAI/e2b.py +0 -1350
- webscout/Provider/OPENAI/exaai.py +0 -404
- webscout/Provider/OPENAI/exachat.py +0 -433
- webscout/Provider/OPENAI/freeaichat.py +0 -352
- webscout/Provider/OPENAI/glider.py +0 -316
- webscout/Provider/OPENAI/heckai.py +0 -337
- webscout/Provider/OPENAI/llmchatco.py +0 -327
- webscout/Provider/OPENAI/netwrck.py +0 -348
- webscout/Provider/OPENAI/opkfc.py +0 -488
- webscout/Provider/OPENAI/scirachat.py +0 -463
- webscout/Provider/OPENAI/sonus.py +0 -294
- webscout/Provider/OPENAI/standardinput.py +0 -425
- webscout/Provider/OPENAI/textpollinations.py +0 -285
- webscout/Provider/OPENAI/toolbaz.py +0 -405
- webscout/Provider/OPENAI/typegpt.py +0 -346
- webscout/Provider/OPENAI/uncovrAI.py +0 -455
- webscout/Provider/OPENAI/utils.py +0 -211
- webscout/Provider/OPENAI/venice.py +0 -413
- webscout/Provider/OPENAI/wisecat.py +0 -381
- webscout/Provider/OPENAI/writecream.py +0 -156
- webscout/Provider/OPENAI/x0gpt.py +0 -371
- webscout/Provider/OPENAI/yep.py +0 -327
- webscout/Provider/OpenGPT.py +0 -199
- webscout/Provider/Openai.py +0 -496
- webscout/Provider/PI.py +0 -344
- webscout/Provider/Perplexitylabs.py +0 -415
- webscout/Provider/Phind.py +0 -535
- webscout/Provider/PizzaGPT.py +0 -198
- webscout/Provider/QwenLM.py +0 -254
- webscout/Provider/Reka.py +0 -214
- webscout/Provider/StandardInput.py +0 -278
- webscout/Provider/TTI/AiForce/__init__.py +0 -22
- webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
- webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
- webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
- webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
- webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
- webscout/Provider/TTI/ImgSys/__init__.py +0 -23
- webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
- webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
- webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
- webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
- webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
- webscout/Provider/TTI/Nexra/__init__.py +0 -22
- webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
- webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
- webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
- webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
- webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
- webscout/Provider/TTI/__init__.py +0 -12
- webscout/Provider/TTI/aiarta/__init__.py +0 -2
- webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
- webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
- webscout/Provider/TTI/artbit/__init__.py +0 -22
- webscout/Provider/TTI/artbit/async_artbit.py +0 -155
- webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
- webscout/Provider/TTI/fastflux/__init__.py +0 -22
- webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
- webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
- webscout/Provider/TTI/huggingface/__init__.py +0 -22
- webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
- webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
- webscout/Provider/TTI/piclumen/__init__.py +0 -23
- webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
- webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
- webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
- webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
- webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
- webscout/Provider/TTI/talkai/__init__.py +0 -4
- webscout/Provider/TTI/talkai/async_talkai.py +0 -229
- webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
- webscout/Provider/TTS/__init__.py +0 -7
- webscout/Provider/TTS/deepgram.py +0 -156
- webscout/Provider/TTS/elevenlabs.py +0 -111
- webscout/Provider/TTS/gesserit.py +0 -127
- webscout/Provider/TTS/murfai.py +0 -113
- webscout/Provider/TTS/parler.py +0 -111
- webscout/Provider/TTS/speechma.py +0 -180
- webscout/Provider/TTS/streamElements.py +0 -333
- webscout/Provider/TTS/utils.py +0 -280
- webscout/Provider/TeachAnything.py +0 -187
- webscout/Provider/TextPollinationsAI.py +0 -231
- webscout/Provider/TwoAI.py +0 -199
- webscout/Provider/Venice.py +0 -219
- webscout/Provider/VercelAI.py +0 -234
- webscout/Provider/WebSim.py +0 -228
- webscout/Provider/WiseCat.py +0 -196
- webscout/Provider/Writecream.py +0 -211
- webscout/Provider/WritingMate.py +0 -197
- webscout/Provider/Youchat.py +0 -330
- webscout/Provider/__init__.py +0 -198
- webscout/Provider/ai4chat.py +0 -202
- webscout/Provider/aimathgpt.py +0 -189
- webscout/Provider/akashgpt.py +0 -342
- webscout/Provider/askmyai.py +0 -158
- webscout/Provider/asksteve.py +0 -203
- webscout/Provider/bagoodex.py +0 -145
- webscout/Provider/cerebras.py +0 -242
- webscout/Provider/chatglm.py +0 -205
- webscout/Provider/cleeai.py +0 -213
- webscout/Provider/copilot.py +0 -428
- webscout/Provider/elmo.py +0 -234
- webscout/Provider/freeaichat.py +0 -271
- webscout/Provider/gaurish.py +0 -244
- webscout/Provider/geminiapi.py +0 -208
- webscout/Provider/geminiprorealtime.py +0 -160
- webscout/Provider/granite.py +0 -187
- webscout/Provider/hermes.py +0 -219
- webscout/Provider/julius.py +0 -223
- webscout/Provider/koala.py +0 -268
- webscout/Provider/labyrinth.py +0 -340
- webscout/Provider/learnfastai.py +0 -266
- webscout/Provider/lepton.py +0 -194
- webscout/Provider/llama3mitril.py +0 -180
- webscout/Provider/llamatutor.py +0 -192
- webscout/Provider/llmchat.py +0 -213
- webscout/Provider/llmchatco.py +0 -311
- webscout/Provider/meta.py +0 -794
- webscout/Provider/multichat.py +0 -325
- webscout/Provider/promptrefine.py +0 -193
- webscout/Provider/scira_chat.py +0 -277
- webscout/Provider/scnet.py +0 -187
- webscout/Provider/searchchat.py +0 -293
- webscout/Provider/sonus.py +0 -208
- webscout/Provider/talkai.py +0 -194
- webscout/Provider/toolbaz.py +0 -320
- webscout/Provider/turboseek.py +0 -219
- webscout/Provider/tutorai.py +0 -252
- webscout/Provider/typefully.py +0 -280
- webscout/Provider/typegpt.py +0 -232
- webscout/Provider/uncovr.py +0 -312
- webscout/Provider/x0gpt.py +0 -256
- webscout/Provider/yep.py +0 -376
- webscout/litagent/__init__.py +0 -29
- webscout/litagent/agent.py +0 -455
- webscout/litagent/constants.py +0 -60
- webscout/litprinter/__init__.py +0 -59
- webscout/scout/__init__.py +0 -8
- webscout/scout/core/__init__.py +0 -7
- webscout/scout/core/crawler.py +0 -140
- webscout/scout/core/scout.py +0 -568
- webscout/scout/core/search_result.py +0 -96
- webscout/scout/core/text_analyzer.py +0 -63
- webscout/scout/core/text_utils.py +0 -277
- webscout/scout/core/web_analyzer.py +0 -52
- webscout/scout/core.py +0 -881
- webscout/scout/element.py +0 -460
- webscout/scout/parsers/__init__.py +0 -69
- webscout/scout/parsers/html5lib_parser.py +0 -172
- webscout/scout/parsers/html_parser.py +0 -236
- webscout/scout/parsers/lxml_parser.py +0 -178
- webscout/scout/utils.py +0 -37
- webscout/swiftcli/__init__.py +0 -809
- webscout/zeroart/__init__.py +0 -55
- webscout/zeroart/base.py +0 -60
- webscout/zeroart/effects.py +0 -99
- webscout/zeroart/fonts.py +0 -816
- webscout-8.2.2.dist-info/RECORD +0 -309
- webscout-8.2.2.dist-info/entry_points.txt +0 -5
- webscout-8.2.2.dist-info/top_level.txt +0 -3
- webstoken/__init__.py +0 -30
- webstoken/classifier.py +0 -189
- webstoken/keywords.py +0 -216
- webstoken/language.py +0 -128
- webstoken/ner.py +0 -164
- webstoken/normalizer.py +0 -35
- webstoken/processor.py +0 -77
- webstoken/sentiment.py +0 -206
- webstoken/stemmer.py +0 -73
- webstoken/tagger.py +0 -60
- webstoken/tokenizer.py +0 -158
- {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info/licenses}/LICENSE.md +0 -0
webscout/AIutel.py
CHANGED
|
@@ -1,132 +1,250 @@
|
|
|
1
|
-
import json
|
|
2
|
-
import
|
|
3
|
-
import
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
"""
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
1
|
+
import json
|
|
2
|
+
from typing import Union, Optional, Dict, Any, Iterable, Generator, List, Callable, Literal
|
|
3
|
+
import codecs
|
|
4
|
+
|
|
5
|
+
# Expanded encoding types
|
|
6
|
+
EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
|
|
7
|
+
'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
|
|
8
|
+
'shift_jis', 'euc-jp', 'euc-kr']
|
|
9
|
+
|
|
10
|
+
def _process_chunk(
    chunk: str,
    intro_value: str,
    to_json: bool,
    skip_markers: List[str],
    strip_chars: Optional[str],
    yield_raw_on_error: bool,
) -> Union[str, Dict[str, Any], None]:
    """Clean a single stream chunk and, if requested, decode it as JSON.

    Args:
        chunk: Raw text of the chunk. Non-string or empty input is dropped.
        intro_value: Prefix removed from the start of the chunk when present.
        to_json: When true, the cleaned text is handed to ``json.loads``.
        skip_markers: Cleaned chunks equal to any of these are dropped.
        strip_chars: Characters stripped from both ends of the chunk. When
            ``None`` only leading whitespace is removed.
        yield_raw_on_error: When JSON decoding fails, return the cleaned
            text instead of ``None``.

    Returns:
        The decoded JSON value, the cleaned text, or ``None`` when the chunk
        is dropped.
    """
    if not isinstance(chunk, str) or not chunk:
        return None

    # The prefix is matched against the raw chunk, before any stripping.
    text = chunk
    if intro_value and chunk.startswith(intro_value):
        text = chunk[len(intro_value):]

    if strip_chars is None:
        text = text.lstrip()
    else:
        text = text.strip(strip_chars)

    if not text:
        return None
    for marker in skip_markers:
        if marker == text:
            return None

    if not to_json:
        return text

    try:
        # Only pay for a full strip when the text is not already delimited
        # like a JSON object/array.
        looks_delimited = text[0] in '{[' and text[-1] in '}]'
        if not looks_delimited:
            text = text.strip()
        return json.loads(text)
    except Exception:
        # Best effort: any decoding failure falls back to the cleaned text.
        if yield_raw_on_error:
            return text
        return None
|
|
53
|
+
|
|
54
|
+
def _decode_byte_stream(
    byte_iterator: Iterable[bytes],
    encoding: "EncodingType" = 'utf-8',
    errors: str = 'replace',
    buffer_size: int = 8192
) -> Generator[str, None, None]:
    """Incrementally decode an iterable of byte chunks into text.

    An incremental decoder is used so that a multi-byte character split
    across two chunks is decoded correctly once its remaining bytes arrive.

    Args:
        byte_iterator: Iterable yielding byte chunks; empty chunks are skipped.
        encoding: Name of the codec to use. If the codec is unknown, UTF-8 is
            used instead.
        errors: Codec error handler ('strict', 'ignore', 'replace', ...).
        buffer_size: Accepted for backward compatibility. Chunks are passed
            to the decoder directly, so no staging buffer is allocated and
            this value is not used.

    Yields:
        Non-empty decoded text for each chunk. When a chunk cannot be decoded
        (possible with ``errors='strict'``), a bracketed
        ``[Encoding Error: ...]`` placeholder line is yielded in its place.
    """
    try:
        decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
    except LookupError:
        # Unknown codec: fall back to UTF-8 and report that codec in any
        # error placeholder, since it is the one actually in use.
        encoding = 'utf-8'
        decoder = codecs.getincrementaldecoder(encoding)(errors=errors)

    for chunk_bytes in byte_iterator:
        if not chunk_bytes:
            continue

        try:
            text = decoder.decode(chunk_bytes, final=False)
        except UnicodeDecodeError:
            # Drop the decoder's pending bytes; otherwise the same undecodable
            # prefix is re-joined with every later chunk and each of them
            # fails as well.
            decoder.reset()
            yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
            continue

        if text:
            yield text

    # Final flush: surfaces an incomplete trailing sequence, if any.
    try:
        final_text = decoder.decode(b'', final=True)
    except UnicodeDecodeError:
        yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
    else:
        if final_text:
            yield final_text
|
|
104
|
+
|
|
105
|
+
def sanitize_stream(
    data: Union[str, Iterable[str], Iterable[bytes]],
    intro_value: str = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: "EncodingType" = 'utf-8',
    encoding_errors: str = 'replace',
    buffer_size: int = 8192,
) -> Generator[Any, None, None]:
    """Sanitize a string or a stream of str/bytes chunks, optionally as JSON.

    Two input shapes are handled:

    * A single string. With ``to_json=True`` the whole stripped string is
      passed to ``json.loads``; ``intro_value``, ``skip_markers`` and
      ``strip_chars`` are not applied on that path. With ``to_json=False``
      the string is cleaned like a single stream chunk. Nothing is yielded
      for a string when ``start_marker`` is set.
    * An iterable of ``str`` or ``bytes``. The type of the first item decides
      how the stream is read; bytes are decoded incrementally. Each item is
      handled as one unit (no line splitting is done here).

    Args:
        data: Input string, or iterable of str or bytes chunks.
        intro_value: Prefix removed from the start of each chunk.
        to_json: Whether to decode each cleaned chunk as JSON.
        skip_markers: Cleaned chunks equal to one of these are dropped.
        strip_chars: Characters stripped from both ends of each chunk; when
            ``None`` only leading whitespace is removed.
        start_marker: If set, chunks are ignored until a chunk whose stripped
            text equals this marker is seen (the marker itself is dropped).
        end_marker: If set, a chunk whose stripped text equals this marker
            stops processing (the marker itself is dropped).
        content_extractor: Optional callable applied to every processed
            chunk. ``None`` results and extractor exceptions are skipped.
        yield_raw_on_error: On a JSON decode failure, yield the cleaned text
            instead of dropping the chunk.
        encoding: Codec used to decode byte streams.
        encoding_errors: Codec error handler for byte streams.
        buffer_size: Forwarded to the byte-stream decoder.

    Yields:
        Processed chunks (decoded JSON values or strings), or whatever
        ``content_extractor`` returns for them.

    Raises:
        TypeError: If ``data`` is neither a string nor an iterable, or if the
            first stream item is neither ``str`` nor ``bytes``.
    """
    from itertools import chain
    import sys

    effective_skip_markers = skip_markers or []
    processing_active = start_marker is None

    def _extract(item: Any) -> Any:
        """Apply ``content_extractor`` if given; failures map to ``None``."""
        if not content_extractor:
            return item
        try:
            return content_extractor(item)
        except Exception:
            # Deliberate best effort: a failing extractor skips the chunk.
            return None

    def _report(exc: Exception) -> None:
        """Report a stream failure on stderr without raising."""
        print(f"Stream processing error: {str(exc)}", file=sys.stderr)

    # Single-string path.
    if isinstance(data, str):
        processed_item = None
        if processing_active:
            if to_json:
                stripped = data.strip()
                if stripped:
                    try:
                        processed_item = json.loads(stripped)
                    except json.JSONDecodeError:
                        processed_item = stripped if yield_raw_on_error else None
            else:
                processed_item = _process_chunk(
                    data, intro_value, False, effective_skip_markers,
                    strip_chars, yield_raw_on_error
                )

        if processed_item is not None:
            result = _extract(processed_item)
            if result is not None:
                yield result
        return

    # Stream path.
    if not hasattr(data, '__iter__'):
        raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")

    try:
        iterator = iter(data)
        first_item = next(iterator, None)
    except Exception as e:
        # Failures of the underlying source stay best-effort.
        _report(e)
        return

    if first_item is None:
        return

    # Validate the item type outside the best-effort block so a misuse is
    # raised to the caller instead of being swallowed and printed.
    if not isinstance(first_item, (bytes, str)):
        raise TypeError(f"Stream must yield strings or bytes, not {type(first_item).__name__}")

    # Put the peeked item back in front of the remaining items.
    stream = chain([first_item], iterator)
    if isinstance(first_item, bytes):
        line_iterator = _decode_byte_stream(
            stream,
            encoding=encoding,
            errors=encoding_errors,
            buffer_size=buffer_size
        )
    else:
        line_iterator = stream

    try:
        for line in line_iterator:
            if not line:
                continue

            if not processing_active:
                # Waiting for the start marker; everything else is ignored.
                if start_marker is not None and line.strip() == start_marker:
                    processing_active = True
                continue

            if end_marker is not None and line.strip() == end_marker:
                processing_active = False
                continue

            processed = _process_chunk(
                line, intro_value, to_json, effective_skip_markers,
                strip_chars, yield_raw_on_error
            )
            if processed is None:
                continue

            result = _extract(processed)
            if result is not None:
                yield result

    except Exception as e:
        # Report but do not crash on errors raised while consuming the stream.
        _report(e)
|
|
245
|
+
|
|
246
|
+
|
|
247
|
+
from .conversation import Conversation
|
|
248
|
+
from .optimizers import Optimizers
|
|
249
|
+
from .Extra.autocoder import AutoCoder
|
|
132
250
|
from .prompt_manager import AwesomePrompts
|