webscout-8.2.9-py3-none-any.whl → webscout-2026.1.19-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- webscout/AIauto.py +524 -251
- webscout/AIbase.py +247 -319
- webscout/AIutel.py +68 -703
- webscout/Bard.py +1072 -1026
- webscout/Extra/GitToolkit/__init__.py +10 -10
- webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
- webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
- webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
- webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
- webscout/Extra/GitToolkit/gitapi/search.py +162 -0
- webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
- webscout/Extra/GitToolkit/gitapi/user.py +128 -96
- webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
- webscout/Extra/YTToolkit/README.md +443 -375
- webscout/Extra/YTToolkit/YTdownloader.py +953 -957
- webscout/Extra/YTToolkit/__init__.py +3 -3
- webscout/Extra/YTToolkit/transcriber.py +595 -476
- webscout/Extra/YTToolkit/ytapi/README.md +230 -44
- webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
- webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
- webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
- webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
- webscout/Extra/YTToolkit/ytapi/extras.py +178 -118
- webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
- webscout/Extra/YTToolkit/ytapi/https.py +89 -88
- webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
- webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
- webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
- webscout/Extra/YTToolkit/ytapi/query.py +143 -40
- webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
- webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
- webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
- webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
- webscout/Extra/YTToolkit/ytapi/video.py +403 -232
- webscout/Extra/__init__.py +2 -3
- webscout/Extra/gguf.py +1298 -684
- webscout/Extra/tempmail/README.md +487 -487
- webscout/Extra/tempmail/__init__.py +28 -28
- webscout/Extra/tempmail/async_utils.py +143 -141
- webscout/Extra/tempmail/base.py +172 -161
- webscout/Extra/tempmail/cli.py +191 -187
- webscout/Extra/tempmail/emailnator.py +88 -84
- webscout/Extra/tempmail/mail_tm.py +378 -361
- webscout/Extra/tempmail/temp_mail_io.py +304 -292
- webscout/Extra/weather.py +196 -194
- webscout/Extra/weather_ascii.py +17 -15
- webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
- webscout/Provider/AISEARCH/Perplexity.py +292 -333
- webscout/Provider/AISEARCH/README.md +106 -279
- webscout/Provider/AISEARCH/__init__.py +16 -9
- webscout/Provider/AISEARCH/brave_search.py +298 -0
- webscout/Provider/AISEARCH/iask_search.py +357 -410
- webscout/Provider/AISEARCH/monica_search.py +200 -220
- webscout/Provider/AISEARCH/webpilotai_search.py +242 -255
- webscout/Provider/Algion.py +413 -0
- webscout/Provider/Andi.py +74 -69
- webscout/Provider/Apriel.py +313 -0
- webscout/Provider/Ayle.py +323 -0
- webscout/Provider/ChatSandbox.py +329 -342
- webscout/Provider/ClaudeOnline.py +365 -0
- webscout/Provider/Cohere.py +232 -208
- webscout/Provider/DeepAI.py +367 -0
- webscout/Provider/Deepinfra.py +467 -340
- webscout/Provider/EssentialAI.py +217 -0
- webscout/Provider/ExaAI.py +274 -261
- webscout/Provider/Gemini.py +175 -169
- webscout/Provider/GithubChat.py +385 -369
- webscout/Provider/Gradient.py +286 -0
- webscout/Provider/Groq.py +556 -801
- webscout/Provider/HadadXYZ.py +323 -0
- webscout/Provider/HeckAI.py +392 -375
- webscout/Provider/HuggingFace.py +387 -0
- webscout/Provider/IBM.py +340 -0
- webscout/Provider/Jadve.py +317 -291
- webscout/Provider/K2Think.py +306 -0
- webscout/Provider/Koboldai.py +221 -384
- webscout/Provider/Netwrck.py +273 -270
- webscout/Provider/Nvidia.py +310 -0
- webscout/Provider/OPENAI/DeepAI.py +489 -0
- webscout/Provider/OPENAI/K2Think.py +423 -0
- webscout/Provider/OPENAI/PI.py +463 -0
- webscout/Provider/OPENAI/README.md +890 -952
- webscout/Provider/OPENAI/TogetherAI.py +405 -0
- webscout/Provider/OPENAI/TwoAI.py +255 -357
- webscout/Provider/OPENAI/__init__.py +148 -40
- webscout/Provider/OPENAI/ai4chat.py +348 -293
- webscout/Provider/OPENAI/akashgpt.py +436 -0
- webscout/Provider/OPENAI/algion.py +303 -0
- webscout/Provider/OPENAI/{exachat.py → ayle.py} +365 -444
- webscout/Provider/OPENAI/base.py +253 -249
- webscout/Provider/OPENAI/cerebras.py +296 -0
- webscout/Provider/OPENAI/chatgpt.py +870 -556
- webscout/Provider/OPENAI/chatsandbox.py +233 -173
- webscout/Provider/OPENAI/deepinfra.py +403 -322
- webscout/Provider/OPENAI/e2b.py +2370 -1414
- webscout/Provider/OPENAI/elmo.py +278 -0
- webscout/Provider/OPENAI/exaai.py +452 -417
- webscout/Provider/OPENAI/freeassist.py +446 -0
- webscout/Provider/OPENAI/gradient.py +448 -0
- webscout/Provider/OPENAI/groq.py +380 -364
- webscout/Provider/OPENAI/hadadxyz.py +292 -0
- webscout/Provider/OPENAI/heckai.py +333 -308
- webscout/Provider/OPENAI/huggingface.py +321 -0
- webscout/Provider/OPENAI/ibm.py +425 -0
- webscout/Provider/OPENAI/llmchat.py +253 -0
- webscout/Provider/OPENAI/llmchatco.py +378 -335
- webscout/Provider/OPENAI/meta.py +541 -0
- webscout/Provider/OPENAI/netwrck.py +374 -357
- webscout/Provider/OPENAI/nvidia.py +317 -0
- webscout/Provider/OPENAI/oivscode.py +348 -287
- webscout/Provider/OPENAI/openrouter.py +328 -0
- webscout/Provider/OPENAI/pydantic_imports.py +1 -172
- webscout/Provider/OPENAI/sambanova.py +397 -0
- webscout/Provider/OPENAI/sonus.py +305 -304
- webscout/Provider/OPENAI/textpollinations.py +370 -339
- webscout/Provider/OPENAI/toolbaz.py +375 -413
- webscout/Provider/OPENAI/typefully.py +419 -355
- webscout/Provider/OPENAI/typliai.py +279 -0
- webscout/Provider/OPENAI/utils.py +314 -318
- webscout/Provider/OPENAI/wisecat.py +359 -387
- webscout/Provider/OPENAI/writecream.py +185 -163
- webscout/Provider/OPENAI/x0gpt.py +462 -365
- webscout/Provider/OPENAI/zenmux.py +380 -0
- webscout/Provider/OpenRouter.py +386 -0
- webscout/Provider/Openai.py +337 -496
- webscout/Provider/PI.py +443 -429
- webscout/Provider/QwenLM.py +346 -254
- webscout/Provider/STT/__init__.py +28 -0
- webscout/Provider/STT/base.py +303 -0
- webscout/Provider/STT/elevenlabs.py +264 -0
- webscout/Provider/Sambanova.py +317 -0
- webscout/Provider/TTI/README.md +69 -82
- webscout/Provider/TTI/__init__.py +37 -7
- webscout/Provider/TTI/base.py +147 -64
- webscout/Provider/TTI/claudeonline.py +393 -0
- webscout/Provider/TTI/magicstudio.py +292 -201
- webscout/Provider/TTI/miragic.py +180 -0
- webscout/Provider/TTI/pollinations.py +331 -221
- webscout/Provider/TTI/together.py +334 -0
- webscout/Provider/TTI/utils.py +14 -11
- webscout/Provider/TTS/README.md +186 -192
- webscout/Provider/TTS/__init__.py +43 -10
- webscout/Provider/TTS/base.py +523 -159
- webscout/Provider/TTS/deepgram.py +286 -156
- webscout/Provider/TTS/elevenlabs.py +189 -111
- webscout/Provider/TTS/freetts.py +218 -0
- webscout/Provider/TTS/murfai.py +288 -113
- webscout/Provider/TTS/openai_fm.py +364 -129
- webscout/Provider/TTS/parler.py +203 -111
- webscout/Provider/TTS/qwen.py +334 -0
- webscout/Provider/TTS/sherpa.py +286 -0
- webscout/Provider/TTS/speechma.py +693 -580
- webscout/Provider/TTS/streamElements.py +275 -333
- webscout/Provider/TTS/utils.py +280 -280
- webscout/Provider/TextPollinationsAI.py +331 -308
- webscout/Provider/TogetherAI.py +450 -0
- webscout/Provider/TwoAI.py +309 -475
- webscout/Provider/TypliAI.py +311 -305
- webscout/Provider/UNFINISHED/ChatHub.py +219 -209
- webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +331 -326
- webscout/Provider/{GizAI.py → UNFINISHED/GizAI.py} +300 -295
- webscout/Provider/{Marcus.py → UNFINISHED/Marcus.py} +218 -198
- webscout/Provider/UNFINISHED/Qodo.py +481 -0
- webscout/Provider/{MCPCore.py → UNFINISHED/XenAI.py} +330 -315
- webscout/Provider/UNFINISHED/Youchat.py +347 -330
- webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
- webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
- webscout/Provider/UNFINISHED/liner.py +342 -0
- webscout/Provider/UNFINISHED/liner_api_request.py +246 -263
- webscout/Provider/{samurai.py → UNFINISHED/samurai.py} +231 -224
- webscout/Provider/WiseCat.py +256 -233
- webscout/Provider/WrDoChat.py +390 -370
- webscout/Provider/__init__.py +115 -174
- webscout/Provider/ai4chat.py +181 -174
- webscout/Provider/akashgpt.py +330 -335
- webscout/Provider/cerebras.py +397 -290
- webscout/Provider/cleeai.py +236 -213
- webscout/Provider/elmo.py +291 -283
- webscout/Provider/geminiapi.py +343 -208
- webscout/Provider/julius.py +245 -223
- webscout/Provider/learnfastai.py +333 -325
- webscout/Provider/llama3mitril.py +230 -215
- webscout/Provider/llmchat.py +308 -258
- webscout/Provider/llmchatco.py +321 -306
- webscout/Provider/meta.py +996 -801
- webscout/Provider/oivscode.py +332 -309
- webscout/Provider/searchchat.py +316 -292
- webscout/Provider/sonus.py +264 -258
- webscout/Provider/toolbaz.py +359 -353
- webscout/Provider/turboseek.py +332 -266
- webscout/Provider/typefully.py +262 -202
- webscout/Provider/x0gpt.py +332 -299
- webscout/__init__.py +31 -39
- webscout/__main__.py +5 -5
- webscout/cli.py +585 -524
- webscout/client.py +1497 -70
- webscout/conversation.py +140 -436
- webscout/exceptions.py +383 -362
- webscout/litagent/__init__.py +29 -29
- webscout/litagent/agent.py +492 -455
- webscout/litagent/constants.py +60 -60
- webscout/models.py +505 -181
- webscout/optimizers.py +74 -420
- webscout/prompt_manager.py +376 -288
- webscout/sanitize.py +1514 -0
- webscout/scout/README.md +452 -404
- webscout/scout/__init__.py +8 -8
- webscout/scout/core/__init__.py +7 -7
- webscout/scout/core/crawler.py +330 -210
- webscout/scout/core/scout.py +800 -607
- webscout/scout/core/search_result.py +51 -96
- webscout/scout/core/text_analyzer.py +64 -63
- webscout/scout/core/text_utils.py +412 -277
- webscout/scout/core/web_analyzer.py +54 -52
- webscout/scout/element.py +872 -478
- webscout/scout/parsers/__init__.py +70 -69
- webscout/scout/parsers/html5lib_parser.py +182 -172
- webscout/scout/parsers/html_parser.py +238 -236
- webscout/scout/parsers/lxml_parser.py +203 -178
- webscout/scout/utils.py +38 -37
- webscout/search/__init__.py +47 -0
- webscout/search/base.py +201 -0
- webscout/search/bing_main.py +45 -0
- webscout/search/brave_main.py +92 -0
- webscout/search/duckduckgo_main.py +57 -0
- webscout/search/engines/__init__.py +127 -0
- webscout/search/engines/bing/__init__.py +15 -0
- webscout/search/engines/bing/base.py +35 -0
- webscout/search/engines/bing/images.py +114 -0
- webscout/search/engines/bing/news.py +96 -0
- webscout/search/engines/bing/suggestions.py +36 -0
- webscout/search/engines/bing/text.py +109 -0
- webscout/search/engines/brave/__init__.py +19 -0
- webscout/search/engines/brave/base.py +47 -0
- webscout/search/engines/brave/images.py +213 -0
- webscout/search/engines/brave/news.py +353 -0
- webscout/search/engines/brave/suggestions.py +318 -0
- webscout/search/engines/brave/text.py +167 -0
- webscout/search/engines/brave/videos.py +364 -0
- webscout/search/engines/duckduckgo/__init__.py +25 -0
- webscout/search/engines/duckduckgo/answers.py +80 -0
- webscout/search/engines/duckduckgo/base.py +189 -0
- webscout/search/engines/duckduckgo/images.py +100 -0
- webscout/search/engines/duckduckgo/maps.py +183 -0
- webscout/search/engines/duckduckgo/news.py +70 -0
- webscout/search/engines/duckduckgo/suggestions.py +22 -0
- webscout/search/engines/duckduckgo/text.py +221 -0
- webscout/search/engines/duckduckgo/translate.py +48 -0
- webscout/search/engines/duckduckgo/videos.py +80 -0
- webscout/search/engines/duckduckgo/weather.py +84 -0
- webscout/search/engines/mojeek.py +61 -0
- webscout/search/engines/wikipedia.py +77 -0
- webscout/search/engines/yahoo/__init__.py +41 -0
- webscout/search/engines/yahoo/answers.py +19 -0
- webscout/search/engines/yahoo/base.py +34 -0
- webscout/search/engines/yahoo/images.py +323 -0
- webscout/search/engines/yahoo/maps.py +19 -0
- webscout/search/engines/yahoo/news.py +258 -0
- webscout/search/engines/yahoo/suggestions.py +140 -0
- webscout/search/engines/yahoo/text.py +273 -0
- webscout/search/engines/yahoo/translate.py +19 -0
- webscout/search/engines/yahoo/videos.py +302 -0
- webscout/search/engines/yahoo/weather.py +220 -0
- webscout/search/engines/yandex.py +67 -0
- webscout/search/engines/yep/__init__.py +13 -0
- webscout/search/engines/yep/base.py +34 -0
- webscout/search/engines/yep/images.py +101 -0
- webscout/search/engines/yep/suggestions.py +38 -0
- webscout/search/engines/yep/text.py +99 -0
- webscout/search/http_client.py +172 -0
- webscout/search/results.py +141 -0
- webscout/search/yahoo_main.py +57 -0
- webscout/search/yep_main.py +48 -0
- webscout/server/__init__.py +48 -0
- webscout/server/config.py +78 -0
- webscout/server/exceptions.py +69 -0
- webscout/server/providers.py +286 -0
- webscout/server/request_models.py +131 -0
- webscout/server/request_processing.py +404 -0
- webscout/server/routes.py +642 -0
- webscout/server/server.py +351 -0
- webscout/server/ui_templates.py +1171 -0
- webscout/swiftcli/__init__.py +79 -95
- webscout/swiftcli/core/__init__.py +7 -7
- webscout/swiftcli/core/cli.py +574 -297
- webscout/swiftcli/core/context.py +98 -104
- webscout/swiftcli/core/group.py +268 -241
- webscout/swiftcli/decorators/__init__.py +28 -28
- webscout/swiftcli/decorators/command.py +243 -221
- webscout/swiftcli/decorators/options.py +247 -220
- webscout/swiftcli/decorators/output.py +392 -252
- webscout/swiftcli/exceptions.py +21 -21
- webscout/swiftcli/plugins/__init__.py +9 -9
- webscout/swiftcli/plugins/base.py +134 -135
- webscout/swiftcli/plugins/manager.py +269 -269
- webscout/swiftcli/utils/__init__.py +58 -59
- webscout/swiftcli/utils/formatting.py +251 -252
- webscout/swiftcli/utils/parsing.py +368 -267
- webscout/update_checker.py +280 -136
- webscout/utils.py +28 -14
- webscout/version.py +2 -1
- webscout/version.py.bak +3 -0
- webscout/zeroart/__init__.py +218 -135
- webscout/zeroart/base.py +70 -66
- webscout/zeroart/effects.py +155 -101
- webscout/zeroart/fonts.py +1799 -1239
- webscout-2026.1.19.dist-info/METADATA +638 -0
- webscout-2026.1.19.dist-info/RECORD +312 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/entry_points.txt +1 -1
- webscout/DWEBS.py +0 -520
- webscout/Extra/Act.md +0 -309
- webscout/Extra/GitToolkit/gitapi/README.md +0 -110
- webscout/Extra/autocoder/__init__.py +0 -9
- webscout/Extra/autocoder/autocoder.py +0 -1105
- webscout/Extra/autocoder/autocoder_utiles.py +0 -332
- webscout/Extra/gguf.md +0 -430
- webscout/Extra/weather.md +0 -281
- webscout/Litlogger/README.md +0 -10
- webscout/Litlogger/__init__.py +0 -15
- webscout/Litlogger/formats.py +0 -4
- webscout/Litlogger/handlers.py +0 -103
- webscout/Litlogger/levels.py +0 -13
- webscout/Litlogger/logger.py +0 -92
- webscout/Provider/AI21.py +0 -177
- webscout/Provider/AISEARCH/DeepFind.py +0 -254
- webscout/Provider/AISEARCH/felo_search.py +0 -202
- webscout/Provider/AISEARCH/genspark_search.py +0 -324
- webscout/Provider/AISEARCH/hika_search.py +0 -186
- webscout/Provider/AISEARCH/scira_search.py +0 -298
- webscout/Provider/Aitopia.py +0 -316
- webscout/Provider/AllenAI.py +0 -440
- webscout/Provider/Blackboxai.py +0 -791
- webscout/Provider/ChatGPTClone.py +0 -237
- webscout/Provider/ChatGPTGratis.py +0 -194
- webscout/Provider/Cloudflare.py +0 -324
- webscout/Provider/ExaChat.py +0 -358
- webscout/Provider/Flowith.py +0 -217
- webscout/Provider/FreeGemini.py +0 -250
- webscout/Provider/Glider.py +0 -225
- webscout/Provider/HF_space/__init__.py +0 -0
- webscout/Provider/HF_space/qwen_qwen2.py +0 -206
- webscout/Provider/HuggingFaceChat.py +0 -469
- webscout/Provider/Hunyuan.py +0 -283
- webscout/Provider/LambdaChat.py +0 -411
- webscout/Provider/Llama3.py +0 -259
- webscout/Provider/Nemotron.py +0 -218
- webscout/Provider/OLLAMA.py +0 -396
- webscout/Provider/OPENAI/BLACKBOXAI.py +0 -766
- webscout/Provider/OPENAI/Cloudflare.py +0 -378
- webscout/Provider/OPENAI/FreeGemini.py +0 -283
- webscout/Provider/OPENAI/NEMOTRON.py +0 -232
- webscout/Provider/OPENAI/Qwen3.py +0 -283
- webscout/Provider/OPENAI/api.py +0 -969
- webscout/Provider/OPENAI/c4ai.py +0 -373
- webscout/Provider/OPENAI/chatgptclone.py +0 -494
- webscout/Provider/OPENAI/copilot.py +0 -242
- webscout/Provider/OPENAI/flowith.py +0 -162
- webscout/Provider/OPENAI/freeaichat.py +0 -359
- webscout/Provider/OPENAI/mcpcore.py +0 -389
- webscout/Provider/OPENAI/multichat.py +0 -376
- webscout/Provider/OPENAI/opkfc.py +0 -496
- webscout/Provider/OPENAI/scirachat.py +0 -477
- webscout/Provider/OPENAI/standardinput.py +0 -433
- webscout/Provider/OPENAI/typegpt.py +0 -364
- webscout/Provider/OPENAI/uncovrAI.py +0 -463
- webscout/Provider/OPENAI/venice.py +0 -431
- webscout/Provider/OPENAI/yep.py +0 -382
- webscout/Provider/OpenGPT.py +0 -209
- webscout/Provider/Perplexitylabs.py +0 -415
- webscout/Provider/Reka.py +0 -214
- webscout/Provider/StandardInput.py +0 -290
- webscout/Provider/TTI/aiarta.py +0 -365
- webscout/Provider/TTI/artbit.py +0 -0
- webscout/Provider/TTI/fastflux.py +0 -200
- webscout/Provider/TTI/piclumen.py +0 -203
- webscout/Provider/TTI/pixelmuse.py +0 -225
- webscout/Provider/TTS/gesserit.py +0 -128
- webscout/Provider/TTS/sthir.py +0 -94
- webscout/Provider/TeachAnything.py +0 -229
- webscout/Provider/UNFINISHED/puterjs.py +0 -635
- webscout/Provider/UNFINISHED/test_lmarena.py +0 -119
- webscout/Provider/Venice.py +0 -258
- webscout/Provider/VercelAI.py +0 -253
- webscout/Provider/Writecream.py +0 -246
- webscout/Provider/WritingMate.py +0 -269
- webscout/Provider/asksteve.py +0 -220
- webscout/Provider/chatglm.py +0 -215
- webscout/Provider/copilot.py +0 -425
- webscout/Provider/freeaichat.py +0 -285
- webscout/Provider/granite.py +0 -235
- webscout/Provider/hermes.py +0 -266
- webscout/Provider/koala.py +0 -170
- webscout/Provider/lmarena.py +0 -198
- webscout/Provider/multichat.py +0 -364
- webscout/Provider/scira_chat.py +0 -299
- webscout/Provider/scnet.py +0 -243
- webscout/Provider/talkai.py +0 -194
- webscout/Provider/typegpt.py +0 -289
- webscout/Provider/uncovr.py +0 -368
- webscout/Provider/yep.py +0 -389
- webscout/litagent/Readme.md +0 -276
- webscout/litprinter/__init__.py +0 -59
- webscout/swiftcli/Readme.md +0 -323
- webscout/tempid.py +0 -128
- webscout/webscout_search.py +0 -1184
- webscout/webscout_search_async.py +0 -654
- webscout/yep_search.py +0 -347
- webscout/zeroart/README.md +0 -89
- webscout-8.2.9.dist-info/METADATA +0 -1033
- webscout-8.2.9.dist-info/RECORD +0 -289
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/licenses/LICENSE.md +0 -0
- {webscout-8.2.9.dist-info → webscout-2026.1.19.dist-info}/top_level.txt +0 -0
webscout/AIutel.py
CHANGED
@@ -1,703 +1,68 @@
-import
-import
-
-
-
-
-
-
-
-
-
-
-
-
-)
-
-
-
-
-
-
-def
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    # Use slicing for prefix removal (faster than startswith+slicing)
-    sanitized_chunk = chunk
-    if intro_value and len(chunk) >= len(intro_value) and chunk[:len(intro_value)] == intro_value:
-        sanitized_chunk = chunk[len(intro_value):]
-
-    # Optimize string stripping operations
-    if strip_chars is not None:
-        sanitized_chunk = sanitized_chunk.strip(strip_chars)
-    else:
-        # lstrip() is faster than strip() when we only need leading whitespace removed
-        sanitized_chunk = sanitized_chunk.lstrip()
-
-    # Skip empty chunks and markers
-    if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
-        return None
-
-    # JSON parsing with optimized error handling
-    if to_json:
-        try:
-            # Only strip before JSON parsing if needed
-            if sanitized_chunk[0] not in '{[' or sanitized_chunk[-1] not in '}]':
-                sanitized_chunk = sanitized_chunk.strip()
-            return json.loads(sanitized_chunk)
-        except (json.JSONDecodeError, Exception) as e:
-            if error_handler:
-                try:
-                    handled = error_handler(e, sanitized_chunk)
-                    if handled is not None:
-                        return handled
-                except Exception:
-                    pass
-            return sanitized_chunk if yield_raw_on_error else None
-
-    return sanitized_chunk
-
-def _decode_byte_stream(
-    byte_iterator: Iterable[bytes],
-    encoding: EncodingType = 'utf-8',
-    errors: str = 'replace',
-    buffer_size: int = 8192
-) -> Generator[str, None, None]:
-    """
-    Decodes a byte stream in realtime with flexible encoding support.
-
-    This function takes an iterator of bytes and decodes it into a stream of strings
-    using the specified character encoding. It handles encoding errors gracefully
-    and can be tuned for performance with the `buffer_size` parameter.
-
-    Args:
-        byte_iterator (Iterable[bytes]): An iterator that yields chunks of bytes.
-        encoding (EncodingType): The character encoding to use for decoding.
-            Defaults to 'utf-8'. Supports a wide range of encodings, including:
-            'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
-            'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
-            'shift_jis', 'euc-jp', 'euc-kr'.
-        errors (str): Specifies how encoding errors should be handled.
-            Options are 'strict' (raises an error), 'ignore' (skips the error), and
-            'replace' (replaces the erroneous byte with a replacement character).
-            Defaults to 'replace'.
-        buffer_size (int): The size of the internal buffer used for decoding.
-
-    Args:
-        byte_iterator: Iterator yielding bytes
-        encoding: Character encoding to use
-        errors: How to handle encoding errors ('strict', 'ignore', 'replace')
-        buffer_size: Size of internal buffer for performance tuning
-    """
-    # Initialize decoder with the specified encoding
-    try:
-        decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
-    except LookupError:
-        # Fallback to utf-8 if the encoding is not supported
-        decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
-
-    # Process byte stream in realtime
-    buffer = bytearray(buffer_size)
-    buffer_view = memoryview(buffer)
-
-    for chunk_bytes in byte_iterator:
-        if not chunk_bytes:
-            continue
-
-        try:
-            # Use buffer for processing if chunk size is appropriate
-            if len(chunk_bytes) <= buffer_size:
-                buffer[:len(chunk_bytes)] = chunk_bytes
-                text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
-            else:
-                text = decoder.decode(chunk_bytes, final=False)
-
-            if text:
-                yield text
-        except UnicodeDecodeError:
-            yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
-
-    # Final flush
-    try:
-        final_text = decoder.decode(b'', final=True)
-        if final_text:
-            yield final_text
-    except UnicodeDecodeError:
-        yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
-
-async def _decode_byte_stream_async(
-    byte_iterator: Iterable[bytes],
-    encoding: EncodingType = 'utf-8',
-    errors: str = 'replace',
-    buffer_size: int = 8192
-) -> AsyncGenerator[str, None]:
-    """
-    Asynchronously decodes a byte stream with flexible encoding support.
-
-    This function is the asynchronous counterpart to `_decode_byte_stream`. It takes
-    an asynchronous iterator of bytes and decodes it into a stream of strings using
-    the specified character encoding. It handles encoding errors gracefully and can
-    be tuned for performance with the `buffer_size` parameter.
-
-    Args:
-        byte_iterator (Iterable[bytes]): An asynchronous iterator that yields chunks of bytes.
-        encoding (EncodingType): The character encoding to use for decoding.
-            Defaults to 'utf-8'. Supports a wide range of encodings, including:
-            'utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
-            'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
-            'shift_jis', 'euc-jp', 'euc-kr'.
-        errors (str): Specifies how encoding errors should be handled.
-            Options are 'strict' (raises an error), 'ignore' (skips the error), and
-            'replace' (replaces the erroneous byte with a replacement character).
-            Defaults to 'replace'.
-        buffer_size (int): The size of the internal buffer used for decoding.
-    """
-    try:
-        decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
-    except LookupError:
-        decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
-
-    buffer = bytearray(buffer_size)
-    buffer_view = memoryview(buffer)
-
-    async for chunk_bytes in byte_iterator:
-        if not chunk_bytes:
-            continue
-        try:
-            if len(chunk_bytes) <= buffer_size:
-                buffer[:len(chunk_bytes)] = chunk_bytes
-                text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
-            else:
-                text = decoder.decode(chunk_bytes, final=False)
-            if text:
-                yield text
-        except UnicodeDecodeError:
-            yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
-
-    try:
-        final_text = decoder.decode(b'', final=True)
-        if final_text:
-            yield final_text
-    except UnicodeDecodeError:
-        yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
-
-def _sanitize_stream_sync(
-    data: Union[str, Iterable[str], Iterable[bytes]],
-    intro_value: str = "data:",
-    to_json: bool = True,
-    skip_markers: Optional[List[str]] = None,
-    strip_chars: Optional[str] = None,
-    start_marker: Optional[str] = None,
-    end_marker: Optional[str] = None,
-    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
-    yield_raw_on_error: bool = True,
-    encoding: EncodingType = 'utf-8',
-    encoding_errors: str = 'replace',
-    buffer_size: int = 8192,
-    line_delimiter: Optional[str] = None,
-    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-) -> Generator[Any, None, None]:
-    """
-    Processes a stream of data (strings or bytes) in real-time, applying various transformations and filtering.
-
-    This function is designed to handle streaming data, allowing for operations such as
-    prefix removal, JSON parsing, skipping lines based on markers, and extracting specific content.
-    It also supports custom error handling for JSON parsing failures.
-
-    Args:
-        data: String, iterable of strings, or iterable of bytes to process.
-        intro_value: Prefix indicating the start of meaningful data.
-        to_json: Parse JSON content if ``True``.
-        skip_markers: Lines containing any of these markers are skipped.
-        strip_chars: Characters to strip from each line.
-        start_marker: Begin processing only after this marker is found.
-        end_marker: Stop processing once this marker is found.
-        content_extractor: Optional callable to transform parsed content before yielding.
-        yield_raw_on_error: Yield raw lines when JSON parsing fails.
-        encoding: Byte stream encoding.
-        encoding_errors: How to handle encoding errors.
-        buffer_size: Buffer size for byte decoding.
-        line_delimiter: Delimiter used to split incoming text into lines. ``None``
-            uses ``str.splitlines()``.
-        error_handler: Callback invoked with ``(Exception, str)`` when JSON
-            parsing fails. If the callback returns a value, it is yielded instead of the raw line.
-
-    Yields:
-        Any: Processed data, which can be a string, a dictionary (if `to_json` is True), or the result of `content_extractor`.
-
-    Raises:
-        TypeError: If the input `data` is not a string or an iterable.
-    """
-    effective_skip_markers = skip_markers or []
-    processing_active = start_marker is None
-    buffer = ""
-    found_start = False if start_marker else True
-
-    # Fast path for single string processing
-    if isinstance(data, str):
-        processed_item = None
-        if processing_active:
-            if to_json:
-                try:
-                    data = data.strip()
-                    if data:
-                        processed_item = json.loads(data)
-                except Exception as e:
-                    if error_handler:
-                        try:
-                            handled = error_handler(e, data)
-                            if handled is not None:
-                                processed_item = handled
-
-                        except Exception:
-                            pass
-                    if processed_item is None:
-                        processed_item = data if yield_raw_on_error else None
-            else:
-                processed_item = _process_chunk(
-                    data, intro_value, False, effective_skip_markers,
-                    strip_chars, yield_raw_on_error, error_handler
-                )
-        if processed_item is not None:
-            if content_extractor:
-                try:
-                    final_content = content_extractor(processed_item)
-                    if final_content is not None:
-                        yield final_content
-                except Exception:
-                    pass
-            else:
-                yield processed_item
-        return
-
-    # Stream processing path
-    if not hasattr(data, '__iter__'):
-        raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
-
-    try:
-        iterator = iter(data)
-        first_item = next(iterator, None)
-        if first_item is None:
-            return
-        from itertools import chain
-        stream = chain([first_item], iterator)
-
-        # Determine if we're dealing with bytes or strings
-        if isinstance(first_item, bytes):
-            line_iterator = _decode_byte_stream(
-                stream,
-                encoding=encoding,
-                errors=encoding_errors,
-                buffer_size=buffer_size
-            )
-        elif isinstance(first_item, str):
-            line_iterator = stream
-        else:
-            raise TypeError(f"Stream must yield strings or bytes, not {type(first_item).__name__}")
-
-        for line in line_iterator:
-            if not line:
-                continue
-            buffer += line
-            while True:
-                # Look for start marker if needed
-                if not found_start and start_marker:
-                    idx = buffer.find(start_marker)
-                    if idx != -1:
-                        found_start = True
-                        buffer = buffer[idx + len(start_marker):]
-                    else:
-                        # Not found, keep buffering
-                        buffer = buffer[-max(len(start_marker), 256):]  # avoid unbounded growth
-                        break
-                # Look for end marker if needed
-                if found_start and end_marker:
-                    idx = buffer.find(end_marker)
-                    if idx != -1:
-                        chunk = buffer[:idx]
-                        buffer = buffer[idx + len(end_marker):]
-                        processing_active = False
-                    else:
-                        chunk = buffer
-                        buffer = ""
-                        processing_active = True
-                    # Process chunk if we are in active region
-                    if chunk and processing_active:
-                        for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
-                            result = _process_chunk(
-                                subline,
-                                intro_value,
-                                to_json,
-                                effective_skip_markers,
-                                strip_chars,
-                                yield_raw_on_error,
-                                error_handler,
-                            )
-                            if result is None:
-                                continue
-                            if content_extractor:
-                                try:
-                                    final_content = content_extractor(result)
-                                    if final_content is not None:
-                                        yield final_content
-                                except Exception:
-                                    pass
-                            else:
-                                yield result
-                    if not processing_active:
-                        found_start = False
-                    if idx == -1:
-                        break
-                elif found_start:
-                    # No end marker, process all buffered content
-                    chunk = buffer
-                    buffer = ""
-                    if chunk:
-                        for subline in (chunk.split(line_delimiter) if line_delimiter is not None else chunk.splitlines()):
-                            result = _process_chunk(
-                                subline,
-                                intro_value,
-                                to_json,
-                                effective_skip_markers,
-                                strip_chars,
-                                yield_raw_on_error,
-                                error_handler,
-                            )
-                            if result is None:
-                                continue
-                            if content_extractor:
-                                try:
-                                    final_content = content_extractor(result)
-                                    if final_content is not None:
-                                        yield final_content
-                                except Exception:
-                                    pass
-                            else:
-                                yield result
-                    break
-                else:
-                    break
-    except Exception as e:
-        import sys
-        print(f"Stream processing error: {str(e)}", file=sys.stderr)
-
-
-async def _sanitize_stream_async(
-    data: Union[str, Iterable[str], Iterable[bytes]],
-    intro_value: str = "data:",
-    to_json: bool = True,
-    skip_markers: Optional[List[str]] = None,
-    strip_chars: Optional[str] = None,
-    start_marker: Optional[str] = None,
-    end_marker: Optional[str] = None,
-    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
-    yield_raw_on_error: bool = True,
-    encoding: EncodingType = 'utf-8',
-    encoding_errors: str = 'replace',
-    buffer_size: int = 8192,
-    line_delimiter: Optional[str] = None,
-    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-) -> AsyncGenerator[Any, None]:
-    """
-    Asynchronously processes a stream of data (strings or bytes), applying transformations and filtering.
-
-    This function is the asynchronous counterpart to `_sanitize_stream_sync`. It handles
-    streaming data, allowing for operations such as prefix removal, JSON parsing,
-    skipping lines based on markers, and extracting specific content. It also supports
-    custom error handling for JSON parsing failures.
-
-    Args:
-        data: String, iterable of strings, or iterable of bytes to process.
-        intro_value: Prefix indicating the start of meaningful data.
-        to_json: Parse JSON content if ``True``.
-        skip_markers: Lines containing any of these markers are skipped.
-        strip_chars: Characters to strip from each line.
-        start_marker: Begin processing only after this marker is found.
-        end_marker: Stop processing once this marker is found.
-        content_extractor: Optional callable to transform parsed content before yielding.
-        yield_raw_on_error: Yield raw lines when JSON parsing fails.
-        encoding: Byte stream encoding.
-        encoding_errors: How to handle encoding errors.
-        buffer_size: Buffer size for byte decoding.
-        line_delimiter: Delimiter used to split incoming text into lines. ``None`` uses ``str.splitlines()``.
-        error_handler: Callback invoked with ``(Exception, str)`` when JSON parsing fails. If the callback returns a value, it is yielded in place of the raw line.
-    """
-    if isinstance(data, str):
-        for item in _sanitize_stream_sync(
-            data,
-            intro_value=intro_value,
-            to_json=to_json,
-            skip_markers=skip_markers,
-            strip_chars=strip_chars,
-            start_marker=start_marker,
-            end_marker=end_marker,
-            content_extractor=content_extractor,
-            yield_raw_on_error=yield_raw_on_error,
-            encoding=encoding,
-            encoding_errors=encoding_errors,
-            buffer_size=buffer_size,
-            line_delimiter=line_delimiter,
-            error_handler=error_handler,
-        ):
-            yield item
-        return
-
-    if not hasattr(data, "__aiter__"):
-        # Fallback to synchronous processing if possible
-        for item in _sanitize_stream_sync(
-            data,
-            intro_value=intro_value,
-            to_json=to_json,
-            skip_markers=skip_markers,
-            strip_chars=strip_chars,
-            start_marker=start_marker,
-            end_marker=end_marker,
-            content_extractor=content_extractor,
-            yield_raw_on_error=yield_raw_on_error,
-            encoding=encoding,
-            encoding_errors=encoding_errors,
-            buffer_size=buffer_size,
-            line_delimiter=line_delimiter,
-            error_handler=error_handler,
-        ):
-            yield item
-        return
-
-    effective_skip_markers = skip_markers or []
-    processing_active = start_marker is None
-    buffer = ""
-    found_start = False if start_marker else True
-
-    iterator = data.__aiter__()
-    first_item = None
-    async for first_item in iterator:
-        break
-    if first_item is None:
-        return
-    async def _chain(first, it):
-        yield first
-        async for x in it:
-            yield x
-
-    stream = _chain(first_item, iterator)
-
-    if isinstance(first_item, bytes):
-        line_iterator = _decode_byte_stream_async(
-            stream,
-            encoding=encoding,
-            errors=encoding_errors,
-            buffer_size=buffer_size,
-        )
-    elif isinstance(first_item, str):
-        line_iterator = stream
-    else:
-        raise TypeError(
-            f"Stream must yield strings or bytes, not {type(first_item).__name__}"
-        )
-
-    async for line in line_iterator:
-        if not line:
-            continue
-        buffer += line
-        while True:
-            if not found_start and start_marker:
-                idx = buffer.find(start_marker)
-                if idx != -1:
-                    found_start = True
-                    buffer = buffer[idx + len(start_marker) :]
-                else:
-                    buffer = buffer[-max(len(start_marker), 256) :]
-                    break
-            if found_start and end_marker:
-                idx = buffer.find(end_marker)
-                if idx != -1:
-                    chunk = buffer[:idx]
-                    buffer = buffer[idx + len(end_marker) :]
-                    processing_active = False
-                else:
-                    chunk = buffer
-                    buffer = ""
-                    processing_active = True
-                if chunk and processing_active:
-                    for subline in (
-                        chunk.split(line_delimiter)
-                        if line_delimiter is not None
-                        else chunk.splitlines()
-                    ):
-                        result = _process_chunk(
-                            subline,
-                            intro_value,
-                            to_json,
-                            effective_skip_markers,
-                            strip_chars,
-                            yield_raw_on_error,
-                            error_handler,
-                        )
-                        if result is None:
-                            continue
-                        if content_extractor:
-                            try:
-                                final_content = content_extractor(result)
-                                if final_content is not None:
-                                    yield final_content
-                            except Exception:
-                                pass
-                        else:
-                            yield result
-                if not processing_active:
-                    found_start = False
-                if idx == -1:
-                    break
-            elif found_start:
-                chunk = buffer
-                buffer = ""
-                if chunk:
-                    for subline in (
-                        chunk.split(line_delimiter)
-                        if line_delimiter is not None
-                        else chunk.splitlines()
-                    ):
-                        result = _process_chunk(
-                            subline,
-                            intro_value,
-                            to_json,
-                            effective_skip_markers,
-                            strip_chars,
-                            yield_raw_on_error,
-                            error_handler,
-                        )
-                        if result is None:
-                            continue
-                        if content_extractor:
-                            try:
-                                final_content = content_extractor(result)
-                                if final_content is not None:
-                                    yield final_content
-                            except Exception:
-                                pass
-                        else:
-                            yield result
-                break
-            else:
-                break
-
-
-def sanitize_stream(
-    data: Union[
-        str,
-        Iterable[str],
-        Iterable[bytes],
-        AsyncIterable[str],
-        AsyncIterable[bytes],
-    ],
-    intro_value: str = "data:",
-    to_json: bool = True,
-    skip_markers: Optional[List[str]] = None,
-    strip_chars: Optional[str] = None,
-    start_marker: Optional[str] = None,
-    end_marker: Optional[str] = None,
-    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
-    yield_raw_on_error: bool = True,
-    encoding: EncodingType = "utf-8",
-    encoding_errors: str = "replace",
-    buffer_size: int = 8192,
-    line_delimiter: Optional[str] = None,
-    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
-) -> Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
-    """
-    Processes streaming data (strings or bytes) in either synchronous or asynchronous mode.
-
-    This function acts as a unified interface for handling both synchronous and
-    asynchronous data streams. It automatically detects the type of input data and
-    dispatches it to the appropriate processing function (`_sanitize_stream_sync` or
-    `_sanitize_stream_async`).
-
-    Args:
-        data (Union[str, Iterable[str], Iterable[bytes], AsyncIterable[str], AsyncIterable[bytes]]):
-            The data to be processed. Can be a string, a synchronous iterable of strings or bytes,
-            or an asynchronous iterable of strings or bytes.
-        intro_value (str): Prefix indicating the start of meaningful data. Defaults to "data:".
-        to_json (bool): Parse JSON content if ``True``. Defaults to True.
-        skip_markers (Optional[List[str]]): Lines containing any of these markers are skipped. Defaults to None.
-        strip_chars (Optional[str]): Characters to strip from each line. Defaults to None.
-        start_marker (Optional[str]): Begin processing only after this marker is found. Defaults to None.
-        end_marker (Optional[str]): Stop processing once this marker is found. Defaults to None.
-        content_extractor (Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]]):
-            Optional callable to transform parsed content before yielding. Defaults to None.
-        yield_raw_on_error (bool): Yield raw lines when JSON parsing fails. Defaults to True.
-        encoding (EncodingType): Byte stream encoding. Defaults to "utf-8".
-        encoding_errors (str): How to handle encoding errors. Defaults to "replace".
-        buffer_size (int): Buffer size for byte decoding. Defaults to 8192.
-        line_delimiter (Optional[str]): Delimiter used to split incoming text into lines.
-            ``None`` uses ``str.splitlines()``. Defaults to None.
-        error_handler (Optional[Callable[[Exception, str], Optional[Any]]]):
-            Callback invoked with ``(Exception, str)`` when JSON parsing fails.
-            If the callback returns a value, it is yielded in place of the raw line. Defaults to None.
-
-    Returns:
-        Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
-            A generator or an asynchronous generator yielding the processed data.
-    """
-
-    if hasattr(data, "__aiter__"):
-        return _sanitize_stream_async(
-            data, intro_value, to_json, skip_markers, strip_chars,
-            start_marker, end_marker, content_extractor, yield_raw_on_error,
-            encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-        )
-    return _sanitize_stream_sync(
-        data, intro_value, to_json, skip_markers, strip_chars,
-        start_marker, end_marker, content_extractor, yield_raw_on_error,
-        encoding, encoding_errors, buffer_size, line_delimiter, error_handler,
-    )
-
-
-from .conversation import Conversation  # noqa: E402,F401
-from .Extra.autocoder import AutoCoder  # noqa: E402,F401
-from .optimizers import Optimizers  # noqa: E402,F401
-from .prompt_manager import AwesomePrompts  # noqa: E402,F401
+import functools
+import time
+
+# --- Utility Decorators ---
+from typing import Callable, Optional
+
+from .conversation import Conversation  # noqa: E402,F401
+from .optimizers import Optimizers  # noqa: E402,F401
+from .prompt_manager import AwesomePrompts  # noqa: E402,F401
+from .sanitize import *  # noqa: E402, F401, F403
+
+
+def timeIt(func: Callable):
+    """
+    Decorator to measure execution time of a function (sync or async).
+    Prints: - Execution time for '{func.__name__}' : {elapsed:.6f} Seconds.
+    """
+    import asyncio
+    GREEN_BOLD = "\033[1;92m"
+    RESET = "\033[0m"
+    @functools.wraps(func)
+    def sync_wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = func(*args, **kwargs)
+        end_time = time.time()
+        print()
+        func_name = getattr(func, "__name__", str(func))
+        print(f"{GREEN_BOLD}- Execution time for '{func_name}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
+        return result
+
+    @functools.wraps(func)
+    async def async_wrapper(*args, **kwargs):
+        start_time = time.time()
+        result = await func(*args, **kwargs)
+        end_time = time.time()
+        print()
+        func_name = getattr(func, "__name__", str(func))
+        print(f"{GREEN_BOLD}- Execution time for '{func_name}' : {end_time - start_time:.6f} Seconds. {RESET}\n")
+        return result
+
+    if asyncio.iscoroutinefunction(func):
+        return async_wrapper
+    else:
+        return sync_wrapper
+
+def retry(retries: int = 3, delay: float = 1) -> Callable:
+    """
+    Decorator to retry a function on exception.
+    """
+    def decorator(func: Callable):
+        @functools.wraps(func)
+        def wrapper(*args, **kwargs):
+            last_exc = None
+            for attempt in range(retries):
+                try:
+                    return func(*args, **kwargs)
+                except Exception as exc:
+                    last_exc = exc
+                    print(f"Attempt {attempt + 1} failed: {exc}. Retrying in {delay} seconds...")
+                    time.sleep(delay)
+            # Ensure we have an exception to raise
+            if last_exc is not None:
+                raise last_exc
+            else:
+                func_name = getattr(func, "__name__", str(func))
+                raise RuntimeError(f"Function {func_name} failed after {retries} retries with no exception recorded")
+        return wrapper
+    return decorator