webscout 8.2.2__py3-none-any.whl → 2026.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (483)
  1. webscout/AIauto.py +524 -143
  2. webscout/AIbase.py +247 -123
  3. webscout/AIutel.py +68 -132
  4. webscout/Bard.py +1072 -535
  5. webscout/Extra/GitToolkit/__init__.py +2 -2
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +20 -12
  7. webscout/Extra/GitToolkit/gitapi/gist.py +142 -0
  8. webscout/Extra/GitToolkit/gitapi/organization.py +91 -0
  9. webscout/Extra/GitToolkit/gitapi/repository.py +308 -195
  10. webscout/Extra/GitToolkit/gitapi/search.py +162 -0
  11. webscout/Extra/GitToolkit/gitapi/trending.py +236 -0
  12. webscout/Extra/GitToolkit/gitapi/user.py +128 -96
  13. webscout/Extra/GitToolkit/gitapi/utils.py +82 -62
  14. webscout/Extra/YTToolkit/README.md +443 -0
  15. webscout/Extra/YTToolkit/YTdownloader.py +953 -957
  16. webscout/Extra/YTToolkit/__init__.py +3 -3
  17. webscout/Extra/YTToolkit/transcriber.py +595 -476
  18. webscout/Extra/YTToolkit/ytapi/README.md +230 -0
  19. webscout/Extra/YTToolkit/ytapi/__init__.py +22 -6
  20. webscout/Extra/YTToolkit/ytapi/captions.py +190 -0
  21. webscout/Extra/YTToolkit/ytapi/channel.py +302 -307
  22. webscout/Extra/YTToolkit/ytapi/errors.py +13 -13
  23. webscout/Extra/YTToolkit/ytapi/extras.py +178 -45
  24. webscout/Extra/YTToolkit/ytapi/hashtag.py +120 -0
  25. webscout/Extra/YTToolkit/ytapi/https.py +89 -88
  26. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -61
  27. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -59
  28. webscout/Extra/YTToolkit/ytapi/pool.py +8 -8
  29. webscout/Extra/YTToolkit/ytapi/query.py +143 -40
  30. webscout/Extra/YTToolkit/ytapi/shorts.py +122 -0
  31. webscout/Extra/YTToolkit/ytapi/stream.py +68 -63
  32. webscout/Extra/YTToolkit/ytapi/suggestions.py +97 -0
  33. webscout/Extra/YTToolkit/ytapi/utils.py +66 -62
  34. webscout/Extra/YTToolkit/ytapi/video.py +189 -18
  35. webscout/Extra/__init__.py +2 -3
  36. webscout/Extra/gguf.py +1298 -682
  37. webscout/Extra/tempmail/README.md +488 -0
  38. webscout/Extra/tempmail/__init__.py +28 -28
  39. webscout/Extra/tempmail/async_utils.py +143 -141
  40. webscout/Extra/tempmail/base.py +172 -161
  41. webscout/Extra/tempmail/cli.py +191 -187
  42. webscout/Extra/tempmail/emailnator.py +88 -84
  43. webscout/Extra/tempmail/mail_tm.py +378 -361
  44. webscout/Extra/tempmail/temp_mail_io.py +304 -292
  45. webscout/Extra/weather.py +196 -194
  46. webscout/Extra/weather_ascii.py +17 -15
  47. webscout/Provider/AISEARCH/PERPLEXED_search.py +175 -0
  48. webscout/Provider/AISEARCH/Perplexity.py +237 -304
  49. webscout/Provider/AISEARCH/README.md +106 -0
  50. webscout/Provider/AISEARCH/__init__.py +16 -10
  51. webscout/Provider/AISEARCH/brave_search.py +298 -0
  52. webscout/Provider/AISEARCH/iask_search.py +130 -209
  53. webscout/Provider/AISEARCH/monica_search.py +200 -246
  54. webscout/Provider/AISEARCH/webpilotai_search.py +242 -281
  55. webscout/Provider/Algion.py +413 -0
  56. webscout/Provider/Andi.py +74 -69
  57. webscout/Provider/Apriel.py +313 -0
  58. webscout/Provider/Ayle.py +323 -0
  59. webscout/Provider/ChatSandbox.py +329 -0
  60. webscout/Provider/ClaudeOnline.py +365 -0
  61. webscout/Provider/Cohere.py +232 -208
  62. webscout/Provider/DeepAI.py +367 -0
  63. webscout/Provider/Deepinfra.py +343 -173
  64. webscout/Provider/EssentialAI.py +217 -0
  65. webscout/Provider/ExaAI.py +274 -261
  66. webscout/Provider/Gemini.py +60 -54
  67. webscout/Provider/GithubChat.py +385 -367
  68. webscout/Provider/Gradient.py +286 -0
  69. webscout/Provider/Groq.py +556 -670
  70. webscout/Provider/HadadXYZ.py +323 -0
  71. webscout/Provider/HeckAI.py +392 -233
  72. webscout/Provider/HuggingFace.py +387 -0
  73. webscout/Provider/IBM.py +340 -0
  74. webscout/Provider/Jadve.py +317 -266
  75. webscout/Provider/K2Think.py +306 -0
  76. webscout/Provider/Koboldai.py +221 -381
  77. webscout/Provider/Netwrck.py +273 -228
  78. webscout/Provider/Nvidia.py +310 -0
  79. webscout/Provider/OPENAI/DeepAI.py +489 -0
  80. webscout/Provider/OPENAI/K2Think.py +423 -0
  81. webscout/Provider/OPENAI/PI.py +463 -0
  82. webscout/Provider/OPENAI/README.md +890 -0
  83. webscout/Provider/OPENAI/TogetherAI.py +405 -0
  84. webscout/Provider/OPENAI/TwoAI.py +255 -0
  85. webscout/Provider/OPENAI/__init__.py +148 -25
  86. webscout/Provider/OPENAI/ai4chat.py +348 -0
  87. webscout/Provider/OPENAI/akashgpt.py +436 -0
  88. webscout/Provider/OPENAI/algion.py +303 -0
  89. webscout/Provider/OPENAI/ayle.py +365 -0
  90. webscout/Provider/OPENAI/base.py +253 -46
  91. webscout/Provider/OPENAI/cerebras.py +296 -0
  92. webscout/Provider/OPENAI/chatgpt.py +514 -193
  93. webscout/Provider/OPENAI/chatsandbox.py +233 -0
  94. webscout/Provider/OPENAI/deepinfra.py +403 -272
  95. webscout/Provider/OPENAI/e2b.py +2370 -1350
  96. webscout/Provider/OPENAI/elmo.py +278 -0
  97. webscout/Provider/OPENAI/exaai.py +186 -138
  98. webscout/Provider/OPENAI/freeassist.py +446 -0
  99. webscout/Provider/OPENAI/gradient.py +448 -0
  100. webscout/Provider/OPENAI/groq.py +380 -0
  101. webscout/Provider/OPENAI/hadadxyz.py +292 -0
  102. webscout/Provider/OPENAI/heckai.py +100 -104
  103. webscout/Provider/OPENAI/huggingface.py +321 -0
  104. webscout/Provider/OPENAI/ibm.py +425 -0
  105. webscout/Provider/OPENAI/llmchat.py +253 -0
  106. webscout/Provider/OPENAI/llmchatco.py +378 -327
  107. webscout/Provider/OPENAI/meta.py +541 -0
  108. webscout/Provider/OPENAI/netwrck.py +110 -84
  109. webscout/Provider/OPENAI/nvidia.py +317 -0
  110. webscout/Provider/OPENAI/oivscode.py +348 -0
  111. webscout/Provider/OPENAI/openrouter.py +328 -0
  112. webscout/Provider/OPENAI/pydantic_imports.py +1 -0
  113. webscout/Provider/OPENAI/sambanova.py +397 -0
  114. webscout/Provider/OPENAI/sonus.py +126 -115
  115. webscout/Provider/OPENAI/textpollinations.py +218 -133
  116. webscout/Provider/OPENAI/toolbaz.py +136 -166
  117. webscout/Provider/OPENAI/typefully.py +419 -0
  118. webscout/Provider/OPENAI/typliai.py +279 -0
  119. webscout/Provider/OPENAI/utils.py +314 -211
  120. webscout/Provider/OPENAI/wisecat.py +103 -125
  121. webscout/Provider/OPENAI/writecream.py +185 -156
  122. webscout/Provider/OPENAI/x0gpt.py +227 -136
  123. webscout/Provider/OPENAI/zenmux.py +380 -0
  124. webscout/Provider/OpenRouter.py +386 -0
  125. webscout/Provider/Openai.py +337 -496
  126. webscout/Provider/PI.py +443 -344
  127. webscout/Provider/QwenLM.py +346 -254
  128. webscout/Provider/STT/__init__.py +28 -0
  129. webscout/Provider/STT/base.py +303 -0
  130. webscout/Provider/STT/elevenlabs.py +264 -0
  131. webscout/Provider/Sambanova.py +317 -0
  132. webscout/Provider/TTI/README.md +69 -0
  133. webscout/Provider/TTI/__init__.py +37 -12
  134. webscout/Provider/TTI/base.py +147 -0
  135. webscout/Provider/TTI/claudeonline.py +393 -0
  136. webscout/Provider/TTI/magicstudio.py +292 -0
  137. webscout/Provider/TTI/miragic.py +180 -0
  138. webscout/Provider/TTI/pollinations.py +331 -0
  139. webscout/Provider/TTI/together.py +334 -0
  140. webscout/Provider/TTI/utils.py +14 -0
  141. webscout/Provider/TTS/README.md +186 -0
  142. webscout/Provider/TTS/__init__.py +43 -7
  143. webscout/Provider/TTS/base.py +523 -0
  144. webscout/Provider/TTS/deepgram.py +286 -156
  145. webscout/Provider/TTS/elevenlabs.py +189 -111
  146. webscout/Provider/TTS/freetts.py +218 -0
  147. webscout/Provider/TTS/murfai.py +288 -113
  148. webscout/Provider/TTS/openai_fm.py +364 -0
  149. webscout/Provider/TTS/parler.py +203 -111
  150. webscout/Provider/TTS/qwen.py +334 -0
  151. webscout/Provider/TTS/sherpa.py +286 -0
  152. webscout/Provider/TTS/speechma.py +693 -180
  153. webscout/Provider/TTS/streamElements.py +275 -333
  154. webscout/Provider/TTS/utils.py +280 -280
  155. webscout/Provider/TextPollinationsAI.py +221 -121
  156. webscout/Provider/TogetherAI.py +450 -0
  157. webscout/Provider/TwoAI.py +309 -199
  158. webscout/Provider/TypliAI.py +311 -0
  159. webscout/Provider/UNFINISHED/ChatHub.py +219 -0
  160. webscout/Provider/{OPENAI/glider.py → UNFINISHED/ChutesAI.py} +160 -145
  161. webscout/Provider/UNFINISHED/GizAI.py +300 -0
  162. webscout/Provider/UNFINISHED/Marcus.py +218 -0
  163. webscout/Provider/UNFINISHED/Qodo.py +481 -0
  164. webscout/Provider/UNFINISHED/XenAI.py +330 -0
  165. webscout/Provider/{Youchat.py → UNFINISHED/Youchat.py} +64 -47
  166. webscout/Provider/UNFINISHED/aihumanizer.py +41 -0
  167. webscout/Provider/UNFINISHED/grammerchecker.py +37 -0
  168. webscout/Provider/UNFINISHED/liner.py +342 -0
  169. webscout/Provider/UNFINISHED/liner_api_request.py +246 -0
  170. webscout/Provider/UNFINISHED/samurai.py +231 -0
  171. webscout/Provider/WiseCat.py +256 -196
  172. webscout/Provider/WrDoChat.py +390 -0
  173. webscout/Provider/__init__.py +115 -198
  174. webscout/Provider/ai4chat.py +181 -202
  175. webscout/Provider/akashgpt.py +330 -342
  176. webscout/Provider/cerebras.py +397 -242
  177. webscout/Provider/cleeai.py +236 -213
  178. webscout/Provider/elmo.py +291 -234
  179. webscout/Provider/geminiapi.py +343 -208
  180. webscout/Provider/julius.py +245 -223
  181. webscout/Provider/learnfastai.py +333 -266
  182. webscout/Provider/llama3mitril.py +230 -180
  183. webscout/Provider/llmchat.py +308 -213
  184. webscout/Provider/llmchatco.py +321 -311
  185. webscout/Provider/meta.py +996 -794
  186. webscout/Provider/oivscode.py +332 -0
  187. webscout/Provider/searchchat.py +316 -293
  188. webscout/Provider/sonus.py +264 -208
  189. webscout/Provider/toolbaz.py +359 -320
  190. webscout/Provider/turboseek.py +332 -219
  191. webscout/Provider/typefully.py +262 -280
  192. webscout/Provider/x0gpt.py +332 -256
  193. webscout/__init__.py +31 -38
  194. webscout/__main__.py +5 -5
  195. webscout/cli.py +585 -293
  196. webscout/client.py +1497 -0
  197. webscout/conversation.py +140 -565
  198. webscout/exceptions.py +383 -339
  199. webscout/litagent/__init__.py +29 -29
  200. webscout/litagent/agent.py +492 -455
  201. webscout/litagent/constants.py +60 -60
  202. webscout/models.py +505 -181
  203. webscout/optimizers.py +32 -378
  204. webscout/prompt_manager.py +376 -274
  205. webscout/sanitize.py +1514 -0
  206. webscout/scout/README.md +452 -0
  207. webscout/scout/__init__.py +8 -8
  208. webscout/scout/core/__init__.py +7 -7
  209. webscout/scout/core/crawler.py +330 -140
  210. webscout/scout/core/scout.py +800 -568
  211. webscout/scout/core/search_result.py +51 -96
  212. webscout/scout/core/text_analyzer.py +64 -63
  213. webscout/scout/core/text_utils.py +412 -277
  214. webscout/scout/core/web_analyzer.py +54 -52
  215. webscout/scout/element.py +872 -460
  216. webscout/scout/parsers/__init__.py +70 -69
  217. webscout/scout/parsers/html5lib_parser.py +182 -172
  218. webscout/scout/parsers/html_parser.py +238 -236
  219. webscout/scout/parsers/lxml_parser.py +203 -178
  220. webscout/scout/utils.py +38 -37
  221. webscout/search/__init__.py +47 -0
  222. webscout/search/base.py +201 -0
  223. webscout/search/bing_main.py +45 -0
  224. webscout/search/brave_main.py +92 -0
  225. webscout/search/duckduckgo_main.py +57 -0
  226. webscout/search/engines/__init__.py +127 -0
  227. webscout/search/engines/bing/__init__.py +15 -0
  228. webscout/search/engines/bing/base.py +35 -0
  229. webscout/search/engines/bing/images.py +114 -0
  230. webscout/search/engines/bing/news.py +96 -0
  231. webscout/search/engines/bing/suggestions.py +36 -0
  232. webscout/search/engines/bing/text.py +109 -0
  233. webscout/search/engines/brave/__init__.py +19 -0
  234. webscout/search/engines/brave/base.py +47 -0
  235. webscout/search/engines/brave/images.py +213 -0
  236. webscout/search/engines/brave/news.py +353 -0
  237. webscout/search/engines/brave/suggestions.py +318 -0
  238. webscout/search/engines/brave/text.py +167 -0
  239. webscout/search/engines/brave/videos.py +364 -0
  240. webscout/search/engines/duckduckgo/__init__.py +25 -0
  241. webscout/search/engines/duckduckgo/answers.py +80 -0
  242. webscout/search/engines/duckduckgo/base.py +189 -0
  243. webscout/search/engines/duckduckgo/images.py +100 -0
  244. webscout/search/engines/duckduckgo/maps.py +183 -0
  245. webscout/search/engines/duckduckgo/news.py +70 -0
  246. webscout/search/engines/duckduckgo/suggestions.py +22 -0
  247. webscout/search/engines/duckduckgo/text.py +221 -0
  248. webscout/search/engines/duckduckgo/translate.py +48 -0
  249. webscout/search/engines/duckduckgo/videos.py +80 -0
  250. webscout/search/engines/duckduckgo/weather.py +84 -0
  251. webscout/search/engines/mojeek.py +61 -0
  252. webscout/search/engines/wikipedia.py +77 -0
  253. webscout/search/engines/yahoo/__init__.py +41 -0
  254. webscout/search/engines/yahoo/answers.py +19 -0
  255. webscout/search/engines/yahoo/base.py +34 -0
  256. webscout/search/engines/yahoo/images.py +323 -0
  257. webscout/search/engines/yahoo/maps.py +19 -0
  258. webscout/search/engines/yahoo/news.py +258 -0
  259. webscout/search/engines/yahoo/suggestions.py +140 -0
  260. webscout/search/engines/yahoo/text.py +273 -0
  261. webscout/search/engines/yahoo/translate.py +19 -0
  262. webscout/search/engines/yahoo/videos.py +302 -0
  263. webscout/search/engines/yahoo/weather.py +220 -0
  264. webscout/search/engines/yandex.py +67 -0
  265. webscout/search/engines/yep/__init__.py +13 -0
  266. webscout/search/engines/yep/base.py +34 -0
  267. webscout/search/engines/yep/images.py +101 -0
  268. webscout/search/engines/yep/suggestions.py +38 -0
  269. webscout/search/engines/yep/text.py +99 -0
  270. webscout/search/http_client.py +172 -0
  271. webscout/search/results.py +141 -0
  272. webscout/search/yahoo_main.py +57 -0
  273. webscout/search/yep_main.py +48 -0
  274. webscout/server/__init__.py +48 -0
  275. webscout/server/config.py +78 -0
  276. webscout/server/exceptions.py +69 -0
  277. webscout/server/providers.py +286 -0
  278. webscout/server/request_models.py +131 -0
  279. webscout/server/request_processing.py +404 -0
  280. webscout/server/routes.py +642 -0
  281. webscout/server/server.py +351 -0
  282. webscout/server/ui_templates.py +1171 -0
  283. webscout/swiftcli/__init__.py +79 -809
  284. webscout/swiftcli/core/__init__.py +7 -0
  285. webscout/swiftcli/core/cli.py +574 -0
  286. webscout/swiftcli/core/context.py +98 -0
  287. webscout/swiftcli/core/group.py +268 -0
  288. webscout/swiftcli/decorators/__init__.py +28 -0
  289. webscout/swiftcli/decorators/command.py +243 -0
  290. webscout/swiftcli/decorators/options.py +247 -0
  291. webscout/swiftcli/decorators/output.py +392 -0
  292. webscout/swiftcli/exceptions.py +21 -0
  293. webscout/swiftcli/plugins/__init__.py +9 -0
  294. webscout/swiftcli/plugins/base.py +134 -0
  295. webscout/swiftcli/plugins/manager.py +269 -0
  296. webscout/swiftcli/utils/__init__.py +58 -0
  297. webscout/swiftcli/utils/formatting.py +251 -0
  298. webscout/swiftcli/utils/parsing.py +368 -0
  299. webscout/update_checker.py +280 -136
  300. webscout/utils.py +28 -14
  301. webscout/version.py +2 -1
  302. webscout/version.py.bak +3 -0
  303. webscout/zeroart/__init__.py +218 -55
  304. webscout/zeroart/base.py +70 -60
  305. webscout/zeroart/effects.py +155 -99
  306. webscout/zeroart/fonts.py +1799 -816
  307. webscout-2026.1.19.dist-info/METADATA +638 -0
  308. webscout-2026.1.19.dist-info/RECORD +312 -0
  309. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info}/WHEEL +1 -1
  310. webscout-2026.1.19.dist-info/entry_points.txt +4 -0
  311. webscout-2026.1.19.dist-info/top_level.txt +1 -0
  312. inferno/__init__.py +0 -6
  313. inferno/__main__.py +0 -9
  314. inferno/cli.py +0 -6
  315. webscout/DWEBS.py +0 -477
  316. webscout/Extra/autocoder/__init__.py +0 -9
  317. webscout/Extra/autocoder/autocoder.py +0 -849
  318. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  319. webscout/LLM.py +0 -442
  320. webscout/Litlogger/__init__.py +0 -67
  321. webscout/Litlogger/core/__init__.py +0 -6
  322. webscout/Litlogger/core/level.py +0 -23
  323. webscout/Litlogger/core/logger.py +0 -165
  324. webscout/Litlogger/handlers/__init__.py +0 -12
  325. webscout/Litlogger/handlers/console.py +0 -33
  326. webscout/Litlogger/handlers/file.py +0 -143
  327. webscout/Litlogger/handlers/network.py +0 -173
  328. webscout/Litlogger/styles/__init__.py +0 -7
  329. webscout/Litlogger/styles/colors.py +0 -249
  330. webscout/Litlogger/styles/formats.py +0 -458
  331. webscout/Litlogger/styles/text.py +0 -87
  332. webscout/Litlogger/utils/__init__.py +0 -6
  333. webscout/Litlogger/utils/detectors.py +0 -153
  334. webscout/Litlogger/utils/formatters.py +0 -200
  335. webscout/Local/__init__.py +0 -12
  336. webscout/Local/__main__.py +0 -9
  337. webscout/Local/api.py +0 -576
  338. webscout/Local/cli.py +0 -516
  339. webscout/Local/config.py +0 -75
  340. webscout/Local/llm.py +0 -287
  341. webscout/Local/model_manager.py +0 -253
  342. webscout/Local/server.py +0 -721
  343. webscout/Local/utils.py +0 -93
  344. webscout/Provider/AI21.py +0 -177
  345. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  346. webscout/Provider/AISEARCH/ISou.py +0 -256
  347. webscout/Provider/AISEARCH/felo_search.py +0 -228
  348. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  349. webscout/Provider/AISEARCH/hika_search.py +0 -194
  350. webscout/Provider/AISEARCH/scira_search.py +0 -324
  351. webscout/Provider/Aitopia.py +0 -292
  352. webscout/Provider/AllenAI.py +0 -413
  353. webscout/Provider/Blackboxai.py +0 -229
  354. webscout/Provider/C4ai.py +0 -432
  355. webscout/Provider/ChatGPTClone.py +0 -226
  356. webscout/Provider/ChatGPTES.py +0 -237
  357. webscout/Provider/ChatGPTGratis.py +0 -194
  358. webscout/Provider/Chatify.py +0 -175
  359. webscout/Provider/Cloudflare.py +0 -273
  360. webscout/Provider/DeepSeek.py +0 -196
  361. webscout/Provider/ElectronHub.py +0 -709
  362. webscout/Provider/ExaChat.py +0 -342
  363. webscout/Provider/Free2GPT.py +0 -241
  364. webscout/Provider/GPTWeb.py +0 -193
  365. webscout/Provider/Glider.py +0 -211
  366. webscout/Provider/HF_space/__init__.py +0 -0
  367. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  368. webscout/Provider/HuggingFaceChat.py +0 -462
  369. webscout/Provider/Hunyuan.py +0 -272
  370. webscout/Provider/LambdaChat.py +0 -392
  371. webscout/Provider/Llama.py +0 -200
  372. webscout/Provider/Llama3.py +0 -204
  373. webscout/Provider/Marcus.py +0 -148
  374. webscout/Provider/OLLAMA.py +0 -396
  375. webscout/Provider/OPENAI/c4ai.py +0 -367
  376. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  377. webscout/Provider/OPENAI/exachat.py +0 -433
  378. webscout/Provider/OPENAI/freeaichat.py +0 -352
  379. webscout/Provider/OPENAI/opkfc.py +0 -488
  380. webscout/Provider/OPENAI/scirachat.py +0 -463
  381. webscout/Provider/OPENAI/standardinput.py +0 -425
  382. webscout/Provider/OPENAI/typegpt.py +0 -346
  383. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  384. webscout/Provider/OPENAI/venice.py +0 -413
  385. webscout/Provider/OPENAI/yep.py +0 -327
  386. webscout/Provider/OpenGPT.py +0 -199
  387. webscout/Provider/Perplexitylabs.py +0 -415
  388. webscout/Provider/Phind.py +0 -535
  389. webscout/Provider/PizzaGPT.py +0 -198
  390. webscout/Provider/Reka.py +0 -214
  391. webscout/Provider/StandardInput.py +0 -278
  392. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  393. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  394. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  395. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  396. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  397. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  398. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  399. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  400. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  401. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  402. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  403. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  404. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  405. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  406. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  407. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  408. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  409. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  410. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  411. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  412. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  413. webscout/Provider/TTI/artbit/__init__.py +0 -22
  414. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  415. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  416. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  417. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  418. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  419. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  420. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  421. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  422. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  423. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  424. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  425. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  426. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  427. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  428. webscout/Provider/TTI/talkai/__init__.py +0 -4
  429. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  430. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  431. webscout/Provider/TTS/gesserit.py +0 -127
  432. webscout/Provider/TeachAnything.py +0 -187
  433. webscout/Provider/Venice.py +0 -219
  434. webscout/Provider/VercelAI.py +0 -234
  435. webscout/Provider/WebSim.py +0 -228
  436. webscout/Provider/Writecream.py +0 -211
  437. webscout/Provider/WritingMate.py +0 -197
  438. webscout/Provider/aimathgpt.py +0 -189
  439. webscout/Provider/askmyai.py +0 -158
  440. webscout/Provider/asksteve.py +0 -203
  441. webscout/Provider/bagoodex.py +0 -145
  442. webscout/Provider/chatglm.py +0 -205
  443. webscout/Provider/copilot.py +0 -428
  444. webscout/Provider/freeaichat.py +0 -271
  445. webscout/Provider/gaurish.py +0 -244
  446. webscout/Provider/geminiprorealtime.py +0 -160
  447. webscout/Provider/granite.py +0 -187
  448. webscout/Provider/hermes.py +0 -219
  449. webscout/Provider/koala.py +0 -268
  450. webscout/Provider/labyrinth.py +0 -340
  451. webscout/Provider/lepton.py +0 -194
  452. webscout/Provider/llamatutor.py +0 -192
  453. webscout/Provider/multichat.py +0 -325
  454. webscout/Provider/promptrefine.py +0 -193
  455. webscout/Provider/scira_chat.py +0 -277
  456. webscout/Provider/scnet.py +0 -187
  457. webscout/Provider/talkai.py +0 -194
  458. webscout/Provider/tutorai.py +0 -252
  459. webscout/Provider/typegpt.py +0 -232
  460. webscout/Provider/uncovr.py +0 -312
  461. webscout/Provider/yep.py +0 -376
  462. webscout/litprinter/__init__.py +0 -59
  463. webscout/scout/core.py +0 -881
  464. webscout/tempid.py +0 -128
  465. webscout/webscout_search.py +0 -1346
  466. webscout/webscout_search_async.py +0 -877
  467. webscout/yep_search.py +0 -297
  468. webscout-8.2.2.dist-info/METADATA +0 -734
  469. webscout-8.2.2.dist-info/RECORD +0 -309
  470. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  471. webscout-8.2.2.dist-info/top_level.txt +0 -3
  472. webstoken/__init__.py +0 -30
  473. webstoken/classifier.py +0 -189
  474. webstoken/keywords.py +0 -216
  475. webstoken/language.py +0 -128
  476. webstoken/ner.py +0 -164
  477. webstoken/normalizer.py +0 -35
  478. webstoken/processor.py +0 -77
  479. webstoken/sentiment.py +0 -206
  480. webstoken/stemmer.py +0 -73
  481. webstoken/tagger.py +0 -60
  482. webstoken/tokenizer.py +0 -158
  483. {webscout-8.2.2.dist-info → webscout-2026.1.19.dist-info/licenses}/LICENSE.md +0 -0
webscout/sanitize.py ADDED
@@ -0,0 +1,1514 @@
1
+ """Stream sanitization and processing utilities for handling various data formats.
2
+
3
+ This module provides utilities for processing streaming data from various sources,
4
+ including support for byte streams, text streams, JSON parsing, regex filtering,
5
+ marker-based extraction, and customizable output response formatting.
6
+ """
7
+
8
+ import asyncio
9
+ import codecs
10
+ import functools
11
+ import json
12
+ import re
13
+ import sys
14
+ from itertools import chain
15
+ from typing import (
16
+ Any,
17
+ AsyncGenerator,
18
+ AsyncIterable,
19
+ Callable,
20
+ Dict,
21
+ Generator,
22
+ Iterable,
23
+ List,
24
+ Literal,
25
+ Optional,
26
+ Union,
27
+ overload,
28
+ )
29
+
30
+ # Expanded encoding types
31
+ EncodingType = Literal[
32
+ "utf-8",
33
+ "utf-16",
34
+ "utf-32",
35
+ "ascii",
36
+ "latin1",
37
+ "cp1252",
38
+ "iso-8859-1",
39
+ "iso-8859-2",
40
+ "windows-1250",
41
+ "windows-1251",
42
+ "windows-1252",
43
+ "gbk",
44
+ "big5",
45
+ "shift_jis",
46
+ "euc-jp",
47
+ "euc-kr",
48
+ ]
49
+
50
+ # Public API
51
+ __all__ = [
52
+ "sanitize_stream",
53
+ "LITSTREAM",
54
+ "sanitize_stream_decorator",
55
+ "lit_streamer",
56
+ "EncodingType",
57
+ ]
58
+
59
+
60
+ def _compile_regexes(
61
+ patterns: Optional[List[Union[str, re.Pattern[str]]]],
62
+ ) -> Optional[List[re.Pattern[str]]]:
63
+ """
64
+ Compile regex patterns from strings or return compiled patterns as-is.
65
+
66
+ Args:
67
+ patterns: List of regex patterns as strings or compiled Pattern objects.
68
+
69
+ Returns:
70
+ List of compiled Pattern objects, or None if input is None.
71
+
72
+ Raises:
73
+ ValueError: If any pattern is invalid.
74
+ """
75
+ if not patterns:
76
+ return None
77
+
78
+ compiled_patterns = []
79
+ for i, pattern in enumerate(patterns):
80
+ try:
81
+ if isinstance(pattern, str):
82
+ compiled_patterns.append(re.compile(pattern))
83
+ elif isinstance(pattern, re.Pattern):
84
+ compiled_patterns.append(pattern)
85
+ else:
86
+ raise ValueError(
87
+ f"Pattern at index {i} must be a string or compiled regex pattern, "
88
+ f"got {type(pattern).__name__}"
89
+ )
90
+ except re.error as e:
91
+ raise ValueError(f"Invalid regex pattern at index {i}: '{pattern}' - {e}")
92
+
93
+ return compiled_patterns
94
+
95
+
96
+ def _process_chunk(
97
+ chunk: str,
98
+ intro_value: Optional[str],
99
+ to_json: bool,
100
+ skip_markers: List[str],
101
+ strip_chars: Optional[str],
102
+ yield_raw_on_error: bool,
103
+ error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
104
+ skip_regexes: Optional[List[re.Pattern[str]]] = None,
105
+ extract_regexes: Optional[List[re.Pattern[str]]] = None,
106
+ ) -> Union[str, Dict[str, Any], None]:
107
+ """
108
+ Sanitizes and potentially parses a single chunk of text.
109
+
110
+ This function performs several operations on the input chunk:
111
+ - Removes a specified prefix (`intro_value`).
112
+ - Strips leading/trailing characters (`strip_chars`).
113
+ - Skips chunks matching specific markers (`skip_markers`).
114
+ - Skips chunks matching regex patterns (`skip_regexes`).
115
+ - Extracts content using regex capturing groups (`extract_regexes`).
116
+ - Optionally parses the chunk as JSON (`to_json`).
117
+ - Handles JSON parsing errors with an optional callback (`error_handler`).
118
+
119
+ Args:
120
+ chunk (str): The chunk of text to process.
121
+ intro_value (str): The prefix to remove from the chunk.
122
+ to_json (bool): If True, attempts to parse the chunk as JSON.
123
+ skip_markers (List[str]): A list of markers; chunks matching these are skipped.
124
+ strip_chars (Optional[str]): Characters to strip from the beginning and end of the chunk.
125
+ yield_raw_on_error (bool): If True, returns the raw chunk when JSON parsing fails; otherwise, returns None.
126
+ error_handler (Optional[Callable[[Exception, str], Optional[Any]]]): An optional callback function that is called when JSON parsing fails.
127
+ It receives the exception and the sanitized chunk as arguments. It should return a value to yield instead of the raw chunk, or None to ignore.
128
+ skip_regexes (Optional[List[Pattern[str]]]): A list of compiled regex patterns; chunks matching any of these are skipped.
129
+ extract_regexes (Optional[List[Pattern[str]]]): A list of compiled regex patterns for extracting content using capturing groups.
130
+
131
+ """
132
+ if not isinstance(chunk, str):
133
+ return None
134
+
135
+ # Fast path for empty chunks
136
+ if not chunk:
137
+ return None
138
+
139
+ # Use slicing for prefix removal (faster than startswith+slicing)
140
+ sanitized_chunk = chunk
141
+ if intro_value and len(chunk) >= len(intro_value) and chunk[: len(intro_value)] == intro_value:
142
+ sanitized_chunk = chunk[len(intro_value) :]
143
+
144
+ # Optimize string stripping operations
145
+ if strip_chars is not None:
146
+ sanitized_chunk = sanitized_chunk.strip(strip_chars)
147
+ else:
148
+ # lstrip() is faster than strip() when we only need leading whitespace removed
149
+ sanitized_chunk = sanitized_chunk.lstrip()
150
+
151
+ # Skip empty chunks and markers
152
+ if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
153
+ return None
154
+
155
+ # Apply regex-based extraction first (if provided)
156
+ if extract_regexes:
157
+ extracted_content = None
158
+ for regex in extract_regexes:
159
+ match = regex.search(sanitized_chunk)
160
+ if match:
161
+ # If there are capturing groups, return the first group or all groups as a tuple
162
+ if match.groups():
163
+ if len(match.groups()) == 1:
164
+ extracted_content = match.group(1)
165
+ else:
166
+ # Multiple groups - return as tuple converted to string for JSON compatibility
167
+ extracted_content = str(match.groups())
168
+ else:
169
+ # No capturing groups, return the full match
170
+ extracted_content = match.group(0)
171
+ break # Use first matching extraction regex
172
+
173
+ if extracted_content is None:
174
+ if to_json:
175
+ pass
176
+ else:
177
+ return None
178
+ else:
179
+ sanitized_chunk = extracted_content
180
+
181
+ if skip_regexes:
182
+ if any(regex.search(sanitized_chunk) for regex in skip_regexes):
183
+ return None
184
+
185
+ if to_json:
186
+ try:
187
+ # Only strip before JSON parsing if both boundaries are incorrect
188
+ if (
189
+ len(sanitized_chunk) >= 2
190
+ and sanitized_chunk[0] not in "{["
191
+ and sanitized_chunk[-1] not in "}]"
192
+ ):
193
+ sanitized_chunk = sanitized_chunk.strip()
194
+ return json.loads(sanitized_chunk)
195
+ except (json.JSONDecodeError, Exception) as e:
196
+ if error_handler:
197
+ try:
198
+ handled = error_handler(e, sanitized_chunk)
199
+ if handled is not None:
200
+ return handled
201
+ except Exception:
202
+ pass
203
+ return sanitized_chunk if yield_raw_on_error else None
204
+
205
+ return sanitized_chunk
206
+
207
+
208
def _decode_byte_stream(
    byte_iterator: Any,
    encoding: EncodingType = "utf-8",
    errors: str = "replace",
    buffer_size: int = 8192,
) -> Generator[str, None, None]:
    """
    Decode a byte stream incrementally with flexible encoding support.

    Takes an iterator of byte chunks and yields decoded text. An incremental
    decoder is used so that multi-byte characters split across chunk
    boundaries are decoded correctly once the remaining bytes arrive.

    Args:
        byte_iterator (Iterable[bytes]): An iterator that yields chunks of bytes.
        encoding (EncodingType): The character encoding to use for decoding.
            Defaults to 'utf-8'. Any codec known to :mod:`codecs` is accepted
            (e.g. 'utf-8', 'utf-16', 'latin1', 'cp1252', 'gbk', 'shift_jis');
            unknown names fall back to 'utf-8'.
        errors (str): How encoding errors are handled: 'strict' raises,
            'ignore' skips the offending bytes, and 'replace' substitutes a
            replacement character. Defaults to 'replace'.
        buffer_size (int): Retained for backward compatibility; the
            incremental decoder consumes chunks directly, so no staging
            buffer is required.

    Yields:
        str: Decoded text chunks from the byte stream.
    """
    try:
        decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
    except LookupError:
        # Unknown codec name: fall back to utf-8 rather than failing outright.
        decoder = codecs.getincrementaldecoder("utf-8")(errors=errors)

    for chunk_bytes in byte_iterator:
        if not chunk_bytes:
            continue
        try:
            # Decode the chunk directly. The incremental decoder keeps any
            # trailing partial multi-byte sequence for the next chunk, so an
            # intermediate staging buffer would only add a useless copy.
            text = decoder.decode(chunk_bytes, final=False)
            if text:
                yield text
        except UnicodeDecodeError:
            # Only reachable when errors='strict'; report inline instead of
            # aborting the whole stream.
            yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"

    # Final flush: emit any partial character the decoder is still holding.
    try:
        final_text = decoder.decode(b"", final=True)
        if final_text:
            yield final_text
    except UnicodeDecodeError:
        yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
272
async def _decode_byte_stream_async(
    byte_iterator: AsyncIterable[bytes],
    encoding: EncodingType = "utf-8",
    errors: str = "replace",
    buffer_size: int = 8192,
) -> AsyncGenerator[str, None]:
    """
    Asynchronously decode a byte stream with flexible encoding support.

    Asynchronous counterpart to `_decode_byte_stream`: consumes an async
    iterator of byte chunks and yields decoded text. An incremental decoder
    is used so that multi-byte characters split across chunk boundaries are
    decoded correctly once the remaining bytes arrive.

    Args:
        byte_iterator (AsyncIterable[bytes]): Async iterator yielding byte chunks.
        encoding (EncodingType): The character encoding to use for decoding.
            Defaults to 'utf-8'. Any codec known to :mod:`codecs` is accepted;
            unknown names fall back to 'utf-8'.
        errors (str): How encoding errors are handled: 'strict' raises,
            'ignore' skips the offending bytes, and 'replace' substitutes a
            replacement character. Defaults to 'replace'.
        buffer_size (int): Retained for backward compatibility; the
            incremental decoder consumes chunks directly, so no staging
            buffer is required.

    Yields:
        str: Decoded text chunks from the byte stream.
    """
    try:
        decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
    except LookupError:
        # Unknown codec name: fall back to utf-8 rather than failing outright.
        decoder = codecs.getincrementaldecoder("utf-8")(errors=errors)

    async for chunk_bytes in byte_iterator:
        if not chunk_bytes:
            continue
        try:
            # Decode the chunk directly. The incremental decoder keeps any
            # trailing partial multi-byte sequence for the next chunk, so an
            # intermediate staging buffer would only add a useless copy.
            text = decoder.decode(chunk_bytes, final=False)
            if text:
                yield text
        except UnicodeDecodeError:
            # Only reachable when errors='strict'.
            yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"

    # Final flush: emit any partial character the decoder is still holding.
    try:
        final_text = decoder.decode(b"", final=True)
        if final_text:
            yield final_text
    except UnicodeDecodeError:
        yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
352
+
353
def _sanitize_stream_sync(
    data: Any,
    intro_value: Optional[str] = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: EncodingType = "utf-8",
    encoding_errors: str = "replace",
    buffer_size: int = 8192,
    line_delimiter: Optional[str] = None,
    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
    skip_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    extract_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    raw: bool = False,
    output_formatter: Optional[Callable[[Any], Any]] = None,
) -> Generator[Any, None, None]:
    """
    Processes a stream of data (strings or bytes) in real-time, applying various transformations and filtering.

    This function is designed to handle streaming data, allowing for operations such as
    prefix removal, JSON parsing, skipping lines based on markers, regex-based filtering,
    and extracting specific content. It also supports custom error handling for JSON parsing failures
    and output response formatting.

    Args:
        data: String, iterable of strings, or iterable of bytes to process.
        intro_value: Prefix indicating the start of meaningful data.
        to_json: Parse the chunk as JSON if True.
        skip_markers: Lines containing any of these markers are skipped.
        strip_chars: Characters to strip from each line.
        start_marker: Begin processing only after this marker is found.
        end_marker: Stop processing once this marker is found.
        content_extractor: Optional callable to transform parsed content before yielding.
        yield_raw_on_error: Yield raw lines when JSON parsing fails.
        encoding: Byte stream encoding.
        encoding_errors: How to handle encoding errors.
        buffer_size: Buffer size for byte decoding.
        line_delimiter: Delimiter used to split incoming text into lines. ``None``
            uses ``str.splitlines()``.
        error_handler: Callback invoked with ``(Exception, str)`` when JSON
            parsing fails. If the callback returns a value, it is yielded instead of the raw line.
        skip_regexes: List of regex patterns (strings or compiled) for skipping lines that match.
        extract_regexes: List of regex patterns (strings or compiled) for extracting content using capturing groups.
        raw: If True, yields the raw response as returned by the API, chunk by chunk (no processing).
        output_formatter: Custom callable to format/transform each output item before yielding.

    Yields:
        Any: Processed data, which can be a string, a dictionary (if `to_json` is True),
        the result of `content_extractor`, or formatted by output_formatter.

    Raises:
        TypeError: If the input `data` is not a string or an iterable.
        ValueError: If any regex pattern is invalid.
    """
    # --- RAW MODE: yield each chunk exactly as returned by the API ---
    if raw:
        if isinstance(data, str):
            yield data
            return
        elif hasattr(data, "__iter__"):
            # NOTE(review): a bare `bytes` object reaches this branch and is
            # iterated per-byte, yielding ints rather than decoded text —
            # confirm whether bytes input should be special-cased here.
            for chunk in data:
                if isinstance(chunk, (bytes, bytearray)):
                    yield chunk.decode(encoding, encoding_errors)
                elif chunk is not None:
                    yield chunk
            return
        else:
            if data is not None:
                yield data
            return
    # --- END RAW MODE ---

    # --- OUTPUT FORMATTING SETUP ---
    def _apply_output_format(item: Any) -> Any:
        """Apply output formatting to a processed item."""
        if output_formatter is not None:
            return output_formatter(item)
        return item

    # --- END OUTPUT FORMATTING SETUP ---

    effective_skip_markers = skip_markers or []
    # Compile regex patterns (raises ValueError on invalid patterns)
    compiled_skip_regexes = _compile_regexes(skip_regexes)
    compiled_extract_regexes = _compile_regexes(extract_regexes)

    # State machine flags: `found_start` tracks whether the start marker has
    # been seen; `processing_active` tracks whether we are inside the
    # start/end-marker region for the current buffered text.
    processing_active = start_marker is None
    buffer = ""
    found_start = False if start_marker else True
    line_iterator: Iterable[str]

    if isinstance(data, str):
        # If data is a string, decide whether to split it into lines
        # or treat it as an iterable containing a single chunk.
        temp_lines: List[str]
        if line_delimiter is None:  # Default: split by newlines if present
            if "\n" in data or "\r" in data:
                temp_lines = data.splitlines()
            else:
                temp_lines = [data]  # Treat as a single line/chunk
        elif line_delimiter in data:  # Custom delimiter found in string
            temp_lines = data.split(line_delimiter)
        else:  # Custom delimiter not found, or string is effectively a single segment
            temp_lines = [data]
        line_iterator = iter(temp_lines)
    elif hasattr(data, "__iter__"):  # data is an iterable (but not a string)
        # Peek at the first item to decide between str and bytes handling.
        _iter = iter(data)
        first_item = next(_iter, None)

        if first_item is None:  # Iterable was empty
            return

        # Reconstruct the full iterable including the first_item
        stream_input_iterable = chain([first_item], _iter)

        if isinstance(first_item, bytes):
            # Ensure stream_input_iterable is typed as Iterable[bytes] for _decode_byte_stream
            line_iterator = _decode_byte_stream(
                stream_input_iterable,
                encoding=encoding,
                errors=encoding_errors,
                buffer_size=buffer_size,
            )
        elif isinstance(first_item, str):
            # Ensure stream_input_iterable is typed as Iterable[str]
            line_iterator = stream_input_iterable
        else:
            raise TypeError(
                f"Iterable must yield strings or bytes, not {type(first_item).__name__}"
            )
    else:  # Not a string and not an iterable
        raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")

    try:
        # Accumulate incoming text in `buffer` and repeatedly scan it for
        # start/end markers; process complete regions as they appear.
        for line in line_iterator:
            if not line:
                continue
            buffer += line
            while True:
                # Look for start marker if needed
                if not found_start and start_marker:
                    idx = buffer.find(start_marker)
                    if idx != -1:
                        found_start = True
                        buffer = buffer[idx + len(start_marker) :]
                    else:
                        # Not found, keep buffering; retain only a tail long
                        # enough to hold a marker split across chunks.
                        buffer = buffer[-max(len(start_marker), 256) :]  # avoid unbounded growth
                        break
                # Look for end marker if needed
                if found_start and end_marker:
                    idx = buffer.find(end_marker)
                    if idx != -1:
                        chunk = buffer[:idx]
                        buffer = buffer[idx + len(end_marker) :]
                        processing_active = False
                    else:
                        chunk = buffer
                        buffer = ""
                        processing_active = True
                    # Process chunk if we are in active region
                    # NOTE(review): when the end marker IS found,
                    # processing_active has just been set False, so the text
                    # preceding the marker in this buffer is not processed —
                    # confirm whether that drop is intended.
                    if chunk and processing_active:
                        for subline in (
                            chunk.split(line_delimiter)
                            if line_delimiter is not None
                            else chunk.splitlines()
                        ):
                            # When a content_extractor is supplied, extraction
                            # regexes are applied after extraction instead of
                            # inside _process_chunk.
                            use_extract_in_process = (
                                compiled_extract_regexes if not content_extractor else None
                            )

                            result = _process_chunk(
                                subline,
                                intro_value,
                                to_json,
                                effective_skip_markers,
                                strip_chars,
                                yield_raw_on_error,
                                error_handler,
                                compiled_skip_regexes,
                                use_extract_in_process,
                            )
                            if result is None:
                                continue
                            if content_extractor:
                                try:
                                    final_content = content_extractor(result)
                                    if final_content is not None:
                                        if compiled_extract_regexes and isinstance(
                                            final_content, str
                                        ):
                                            extracted = None
                                            for regex in compiled_extract_regexes:
                                                match = regex.search(final_content)
                                                if match:
                                                    if match.groups():
                                                        extracted = (
                                                            match.group(1)
                                                            if len(match.groups()) == 1
                                                            else str(match.groups())
                                                        )
                                                    else:
                                                        extracted = match.group(0)
                                                    break
                                            if extracted is not None:
                                                yield _apply_output_format(extracted)
                                        else:
                                            yield _apply_output_format(final_content)
                                except Exception:
                                    # Extractor failures silently drop the item.
                                    pass
                            else:
                                yield _apply_output_format(result)
                    if not processing_active:
                        # Region closed: require a new start marker next.
                        found_start = False
                    if idx == -1:
                        break
                elif found_start:
                    # No end marker, process all buffered content
                    chunk = buffer
                    buffer = ""
                    if chunk:
                        for subline in (
                            chunk.split(line_delimiter)
                            if line_delimiter is not None
                            else chunk.splitlines()
                        ):
                            use_extract_in_process = (
                                compiled_extract_regexes if not content_extractor else None
                            )

                            result = _process_chunk(
                                subline,
                                intro_value,
                                to_json,
                                effective_skip_markers,
                                strip_chars,
                                yield_raw_on_error,
                                error_handler,
                                compiled_skip_regexes,
                                use_extract_in_process,
                            )
                            if result is None:
                                continue
                            if content_extractor:
                                try:
                                    final_content = content_extractor(result)
                                    if final_content is not None:
                                        if compiled_extract_regexes and isinstance(
                                            final_content, str
                                        ):
                                            extracted = None
                                            for regex in compiled_extract_regexes:
                                                match = regex.search(final_content)
                                                if match:
                                                    if match.groups():
                                                        extracted = (
                                                            match.group(1)
                                                            if len(match.groups()) == 1
                                                            else str(match.groups())
                                                        )
                                                    else:
                                                        extracted = match.group(0)
                                                    break
                                            if extracted is not None:
                                                yield _apply_output_format(extracted)
                                        else:
                                            yield _apply_output_format(final_content)
                                except Exception:
                                    # Extractor failures silently drop the item.
                                    pass
                            else:
                                yield _apply_output_format(result)
                    break
                else:
                    break
    except Exception as e:
        # Broad catch: any unexpected failure ends the stream quietly after
        # logging to stderr instead of propagating to the caller.
        print(f"Stream processing error: {e}", file=sys.stderr)
635
async def _sanitize_stream_async(
    data: Any,
    intro_value: Optional[str] = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: EncodingType = "utf-8",
    encoding_errors: str = "replace",
    buffer_size: int = 8192,
    line_delimiter: Optional[str] = None,
    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
    skip_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    extract_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    raw: bool = False,
    output_formatter: Optional[Callable[[Any], Any]] = None,
) -> AsyncGenerator[Any, None]:
    """
    Asynchronously processes a stream of data (strings or bytes), applying transformations and filtering.

    This function is the asynchronous counterpart to `_sanitize_stream_sync`. It handles
    streaming data, allowing for operations such as prefix removal, JSON parsing,
    skipping lines based on markers, regex-based filtering, and extracting specific content.
    It also supports custom error handling for JSON parsing failures and output response formatting.

    Synchronous inputs (plain strings or sync iterables) are delegated to
    `_sanitize_stream_sync`; only true async iterables are processed here.

    Args:
        data: String, iterable of strings, or iterable of bytes to process.
        intro_value: Prefix indicating the start of meaningful data.
        to_json: Parse JSON content if ``True``.
        skip_markers: Lines containing any of these markers are skipped.
        strip_chars: Characters to strip from each line.
        start_marker: Begin processing only after this marker is found.
        end_marker: Stop processing once this marker is found.
        content_extractor: Optional callable to transform parsed content before yielding.
        yield_raw_on_error: Yield raw lines when JSON parsing fails.
        encoding: Byte stream encoding.
        encoding_errors: How to handle encoding errors.
        buffer_size: Buffer size for byte decoding.
        line_delimiter: Delimiter used to split incoming text into lines. ``None`` uses ``str.splitlines()``.
        error_handler: Callback invoked with ``(Exception, str)`` when JSON parsing fails. If the callback returns a value, it is yielded in place of the raw line.
        skip_regexes: List of regex patterns (strings or compiled) for skipping lines that match.
        extract_regexes: List of regex patterns (strings or compiled) for extracting content using capturing groups.
        raw: If True, yields the raw response as returned by the API, chunk by chunk (no processing).
        output_formatter: Custom callable to format/transform each output item before yielding.
    """
    # --- RAW MODE: yield each chunk exactly as returned by the API ---
    if raw:
        if isinstance(data, str):
            yield data
            return
        elif hasattr(data, "__aiter__"):
            async for chunk in data:
                if isinstance(chunk, (bytes, bytearray)):
                    yield chunk.decode(encoding, encoding_errors)
                elif chunk is not None:
                    yield chunk
            return
        elif hasattr(data, "__iter__"):
            for chunk in data:
                if isinstance(chunk, (bytes, bytearray)):
                    yield chunk.decode(encoding, encoding_errors)
                elif chunk is not None:
                    yield chunk
            return
        else:
            if data is not None:
                yield data
            return
    # --- END RAW MODE ---

    # Plain strings have no async behavior; reuse the sync implementation.
    if isinstance(data, str):
        for item in _sanitize_stream_sync(
            data,
            intro_value=intro_value,
            to_json=to_json,
            skip_markers=skip_markers,
            strip_chars=strip_chars,
            start_marker=start_marker,
            end_marker=end_marker,
            content_extractor=content_extractor,
            yield_raw_on_error=yield_raw_on_error,
            encoding=encoding,
            encoding_errors=encoding_errors,
            buffer_size=buffer_size,
            line_delimiter=line_delimiter,
            error_handler=error_handler,
            skip_regexes=skip_regexes,
            extract_regexes=extract_regexes,
            raw=raw,
            output_formatter=output_formatter,
        ):
            yield item
        return

    if not hasattr(data, "__aiter__"):
        # Fallback to synchronous processing if possible
        for item in _sanitize_stream_sync(
            data,
            intro_value=intro_value,
            to_json=to_json,
            skip_markers=skip_markers,
            strip_chars=strip_chars,
            start_marker=start_marker,
            end_marker=end_marker,
            content_extractor=content_extractor,
            yield_raw_on_error=yield_raw_on_error,
            encoding=encoding,
            encoding_errors=encoding_errors,
            buffer_size=buffer_size,
            line_delimiter=line_delimiter,
            error_handler=error_handler,
            skip_regexes=skip_regexes,
            extract_regexes=extract_regexes,
            raw=raw,
            output_formatter=output_formatter,
        ):
            yield item
        return

    # --- OUTPUT FORMATTING SETUP FOR ASYNC ---
    def _apply_output_format(item: Any) -> Any:
        """Apply output formatting to a processed item."""
        if output_formatter is not None:
            return output_formatter(item)
        return item

    # --- END OUTPUT FORMATTING SETUP ---

    effective_skip_markers = skip_markers or []
    # Compile regex patterns (raises ValueError on invalid patterns)
    compiled_skip_regexes = _compile_regexes(skip_regexes)
    compiled_extract_regexes = _compile_regexes(extract_regexes)

    # State machine flags mirror the sync implementation.
    processing_active = start_marker is None
    buffer = ""
    found_start = False if start_marker else True

    # Peek at the first item to decide between str and bytes handling.
    # NOTE(review): an async stream whose first chunk is literally None is
    # indistinguishable here from an empty stream — confirm upstream never
    # yields None.
    iterator = data.__aiter__()
    first_item = None
    async for first_item in iterator:
        break
    if first_item is None:
        return

    async def _chain(first: Any, it: AsyncIterable[Any]) -> AsyncGenerator[Any, None]:
        """Chain the first item with the rest of the async iterator."""
        yield first
        async for x in it:
            yield x

    stream: AsyncGenerator[Any, None] = _chain(first_item, iterator)

    if isinstance(first_item, bytes):
        line_iterator = _decode_byte_stream_async(
            stream,
            encoding=encoding,
            errors=encoding_errors,
            buffer_size=buffer_size,
        )
    elif isinstance(first_item, str):
        line_iterator = stream
    else:
        raise TypeError(f"Stream must yield strings or bytes, not {type(first_item).__name__}")

    try:
        # Accumulate incoming text in `buffer` and repeatedly scan it for
        # start/end markers; process complete regions as they appear.
        async for line in line_iterator:
            if not line:
                continue
            buffer += line
            while True:
                # Look for start marker if needed
                if not found_start and start_marker:
                    idx = buffer.find(start_marker)
                    if idx != -1:
                        found_start = True
                        buffer = buffer[idx + len(start_marker) :]
                    else:
                        # Not found, keep buffering; retain only a tail long
                        # enough to hold a marker split across chunks.
                        buffer = buffer[-max(len(start_marker), 256) :]
                        break
                # Look for end marker if needed
                if found_start and end_marker:
                    idx = buffer.find(end_marker)
                    if idx != -1:
                        chunk = buffer[:idx]
                        buffer = buffer[idx + len(end_marker) :]
                        processing_active = False
                    else:
                        chunk = buffer
                        buffer = ""
                        processing_active = True
                    # Process chunk if we are in active region
                    # NOTE(review): when the end marker IS found,
                    # processing_active has just been set False, so the text
                    # preceding the marker in this buffer is not processed —
                    # confirm whether that drop is intended.
                    if chunk and processing_active:
                        for subline in (
                            chunk.split(line_delimiter)
                            if line_delimiter is not None
                            else chunk.splitlines()
                        ):
                            # When a content_extractor is supplied, extraction
                            # regexes are applied after extraction instead of
                            # inside _process_chunk.
                            use_extract_in_process = (
                                compiled_extract_regexes if not content_extractor else None
                            )

                            result = _process_chunk(
                                subline,
                                intro_value,
                                to_json,
                                effective_skip_markers,
                                strip_chars,
                                yield_raw_on_error,
                                error_handler,
                                compiled_skip_regexes,
                                use_extract_in_process,
                            )
                            if result is None:
                                continue
                            if content_extractor:
                                try:
                                    final_content = content_extractor(result)
                                    if final_content is not None:
                                        if compiled_extract_regexes and isinstance(
                                            final_content, str
                                        ):
                                            extracted = None
                                            for regex in compiled_extract_regexes:
                                                match = regex.search(final_content)
                                                if match:
                                                    if match.groups():
                                                        extracted = (
                                                            match.group(1)
                                                            if len(match.groups()) == 1
                                                            else str(match.groups())
                                                        )
                                                    else:
                                                        extracted = match.group(0)
                                                    break
                                            if extracted is not None:
                                                yield _apply_output_format(extracted)
                                        else:
                                            yield _apply_output_format(final_content)
                                except Exception:
                                    # Extractor failures silently drop the item.
                                    pass
                            else:
                                yield _apply_output_format(result)
                    if not processing_active:
                        # Region closed: require a new start marker next.
                        found_start = False
                    if idx == -1:
                        break
                elif found_start:
                    # No end marker, process all buffered content
                    chunk = buffer
                    buffer = ""
                    if chunk:
                        for subline in (
                            chunk.split(line_delimiter)
                            if line_delimiter is not None
                            else chunk.splitlines()
                        ):
                            use_extract_in_process = (
                                compiled_extract_regexes if not content_extractor else None
                            )

                            result = _process_chunk(
                                subline,
                                intro_value,
                                to_json,
                                effective_skip_markers,
                                strip_chars,
                                yield_raw_on_error,
                                error_handler,
                                compiled_skip_regexes,
                                use_extract_in_process,
                            )
                            if result is None:
                                continue
                            if content_extractor:
                                try:
                                    final_content = content_extractor(result)
                                    if final_content is not None:
                                        # Apply extract_regexes to extracted content if provided
                                        if compiled_extract_regexes and isinstance(
                                            final_content, str
                                        ):
                                            extracted = None
                                            for regex in compiled_extract_regexes:
                                                match = regex.search(final_content)
                                                if match:
                                                    if match.groups():
                                                        extracted = (
                                                            match.group(1)
                                                            if len(match.groups()) == 1
                                                            else str(match.groups())
                                                        )
                                                    else:
                                                        extracted = match.group(0)
                                                    break
                                            if extracted is not None:
                                                yield _apply_output_format(extracted)
                                        else:
                                            yield _apply_output_format(final_content)
                                except Exception:
                                    # Extractor failures silently drop the item.
                                    pass
                            else:
                                yield _apply_output_format(result)
                    break
                else:
                    break
    except Exception as e:
        # Broad catch: any unexpected failure ends the stream quietly after
        # logging to stderr instead of propagating to the caller.
        print(f"Async stream processing error: {e}", file=sys.stderr)
948
# Overload: synchronous inputs (str/bytes, sync iterables of str/bytes, or
# plain objects such as dict/list/scalars) produce a synchronous generator.
@overload
def sanitize_stream(
    data: Union[
        str,
        bytes,
        Iterable[str],
        Iterable[bytes],
        dict,
        list,
        int,
        float,
        bool,
        None,
    ],
    intro_value: Optional[str] = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: EncodingType = "utf-8",
    encoding_errors: str = "replace",
    buffer_size: int = 8192,
    line_delimiter: Optional[str] = None,
    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
    skip_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    extract_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    object_mode: Literal["as_is", "json", "str"] = "json",
    raw: bool = False,
    output_formatter: Optional[Callable[[Any], Any]] = None,
) -> Generator[Any, None, None]: ...
983
# Overload: asynchronous inputs (async iterables of str/bytes) produce an
# asynchronous generator.
@overload
def sanitize_stream(
    data: Union[
        AsyncIterable[str],
        AsyncIterable[bytes],
    ],
    intro_value: Optional[str] = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: EncodingType = "utf-8",
    encoding_errors: str = "replace",
    buffer_size: int = 8192,
    line_delimiter: Optional[str] = None,
    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
    skip_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    extract_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    object_mode: Literal["as_is", "json", "str"] = "json",
    raw: bool = False,
    output_formatter: Optional[Callable[[Any], Any]] = None,
) -> AsyncGenerator[Any, None]: ...
1010
+ def sanitize_stream(
1011
+ data: Any,
1012
+ intro_value: Optional[str] = "data:",
1013
+ to_json: bool = True,
1014
+ skip_markers: Optional[List[str]] = None,
1015
+ strip_chars: Optional[str] = None,
1016
+ start_marker: Optional[str] = None,
1017
+ end_marker: Optional[str] = None,
1018
+ content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
1019
+ yield_raw_on_error: bool = True,
1020
+ encoding: EncodingType = "utf-8",
1021
+ encoding_errors: str = "replace",
1022
+ buffer_size: int = 8192,
1023
+ line_delimiter: Optional[str] = None,
1024
+ error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
1025
+ skip_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
1026
+ extract_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
1027
+ object_mode: Literal["as_is", "json", "str"] = "json",
1028
+ raw: bool = False,
1029
+ output_formatter: Optional[Callable[[Any], Any]] = None,
1030
+ ) -> Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
1031
+ """
1032
+ Processes streaming data (strings or bytes) in either synchronous or asynchronous mode.
1033
+ Now supports non-iterable and miscellaneous input types (dict, list, int, float, bool, None).
1034
+ Includes regex-based content filtering, extraction capabilities, and customizable output response formatting.
1035
+
1036
+ Args:
1037
+ data: The data to be processed. Can be a string, bytes, a synchronous iterable of strings or bytes,
1038
+ an asynchronous iterable of strings or bytes, or a single object (dict, list, int, float, bool, None).
1039
+ intro_value (str): Prefix indicating the start of meaningful data. Defaults to "data:".
1040
+ to_json (bool): Parse JSON content if ``True``. Defaults to True.
1041
+ skip_markers (Optional[List[str]]): Lines containing any of these markers are skipped. Defaults to None.
1042
+ strip_chars (Optional[str]): Characters to strip from each line. Defaults to None.
1043
+ start_marker (Optional[str]): Begin processing only after this marker is found. Defaults to None.
1044
+ end_marker (Optional[str]): Stop processing once this marker is found. Defaults to None.
1045
+ content_extractor (Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]]):
1046
+ Optional callable to transform parsed content before yielding. Defaults to None.
1047
+ yield_raw_on_error (bool): Yield raw lines when JSON parsing fails. Defaults to True.
1048
+ encoding (EncodingType): Byte stream encoding. Defaults to "utf-8".
1049
+ encoding_errors (str): How to handle encoding errors. Defaults to "replace".
1050
+ buffer_size (int): Buffer size for byte decoding. Defaults to 8192.
1051
+ line_delimiter (Optional[str]): Delimiter used to split incoming text into lines.
1052
+ ``None`` uses ``str.splitlines()``. Defaults to None.
1053
+ error_handler (Optional[Callable[[Exception, str], Optional[Any]]]):
1054
+ Callback invoked with ``(Exception, str)`` when JSON parsing fails.
1055
+ If the callback returns a value, it is yielded in place of the raw line. Defaults to None.
1056
+ skip_regexes (Optional[List[Union[str, Pattern[str]]]]): List of regex patterns (strings or compiled)
1057
+ for skipping lines that match any pattern. Defaults to None.
1058
+ extract_regexes (Optional[List[Union[str, Pattern[str]]]]): List of regex patterns (strings or compiled)
1059
+ for extracting content using capturing groups. If multiple groups are captured, they are returned as a tuple string. Defaults to None.
1060
+ object_mode (Literal["as_is", "json", "str"]): How to handle non-string, non-iterable objects.
1061
+ "json" (default) yields as JSON string, "str" yields as str(obj), "as_is" yields the object as-is.
1062
+ raw (bool): If True, yields the raw response as returned by the API, chunk by chunk (no splitting or joining).
1063
+ output_formatter (Optional[Callable[[Any], Any]]): Custom callable to format/transform each output item
1064
+ before yielding. Use this to structure output into any desired format (e.g., OpenAI-like responses,
1065
+ custom dictionaries, etc.). The formatter receives the processed content and returns the formatted output.
1066
+
1067
+ Returns:
1068
+ Union[Generator[Any, None, None], AsyncGenerator[Any, None]]:
1069
+ A generator or an asynchronous generator yielding the processed data, or raw data if raw=True,
1070
+ optionally transformed by output_formatter.
1071
+
1072
+ Raises:
1073
+ ValueError: If any regex pattern is invalid.
1074
+
1075
+ Examples:
1076
+ # Use custom formatter for simple dict structure
1077
+ >>> def my_formatter(content):
1078
+ ... return {'text': content, 'timestamp': time.time()}
1079
+ >>> for chunk in sanitize_stream(data, output_formatter=my_formatter):
1080
+ ... print(chunk)
1081
+
1082
+ # Format as message with role
1083
+ >>> def message_formatter(content):
1084
+ ... return {'role': 'assistant', 'content': content}
1085
+ >>> for chunk in sanitize_stream(data, output_formatter=message_formatter):
1086
+ ... print(chunk)
1087
+ """
1088
+ if raw:
1089
+
1090
+ def _raw_passthrough_sync(source_iter: Iterable[Any]) -> Generator[Any, None, None]:
1091
+ """Pass through sync iterable, decoding bytes to strings."""
1092
+ for chunk in source_iter:
1093
+ if isinstance(chunk, (bytes, bytearray)):
1094
+ yield chunk.decode(encoding, encoding_errors)
1095
+ elif chunk is not None:
1096
+ yield chunk
1097
+ # Skip None chunks entirely
1098
+
1099
+ async def _raw_passthrough_async(source_aiter: AsyncIterable[Any]) -> AsyncGenerator[Any, None]:
1100
+ """Pass through async iterable, decoding bytes to strings."""
1101
+ async for chunk in source_aiter:
1102
+ if isinstance(chunk, (bytes, bytearray)):
1103
+ # Decode bytes preserving all whitespace and newlines
1104
+ yield chunk.decode(encoding, encoding_errors)
1105
+ elif chunk is not None:
1106
+ yield chunk
1107
+
1108
+ if hasattr(data, "__iter__") and not isinstance(data, (str, bytes)):
1109
+ return _raw_passthrough_sync(data)
1110
+ # Async iterable
1111
+ if hasattr(data, "__aiter__"):
1112
+ return _raw_passthrough_async(data)
1113
+ # Single string or bytes
1114
+ if isinstance(data, (bytes, bytearray)):
1115
+
1116
+ def _yield_single_bytes() -> Generator[str, None, None]:
1117
+ yield data.decode(encoding, encoding_errors)
1118
+
1119
+ return _yield_single_bytes()
1120
+ else:
1121
+
1122
+ def _yield_single_any() -> Generator[Any, None, None]:
1123
+ if data is not None:
1124
+ yield data
1125
+
1126
+ return _yield_single_any()
1127
+ # --- END RAW MODE ---
1128
+
1129
+ text_attr = getattr(data, "text", None)
1130
+ content_attr = getattr(data, "content", None)
1131
+
1132
+ # Handle None
1133
+ if data is None:
1134
+
1135
+ def _empty_gen() -> Generator[None, None, None]:
1136
+ if False:
1137
+ yield None
1138
+
1139
+ return _empty_gen()
1140
+
1141
+ # Handle bytes directly
1142
+ if isinstance(data, bytes):
1143
+ try:
1144
+ payload = data.decode(encoding, encoding_errors)
1145
+ except Exception:
1146
+ payload = str(data)
1147
+ return _sanitize_stream_sync(
1148
+ payload,
1149
+ intro_value,
1150
+ to_json,
1151
+ skip_markers,
1152
+ strip_chars,
1153
+ start_marker,
1154
+ end_marker,
1155
+ content_extractor,
1156
+ yield_raw_on_error,
1157
+ encoding,
1158
+ encoding_errors,
1159
+ buffer_size,
1160
+ line_delimiter,
1161
+ error_handler,
1162
+ skip_regexes,
1163
+ extract_regexes,
1164
+ raw,
1165
+ output_formatter,
1166
+ )
1167
+
1168
+ # Handle string directly
1169
+ if isinstance(data, str):
1170
+ return _sanitize_stream_sync(
1171
+ data,
1172
+ intro_value,
1173
+ to_json,
1174
+ skip_markers,
1175
+ strip_chars,
1176
+ start_marker,
1177
+ end_marker,
1178
+ content_extractor,
1179
+ yield_raw_on_error,
1180
+ encoding,
1181
+ encoding_errors,
1182
+ buffer_size,
1183
+ line_delimiter,
1184
+ error_handler,
1185
+ skip_regexes,
1186
+ extract_regexes,
1187
+ raw,
1188
+ output_formatter,
1189
+ )
1190
+
1191
+ # Handle dict, list, int, float, bool (non-iterable, non-string/bytes)
1192
+ if isinstance(data, (dict, list, int, float, bool)):
1193
+ if object_mode == "as_is":
1194
+
1195
+ def _as_is_gen() -> Generator[Any, None, None]:
1196
+ yield data
1197
+
1198
+ return _as_is_gen()
1199
+ elif object_mode == "str":
1200
+ return _sanitize_stream_sync(
1201
+ str(data),
1202
+ intro_value,
1203
+ to_json,
1204
+ skip_markers,
1205
+ strip_chars,
1206
+ start_marker,
1207
+ end_marker,
1208
+ content_extractor,
1209
+ yield_raw_on_error,
1210
+ encoding,
1211
+ encoding_errors,
1212
+ buffer_size,
1213
+ line_delimiter,
1214
+ error_handler,
1215
+ skip_regexes,
1216
+ extract_regexes,
1217
+ raw,
1218
+ output_formatter,
1219
+ )
1220
+ else: # "json"
1221
+ try:
1222
+ json_str = json.dumps(data)
1223
+ except Exception:
1224
+ json_str = str(data)
1225
+ return _sanitize_stream_sync(
1226
+ json_str,
1227
+ intro_value,
1228
+ to_json,
1229
+ skip_markers,
1230
+ strip_chars,
1231
+ start_marker,
1232
+ end_marker,
1233
+ content_extractor,
1234
+ yield_raw_on_error,
1235
+ encoding,
1236
+ encoding_errors,
1237
+ buffer_size,
1238
+ line_delimiter,
1239
+ error_handler,
1240
+ skip_regexes,
1241
+ extract_regexes,
1242
+ raw,
1243
+ output_formatter,
1244
+ )
1245
+
1246
+ # Handle file-like objects (optional, treat as string if .read exists)
1247
+ if hasattr(data, "read") and callable(data.read):
1248
+ try:
1249
+ file_content = data.read()
1250
+ if isinstance(file_content, bytes):
1251
+ file_content = file_content.decode(encoding, encoding_errors)
1252
+ return _sanitize_stream_sync(
1253
+ file_content,
1254
+ intro_value,
1255
+ to_json,
1256
+ skip_markers,
1257
+ strip_chars,
1258
+ start_marker,
1259
+ end_marker,
1260
+ content_extractor,
1261
+ yield_raw_on_error,
1262
+ encoding,
1263
+ encoding_errors,
1264
+ buffer_size,
1265
+ line_delimiter,
1266
+ error_handler,
1267
+ skip_regexes,
1268
+ extract_regexes,
1269
+ raw,
1270
+ output_formatter,
1271
+ )
1272
+ except Exception:
1273
+ pass # fallback to next
1274
+
1275
+ # Handle .text or .content attributes
1276
+ if isinstance(text_attr, str):
1277
+ payload = text_attr
1278
+ return _sanitize_stream_sync(
1279
+ payload,
1280
+ intro_value,
1281
+ to_json,
1282
+ skip_markers,
1283
+ strip_chars,
1284
+ start_marker,
1285
+ end_marker,
1286
+ content_extractor,
1287
+ yield_raw_on_error,
1288
+ encoding,
1289
+ encoding_errors,
1290
+ buffer_size,
1291
+ line_delimiter,
1292
+ error_handler,
1293
+ skip_regexes,
1294
+ extract_regexes,
1295
+ raw,
1296
+ output_formatter,
1297
+ )
1298
+ elif isinstance(content_attr, bytes):
1299
+ try:
1300
+ payload = content_attr.decode(encoding, encoding_errors)
1301
+ except Exception:
1302
+ payload = str(content_attr)
1303
+ return _sanitize_stream_sync(
1304
+ payload,
1305
+ intro_value,
1306
+ to_json,
1307
+ skip_markers,
1308
+ strip_chars,
1309
+ start_marker,
1310
+ end_marker,
1311
+ content_extractor,
1312
+ yield_raw_on_error,
1313
+ encoding,
1314
+ encoding_errors,
1315
+ buffer_size,
1316
+ line_delimiter,
1317
+ error_handler,
1318
+ skip_regexes,
1319
+ extract_regexes,
1320
+ raw,
1321
+ output_formatter,
1322
+ )
1323
+
1324
+ # Handle async iterables
1325
+ if hasattr(data, "__aiter__"):
1326
+ return _sanitize_stream_async(
1327
+ data,
1328
+ intro_value,
1329
+ to_json,
1330
+ skip_markers,
1331
+ strip_chars,
1332
+ start_marker,
1333
+ end_marker,
1334
+ content_extractor,
1335
+ yield_raw_on_error,
1336
+ encoding,
1337
+ encoding_errors,
1338
+ buffer_size,
1339
+ line_delimiter,
1340
+ error_handler,
1341
+ skip_regexes,
1342
+ extract_regexes,
1343
+ raw,
1344
+ output_formatter,
1345
+ )
1346
+ # Handle sync iterables (but not strings/bytes)
1347
+ if hasattr(data, "__iter__"):
1348
+ return _sanitize_stream_sync(
1349
+ data,
1350
+ intro_value,
1351
+ to_json,
1352
+ skip_markers,
1353
+ strip_chars,
1354
+ start_marker,
1355
+ end_marker,
1356
+ content_extractor,
1357
+ yield_raw_on_error,
1358
+ encoding,
1359
+ encoding_errors,
1360
+ buffer_size,
1361
+ line_delimiter,
1362
+ error_handler,
1363
+ skip_regexes,
1364
+ extract_regexes,
1365
+ raw,
1366
+ output_formatter,
1367
+ )
1368
+ # Fallback: treat as string
1369
+ return _sanitize_stream_sync(
1370
+ str(data),
1371
+ intro_value,
1372
+ to_json,
1373
+ skip_markers,
1374
+ strip_chars,
1375
+ start_marker,
1376
+ end_marker,
1377
+ content_extractor,
1378
+ yield_raw_on_error,
1379
+ encoding,
1380
+ encoding_errors,
1381
+ buffer_size,
1382
+ line_delimiter,
1383
+ error_handler,
1384
+ skip_regexes,
1385
+ extract_regexes,
1386
+ raw,
1387
+ output_formatter,
1388
+ )
1389
+
1390
+
1391
+ # --- Decorator version of sanitize_stream ---
1392
+
1393
+
1394
def _sanitize_stream_decorator(
    _func=None,
    *,
    intro_value: Optional[str] = "data:",
    to_json: bool = True,
    skip_markers: Optional[List[str]] = None,
    strip_chars: Optional[str] = None,
    start_marker: Optional[str] = None,
    end_marker: Optional[str] = None,
    content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
    yield_raw_on_error: bool = True,
    encoding: EncodingType = "utf-8",
    encoding_errors: str = "replace",
    buffer_size: int = 8192,
    line_delimiter: Optional[str] = None,
    error_handler: Optional[Callable[[Exception, str], Optional[Any]]] = None,
    skip_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    extract_regexes: Optional[List[Union[str, re.Pattern[str]]]] = None,
    object_mode: Literal["as_is", "json", "str"] = "json",
    raw: bool = False,
    output_formatter: Optional[Callable[[Any], Any]] = None,
):
    """
    Decorator form of :func:`sanitize_stream`.

    Usable both bare (``@sanitize_stream``) and with options
    (``@sanitize_stream(...)``); accepts the same keyword arguments as
    ``sanitize_stream()``, including ``output_formatter``.  The wrapped
    function's return value is fed through ``sanitize_stream`` before being
    handed back to the caller.
    """
    # Capture the configuration once; both wrappers forward the same options.
    _stream_options = dict(
        intro_value=intro_value,
        to_json=to_json,
        skip_markers=skip_markers,
        strip_chars=strip_chars,
        start_marker=start_marker,
        end_marker=end_marker,
        content_extractor=content_extractor,
        yield_raw_on_error=yield_raw_on_error,
        encoding=encoding,
        encoding_errors=encoding_errors,
        buffer_size=buffer_size,
        line_delimiter=line_delimiter,
        error_handler=error_handler,
        skip_regexes=skip_regexes,
        extract_regexes=extract_regexes,
        object_mode=object_mode,
        raw=raw,
        output_formatter=output_formatter,
    )

    def decorator(func) -> Callable:
        # Coroutine functions get an async wrapper so the result can be awaited
        # before it is passed on; everything else is wrapped synchronously.
        if asyncio.iscoroutinefunction(func):

            @functools.wraps(func)
            async def async_wrapper(*args, **kwargs) -> AsyncGenerator[Any, None]:
                return sanitize_stream(await func(*args, **kwargs), **_stream_options)

            return async_wrapper

        @functools.wraps(func)
        def sync_wrapper(*args, **kwargs) -> Generator[Any, None, None]:
            return sanitize_stream(func(*args, **kwargs), **_stream_options)

        return sync_wrapper

    # Bare usage (@deco) passes the function directly; parameterized usage
    # (@deco(...)) leaves _func as None and returns the decorator itself.
    return decorator if _func is None else decorator(_func)
1483
+
1484
+
1485
# Public aliases: LITSTREAM mirrors sanitize_stream, and both decorator names
# point at the same decorator implementation.
LITSTREAM = sanitize_stream
sanitize_stream_decorator = _sanitize_stream_decorator
lit_streamer = _sanitize_stream_decorator

# Tag each public entry point with a __decorator__ attribute so that
# @sanitize_stream / @LITSTREAM / @lit_streamer can be discovered as
# decorators.  Attribute assignment on these objects is expected to succeed;
# the guard merely tolerates exotic callables that reject it.
for _deco_target in (sanitize_stream, LITSTREAM, lit_streamer):
    try:
        _deco_target.__decorator__ = _sanitize_stream_decorator  # type: ignore[attr-defined]
    except AttributeError:
        pass
del _deco_target
1505
def __getattr__(name) -> Any:
    """Module-level attribute fallback (PEP 562).

    Resolves the public streaming aliases by name; note this hook only fires
    for names NOT already bound at module level.  Unknown names raise
    AttributeError, matching normal module semantics.
    """
    _lazy_exports = {
        "sanitize_stream": sanitize_stream,
        "LITSTREAM": LITSTREAM,
        "sanitize_stream_decorator": _sanitize_stream_decorator,
        "lit_streamer": _sanitize_stream_decorator,
    }
    try:
        return _lazy_exports[name]
    except KeyError:
        raise AttributeError(f"module {__name__} has no attribute {name}") from None