webscout-8.2.7-py3-none-any.whl → webscout-8.2.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic. Click here for more details.

Files changed (323)
  1. webscout/AIauto.py +1 -1
  2. webscout/AIutel.py +298 -249
  3. webscout/Extra/Act.md +309 -0
  4. webscout/Extra/GitToolkit/__init__.py +10 -0
  5. webscout/Extra/GitToolkit/gitapi/README.md +110 -0
  6. webscout/Extra/GitToolkit/gitapi/__init__.py +12 -0
  7. webscout/Extra/GitToolkit/gitapi/repository.py +195 -0
  8. webscout/Extra/GitToolkit/gitapi/user.py +96 -0
  9. webscout/Extra/GitToolkit/gitapi/utils.py +62 -0
  10. webscout/Extra/YTToolkit/README.md +375 -0
  11. webscout/Extra/YTToolkit/YTdownloader.py +957 -0
  12. webscout/Extra/YTToolkit/__init__.py +3 -0
  13. webscout/Extra/YTToolkit/transcriber.py +476 -0
  14. webscout/Extra/YTToolkit/ytapi/README.md +44 -0
  15. webscout/Extra/YTToolkit/ytapi/__init__.py +6 -0
  16. webscout/Extra/YTToolkit/ytapi/channel.py +307 -0
  17. webscout/Extra/YTToolkit/ytapi/errors.py +13 -0
  18. webscout/Extra/YTToolkit/ytapi/extras.py +118 -0
  19. webscout/Extra/YTToolkit/ytapi/https.py +88 -0
  20. webscout/Extra/YTToolkit/ytapi/patterns.py +61 -0
  21. webscout/Extra/YTToolkit/ytapi/playlist.py +59 -0
  22. webscout/Extra/YTToolkit/ytapi/pool.py +8 -0
  23. webscout/Extra/YTToolkit/ytapi/query.py +40 -0
  24. webscout/Extra/YTToolkit/ytapi/stream.py +63 -0
  25. webscout/Extra/YTToolkit/ytapi/utils.py +62 -0
  26. webscout/Extra/YTToolkit/ytapi/video.py +232 -0
  27. webscout/Extra/__init__.py +7 -0
  28. webscout/Extra/autocoder/__init__.py +9 -0
  29. webscout/Extra/autocoder/autocoder.py +1105 -0
  30. webscout/Extra/autocoder/autocoder_utiles.py +332 -0
  31. webscout/Extra/gguf.md +430 -0
  32. webscout/Extra/gguf.py +684 -0
  33. webscout/Extra/tempmail/README.md +488 -0
  34. webscout/Extra/tempmail/__init__.py +28 -0
  35. webscout/Extra/tempmail/async_utils.py +141 -0
  36. webscout/Extra/tempmail/base.py +161 -0
  37. webscout/Extra/tempmail/cli.py +187 -0
  38. webscout/Extra/tempmail/emailnator.py +84 -0
  39. webscout/Extra/tempmail/mail_tm.py +361 -0
  40. webscout/Extra/tempmail/temp_mail_io.py +292 -0
  41. webscout/Extra/weather.md +281 -0
  42. webscout/Extra/weather.py +194 -0
  43. webscout/Extra/weather_ascii.py +76 -0
  44. webscout/Litlogger/Readme.md +175 -0
  45. webscout/Litlogger/__init__.py +67 -0
  46. webscout/Litlogger/core/__init__.py +6 -0
  47. webscout/Litlogger/core/level.py +23 -0
  48. webscout/Litlogger/core/logger.py +165 -0
  49. webscout/Litlogger/handlers/__init__.py +12 -0
  50. webscout/Litlogger/handlers/console.py +33 -0
  51. webscout/Litlogger/handlers/file.py +143 -0
  52. webscout/Litlogger/handlers/network.py +173 -0
  53. webscout/Litlogger/styles/__init__.py +7 -0
  54. webscout/Litlogger/styles/colors.py +249 -0
  55. webscout/Litlogger/styles/formats.py +458 -0
  56. webscout/Litlogger/styles/text.py +87 -0
  57. webscout/Litlogger/utils/__init__.py +6 -0
  58. webscout/Litlogger/utils/detectors.py +153 -0
  59. webscout/Litlogger/utils/formatters.py +200 -0
  60. webscout/Provider/AI21.py +177 -0
  61. webscout/Provider/AISEARCH/DeepFind.py +254 -0
  62. webscout/Provider/AISEARCH/Perplexity.py +359 -0
  63. webscout/Provider/AISEARCH/README.md +279 -0
  64. webscout/Provider/AISEARCH/__init__.py +9 -0
  65. webscout/Provider/AISEARCH/felo_search.py +228 -0
  66. webscout/Provider/AISEARCH/genspark_search.py +350 -0
  67. webscout/Provider/AISEARCH/hika_search.py +198 -0
  68. webscout/Provider/AISEARCH/iask_search.py +436 -0
  69. webscout/Provider/AISEARCH/monica_search.py +246 -0
  70. webscout/Provider/AISEARCH/scira_search.py +324 -0
  71. webscout/Provider/AISEARCH/webpilotai_search.py +281 -0
  72. webscout/Provider/Aitopia.py +316 -0
  73. webscout/Provider/AllenAI.py +440 -0
  74. webscout/Provider/Andi.py +228 -0
  75. webscout/Provider/Blackboxai.py +673 -0
  76. webscout/Provider/ChatGPTClone.py +237 -0
  77. webscout/Provider/ChatGPTGratis.py +194 -0
  78. webscout/Provider/ChatSandbox.py +342 -0
  79. webscout/Provider/Cloudflare.py +324 -0
  80. webscout/Provider/Cohere.py +208 -0
  81. webscout/Provider/Deepinfra.py +340 -0
  82. webscout/Provider/ExaAI.py +261 -0
  83. webscout/Provider/ExaChat.py +358 -0
  84. webscout/Provider/Flowith.py +217 -0
  85. webscout/Provider/FreeGemini.py +250 -0
  86. webscout/Provider/Gemini.py +169 -0
  87. webscout/Provider/GithubChat.py +370 -0
  88. webscout/Provider/GizAI.py +295 -0
  89. webscout/Provider/Glider.py +225 -0
  90. webscout/Provider/Groq.py +801 -0
  91. webscout/Provider/HF_space/__init__.py +0 -0
  92. webscout/Provider/HF_space/qwen_qwen2.py +206 -0
  93. webscout/Provider/HeckAI.py +285 -0
  94. webscout/Provider/HuggingFaceChat.py +469 -0
  95. webscout/Provider/Hunyuan.py +283 -0
  96. webscout/Provider/Jadve.py +291 -0
  97. webscout/Provider/Koboldai.py +384 -0
  98. webscout/Provider/LambdaChat.py +411 -0
  99. webscout/Provider/Llama3.py +259 -0
  100. webscout/Provider/MCPCore.py +315 -0
  101. webscout/Provider/Marcus.py +198 -0
  102. webscout/Provider/Nemotron.py +218 -0
  103. webscout/Provider/Netwrck.py +270 -0
  104. webscout/Provider/OLLAMA.py +396 -0
  105. webscout/Provider/OPENAI/BLACKBOXAI.py +735 -0
  106. webscout/Provider/OPENAI/Cloudflare.py +378 -0
  107. webscout/Provider/OPENAI/FreeGemini.py +282 -0
  108. webscout/Provider/OPENAI/NEMOTRON.py +244 -0
  109. webscout/Provider/OPENAI/README.md +1253 -0
  110. webscout/Provider/OPENAI/__init__.py +36 -0
  111. webscout/Provider/OPENAI/ai4chat.py +293 -0
  112. webscout/Provider/OPENAI/api.py +810 -0
  113. webscout/Provider/OPENAI/base.py +249 -0
  114. webscout/Provider/OPENAI/c4ai.py +373 -0
  115. webscout/Provider/OPENAI/chatgpt.py +556 -0
  116. webscout/Provider/OPENAI/chatgptclone.py +488 -0
  117. webscout/Provider/OPENAI/chatsandbox.py +172 -0
  118. webscout/Provider/OPENAI/deepinfra.py +319 -0
  119. webscout/Provider/OPENAI/e2b.py +1356 -0
  120. webscout/Provider/OPENAI/exaai.py +411 -0
  121. webscout/Provider/OPENAI/exachat.py +443 -0
  122. webscout/Provider/OPENAI/flowith.py +162 -0
  123. webscout/Provider/OPENAI/freeaichat.py +359 -0
  124. webscout/Provider/OPENAI/glider.py +323 -0
  125. webscout/Provider/OPENAI/groq.py +361 -0
  126. webscout/Provider/OPENAI/heckai.py +307 -0
  127. webscout/Provider/OPENAI/llmchatco.py +335 -0
  128. webscout/Provider/OPENAI/mcpcore.py +383 -0
  129. webscout/Provider/OPENAI/multichat.py +376 -0
  130. webscout/Provider/OPENAI/netwrck.py +356 -0
  131. webscout/Provider/OPENAI/opkfc.py +496 -0
  132. webscout/Provider/OPENAI/scirachat.py +471 -0
  133. webscout/Provider/OPENAI/sonus.py +303 -0
  134. webscout/Provider/OPENAI/standardinput.py +433 -0
  135. webscout/Provider/OPENAI/textpollinations.py +339 -0
  136. webscout/Provider/OPENAI/toolbaz.py +413 -0
  137. webscout/Provider/OPENAI/typefully.py +355 -0
  138. webscout/Provider/OPENAI/typegpt.py +358 -0
  139. webscout/Provider/OPENAI/uncovrAI.py +462 -0
  140. webscout/Provider/OPENAI/utils.py +307 -0
  141. webscout/Provider/OPENAI/venice.py +425 -0
  142. webscout/Provider/OPENAI/wisecat.py +381 -0
  143. webscout/Provider/OPENAI/writecream.py +163 -0
  144. webscout/Provider/OPENAI/x0gpt.py +378 -0
  145. webscout/Provider/OPENAI/yep.py +356 -0
  146. webscout/Provider/OpenGPT.py +209 -0
  147. webscout/Provider/Openai.py +496 -0
  148. webscout/Provider/PI.py +429 -0
  149. webscout/Provider/Perplexitylabs.py +415 -0
  150. webscout/Provider/QwenLM.py +254 -0
  151. webscout/Provider/Reka.py +214 -0
  152. webscout/Provider/StandardInput.py +290 -0
  153. webscout/Provider/TTI/AiForce/README.md +159 -0
  154. webscout/Provider/TTI/AiForce/__init__.py +22 -0
  155. webscout/Provider/TTI/AiForce/async_aiforce.py +224 -0
  156. webscout/Provider/TTI/AiForce/sync_aiforce.py +245 -0
  157. webscout/Provider/TTI/FreeAIPlayground/README.md +99 -0
  158. webscout/Provider/TTI/FreeAIPlayground/__init__.py +9 -0
  159. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +181 -0
  160. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +180 -0
  161. webscout/Provider/TTI/ImgSys/README.md +174 -0
  162. webscout/Provider/TTI/ImgSys/__init__.py +23 -0
  163. webscout/Provider/TTI/ImgSys/async_imgsys.py +202 -0
  164. webscout/Provider/TTI/ImgSys/sync_imgsys.py +195 -0
  165. webscout/Provider/TTI/MagicStudio/README.md +101 -0
  166. webscout/Provider/TTI/MagicStudio/__init__.py +2 -0
  167. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +111 -0
  168. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +109 -0
  169. webscout/Provider/TTI/Nexra/README.md +155 -0
  170. webscout/Provider/TTI/Nexra/__init__.py +22 -0
  171. webscout/Provider/TTI/Nexra/async_nexra.py +286 -0
  172. webscout/Provider/TTI/Nexra/sync_nexra.py +258 -0
  173. webscout/Provider/TTI/PollinationsAI/README.md +146 -0
  174. webscout/Provider/TTI/PollinationsAI/__init__.py +23 -0
  175. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +311 -0
  176. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +265 -0
  177. webscout/Provider/TTI/README.md +128 -0
  178. webscout/Provider/TTI/__init__.py +12 -0
  179. webscout/Provider/TTI/aiarta/README.md +134 -0
  180. webscout/Provider/TTI/aiarta/__init__.py +2 -0
  181. webscout/Provider/TTI/aiarta/async_aiarta.py +482 -0
  182. webscout/Provider/TTI/aiarta/sync_aiarta.py +440 -0
  183. webscout/Provider/TTI/artbit/README.md +100 -0
  184. webscout/Provider/TTI/artbit/__init__.py +22 -0
  185. webscout/Provider/TTI/artbit/async_artbit.py +155 -0
  186. webscout/Provider/TTI/artbit/sync_artbit.py +148 -0
  187. webscout/Provider/TTI/fastflux/README.md +129 -0
  188. webscout/Provider/TTI/fastflux/__init__.py +22 -0
  189. webscout/Provider/TTI/fastflux/async_fastflux.py +261 -0
  190. webscout/Provider/TTI/fastflux/sync_fastflux.py +252 -0
  191. webscout/Provider/TTI/huggingface/README.md +114 -0
  192. webscout/Provider/TTI/huggingface/__init__.py +22 -0
  193. webscout/Provider/TTI/huggingface/async_huggingface.py +199 -0
  194. webscout/Provider/TTI/huggingface/sync_huggingface.py +195 -0
  195. webscout/Provider/TTI/piclumen/README.md +161 -0
  196. webscout/Provider/TTI/piclumen/__init__.py +23 -0
  197. webscout/Provider/TTI/piclumen/async_piclumen.py +268 -0
  198. webscout/Provider/TTI/piclumen/sync_piclumen.py +233 -0
  199. webscout/Provider/TTI/pixelmuse/README.md +79 -0
  200. webscout/Provider/TTI/pixelmuse/__init__.py +4 -0
  201. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +249 -0
  202. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +182 -0
  203. webscout/Provider/TTI/talkai/README.md +139 -0
  204. webscout/Provider/TTI/talkai/__init__.py +4 -0
  205. webscout/Provider/TTI/talkai/async_talkai.py +229 -0
  206. webscout/Provider/TTI/talkai/sync_talkai.py +207 -0
  207. webscout/Provider/TTS/README.md +192 -0
  208. webscout/Provider/TTS/__init__.py +9 -0
  209. webscout/Provider/TTS/base.py +159 -0
  210. webscout/Provider/TTS/deepgram.py +156 -0
  211. webscout/Provider/TTS/elevenlabs.py +111 -0
  212. webscout/Provider/TTS/gesserit.py +128 -0
  213. webscout/Provider/TTS/murfai.py +113 -0
  214. webscout/Provider/TTS/parler.py +111 -0
  215. webscout/Provider/TTS/speechma.py +580 -0
  216. webscout/Provider/TTS/sthir.py +94 -0
  217. webscout/Provider/TTS/streamElements.py +333 -0
  218. webscout/Provider/TTS/utils.py +280 -0
  219. webscout/Provider/TeachAnything.py +229 -0
  220. webscout/Provider/TextPollinationsAI.py +308 -0
  221. webscout/Provider/TwoAI.py +280 -0
  222. webscout/Provider/TypliAI.py +305 -0
  223. webscout/Provider/UNFINISHED/ChatHub.py +209 -0
  224. webscout/Provider/UNFINISHED/Youchat.py +330 -0
  225. webscout/Provider/UNFINISHED/liner_api_request.py +263 -0
  226. webscout/Provider/UNFINISHED/oivscode.py +351 -0
  227. webscout/Provider/UNFINISHED/test_lmarena.py +119 -0
  228. webscout/Provider/Venice.py +258 -0
  229. webscout/Provider/VercelAI.py +253 -0
  230. webscout/Provider/WiseCat.py +233 -0
  231. webscout/Provider/WrDoChat.py +370 -0
  232. webscout/Provider/Writecream.py +246 -0
  233. webscout/Provider/WritingMate.py +269 -0
  234. webscout/Provider/__init__.py +172 -0
  235. webscout/Provider/ai4chat.py +149 -0
  236. webscout/Provider/akashgpt.py +335 -0
  237. webscout/Provider/asksteve.py +220 -0
  238. webscout/Provider/cerebras.py +290 -0
  239. webscout/Provider/chatglm.py +215 -0
  240. webscout/Provider/cleeai.py +213 -0
  241. webscout/Provider/copilot.py +425 -0
  242. webscout/Provider/elmo.py +283 -0
  243. webscout/Provider/freeaichat.py +285 -0
  244. webscout/Provider/geminiapi.py +208 -0
  245. webscout/Provider/granite.py +235 -0
  246. webscout/Provider/hermes.py +266 -0
  247. webscout/Provider/julius.py +223 -0
  248. webscout/Provider/koala.py +170 -0
  249. webscout/Provider/learnfastai.py +325 -0
  250. webscout/Provider/llama3mitril.py +215 -0
  251. webscout/Provider/llmchat.py +258 -0
  252. webscout/Provider/llmchatco.py +306 -0
  253. webscout/Provider/lmarena.py +198 -0
  254. webscout/Provider/meta.py +801 -0
  255. webscout/Provider/multichat.py +364 -0
  256. webscout/Provider/samurai.py +223 -0
  257. webscout/Provider/scira_chat.py +299 -0
  258. webscout/Provider/scnet.py +243 -0
  259. webscout/Provider/searchchat.py +292 -0
  260. webscout/Provider/sonus.py +258 -0
  261. webscout/Provider/talkai.py +194 -0
  262. webscout/Provider/toolbaz.py +353 -0
  263. webscout/Provider/turboseek.py +266 -0
  264. webscout/Provider/typefully.py +202 -0
  265. webscout/Provider/typegpt.py +289 -0
  266. webscout/Provider/uncovr.py +368 -0
  267. webscout/Provider/x0gpt.py +299 -0
  268. webscout/Provider/yep.py +389 -0
  269. webscout/__init__.py +4 -2
  270. webscout/cli.py +3 -28
  271. webscout/conversation.py +35 -35
  272. webscout/litagent/Readme.md +276 -0
  273. webscout/litagent/__init__.py +29 -0
  274. webscout/litagent/agent.py +455 -0
  275. webscout/litagent/constants.py +60 -0
  276. webscout/litprinter/__init__.py +59 -0
  277. webscout/scout/README.md +402 -0
  278. webscout/scout/__init__.py +8 -0
  279. webscout/scout/core/__init__.py +7 -0
  280. webscout/scout/core/crawler.py +140 -0
  281. webscout/scout/core/scout.py +568 -0
  282. webscout/scout/core/search_result.py +96 -0
  283. webscout/scout/core/text_analyzer.py +63 -0
  284. webscout/scout/core/text_utils.py +277 -0
  285. webscout/scout/core/web_analyzer.py +52 -0
  286. webscout/scout/element.py +460 -0
  287. webscout/scout/parsers/__init__.py +69 -0
  288. webscout/scout/parsers/html5lib_parser.py +172 -0
  289. webscout/scout/parsers/html_parser.py +236 -0
  290. webscout/scout/parsers/lxml_parser.py +178 -0
  291. webscout/scout/utils.py +37 -0
  292. webscout/swiftcli/Readme.md +323 -0
  293. webscout/swiftcli/__init__.py +95 -0
  294. webscout/swiftcli/core/__init__.py +7 -0
  295. webscout/swiftcli/core/cli.py +297 -0
  296. webscout/swiftcli/core/context.py +104 -0
  297. webscout/swiftcli/core/group.py +241 -0
  298. webscout/swiftcli/decorators/__init__.py +28 -0
  299. webscout/swiftcli/decorators/command.py +221 -0
  300. webscout/swiftcli/decorators/options.py +220 -0
  301. webscout/swiftcli/decorators/output.py +252 -0
  302. webscout/swiftcli/exceptions.py +21 -0
  303. webscout/swiftcli/plugins/__init__.py +9 -0
  304. webscout/swiftcli/plugins/base.py +135 -0
  305. webscout/swiftcli/plugins/manager.py +262 -0
  306. webscout/swiftcli/utils/__init__.py +59 -0
  307. webscout/swiftcli/utils/formatting.py +252 -0
  308. webscout/swiftcli/utils/parsing.py +267 -0
  309. webscout/version.py +1 -1
  310. webscout/webscout_search.py +2 -182
  311. webscout/webscout_search_async.py +1 -179
  312. webscout/zeroart/README.md +89 -0
  313. webscout/zeroart/__init__.py +135 -0
  314. webscout/zeroart/base.py +66 -0
  315. webscout/zeroart/effects.py +101 -0
  316. webscout/zeroart/fonts.py +1239 -0
  317. {webscout-8.2.7.dist-info → webscout-8.2.8.dist-info}/METADATA +115 -60
  318. webscout-8.2.8.dist-info/RECORD +334 -0
  319. {webscout-8.2.7.dist-info → webscout-8.2.8.dist-info}/WHEEL +1 -1
  320. webscout-8.2.7.dist-info/RECORD +0 -26
  321. {webscout-8.2.7.dist-info → webscout-8.2.8.dist-info}/entry_points.txt +0 -0
  322. {webscout-8.2.7.dist-info → webscout-8.2.8.dist-info}/licenses/LICENSE.md +0 -0
  323. {webscout-8.2.7.dist-info → webscout-8.2.8.dist-info}/top_level.txt +0 -0
webscout/AIauto.py CHANGED
@@ -1,7 +1,7 @@
1
1
  """
2
2
  This module provides the AUTO provider, which automatically selects and uses
3
3
  an available LLM provider from the webscout library that doesn't require
4
- API keys or cookies.
4
+ API keys or cookies.
5
5
  """
6
6
 
7
7
  from webscout.AIbase import Provider
webscout/AIutel.py CHANGED
@@ -1,250 +1,299 @@
1
- import json
2
- from typing import Union, Optional, Dict, Any, Iterable, Generator, List, Callable, Literal
3
- import codecs
4
-
5
- # Expanded encoding types
6
- EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
7
- 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
8
- 'shift_jis', 'euc-jp', 'euc-kr']
9
-
10
- def _process_chunk(
11
- chunk: str,
12
- intro_value: str,
13
- to_json: bool,
14
- skip_markers: List[str],
15
- strip_chars: Optional[str],
16
- yield_raw_on_error: bool,
17
- ) -> Union[str, Dict[str, Any], None]:
18
- """Internal helper to sanitize and potentially parse a single chunk."""
19
- if not isinstance(chunk, str):
20
- return None
21
-
22
- # Fast path for empty chunks
23
- if not chunk:
24
- return None
25
-
26
- # Use slicing for prefix removal (faster than startswith+slicing)
27
- sanitized_chunk = chunk
28
- if intro_value and len(chunk) >= len(intro_value) and chunk[:len(intro_value)] == intro_value:
29
- sanitized_chunk = chunk[len(intro_value):]
30
-
31
- # Optimize string stripping operations
32
- if strip_chars is not None:
33
- sanitized_chunk = sanitized_chunk.strip(strip_chars)
34
- else:
35
- # lstrip() is faster than strip() when we only need leading whitespace removed
36
- sanitized_chunk = sanitized_chunk.lstrip()
37
-
38
- # Skip empty chunks and markers
39
- if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
40
- return None
41
-
42
- # JSON parsing with optimized error handling
43
- if to_json:
44
- try:
45
- # Only strip before JSON parsing if needed
46
- if sanitized_chunk[0] not in '{[' or sanitized_chunk[-1] not in '}]':
47
- sanitized_chunk = sanitized_chunk.strip()
48
- return json.loads(sanitized_chunk)
49
- except (json.JSONDecodeError, Exception):
50
- return sanitized_chunk if yield_raw_on_error else None
51
-
52
- return sanitized_chunk
53
-
54
- def _decode_byte_stream(
55
- byte_iterator: Iterable[bytes],
56
- encoding: EncodingType = 'utf-8',
57
- errors: str = 'replace',
58
- buffer_size: int = 8192
59
- ) -> Generator[str, None, None]:
60
- """
61
- Realtime byte stream decoder with flexible encoding support.
62
-
63
- Args:
64
- byte_iterator: Iterator yielding bytes
65
- encoding: Character encoding to use
66
- errors: How to handle encoding errors ('strict', 'ignore', 'replace')
67
- buffer_size: Size of internal buffer for performance tuning
68
- """
69
- # Initialize decoder with the specified encoding
70
- try:
71
- decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
72
- except LookupError:
73
- # Fallback to utf-8 if the encoding is not supported
74
- decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
75
-
76
- # Process byte stream in realtime
77
- buffer = bytearray(buffer_size)
78
- buffer_view = memoryview(buffer)
79
-
80
- for chunk_bytes in byte_iterator:
81
- if not chunk_bytes:
82
- continue
83
-
84
- try:
85
- # Use buffer for processing if chunk size is appropriate
86
- if len(chunk_bytes) <= buffer_size:
87
- buffer[:len(chunk_bytes)] = chunk_bytes
88
- text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
89
- else:
90
- text = decoder.decode(chunk_bytes, final=False)
91
-
92
- if text:
93
- yield text
94
- except UnicodeDecodeError:
95
- yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
96
-
97
- # Final flush
98
- try:
99
- final_text = decoder.decode(b'', final=True)
100
- if final_text:
101
- yield final_text
102
- except UnicodeDecodeError:
103
- yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
104
-
105
- def sanitize_stream(
106
- data: Union[str, Iterable[str], Iterable[bytes]],
107
- intro_value: str = "data:",
108
- to_json: bool = True,
109
- skip_markers: Optional[List[str]] = None,
110
- strip_chars: Optional[str] = None,
111
- start_marker: Optional[str] = None,
112
- end_marker: Optional[str] = None,
113
- content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
114
- yield_raw_on_error: bool = True,
115
- encoding: EncodingType = 'utf-8',
116
- encoding_errors: str = 'replace',
117
- buffer_size: int = 8192,
118
- ) -> Generator[Any, None, None]:
119
- """
120
- Optimized realtime stream processor that handles string/byte streams with minimal latency.
121
-
122
- Features:
123
- - Direct realtime processing of byte streams
124
- - Optimized string handling and JSON parsing
125
- - Robust error handling and validation
126
- - Flexible encoding support with memory-efficient buffering
127
- - High performance for large streams
128
-
129
- Args:
130
- data: Input data (string, string iterator, or bytes iterator)
131
- intro_value: Prefix to remove from each chunk
132
- to_json: Whether to parse chunks as JSON
133
- skip_markers: Markers to skip
134
- strip_chars: Characters to strip
135
- start_marker: Processing start marker
136
- end_marker: Processing end marker
137
- content_extractor: Function to extract content
138
- yield_raw_on_error: Yield raw content on JSON errors
139
- encoding: Character encoding for byte streams
140
- encoding_errors: How to handle encoding errors
141
- buffer_size: Size of internal processing buffer
142
-
143
- Yields:
144
- Processed chunks (string or dictionary)
145
- """
146
- effective_skip_markers = skip_markers or []
147
- processing_active = start_marker is None
148
-
149
- # Fast path for single string processing
150
- if isinstance(data, str):
151
- processed_item = None
152
- if processing_active:
153
- # Optimize JSON parsing for large strings
154
- if to_json:
155
- try:
156
- # Use faster JSON parser for large strings
157
- data = data.strip()
158
- if data:
159
- processed_item = json.loads(data)
160
- except json.JSONDecodeError:
161
- processed_item = data if yield_raw_on_error else None
162
- else:
163
- processed_item = _process_chunk(
164
- data, intro_value, False, effective_skip_markers,
165
- strip_chars, yield_raw_on_error
166
- )
167
-
168
- if processed_item is not None:
169
- if content_extractor:
170
- try:
171
- final_content = content_extractor(processed_item)
172
- if final_content is not None:
173
- yield final_content
174
- except Exception:
175
- pass
176
- else:
177
- yield processed_item
178
- return
179
-
180
- # Stream processing path
181
- if not hasattr(data, '__iter__'):
182
- raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
183
-
184
- try:
185
- iterator = iter(data)
186
- first_item = next(iterator, None)
187
- if first_item is None:
188
- return
189
-
190
- # Efficient streaming with itertools
191
- from itertools import chain
192
- stream = chain([first_item], iterator)
193
-
194
- # Determine if we're dealing with bytes or strings
195
- if isinstance(first_item, bytes):
196
- line_iterator = _decode_byte_stream(
197
- stream,
198
- encoding=encoding,
199
- errors=encoding_errors,
200
- buffer_size=buffer_size
201
- )
202
- elif isinstance(first_item, str):
203
- line_iterator = stream
204
- else:
205
- raise TypeError(f"Stream must yield strings or bytes, not {type(first_item).__name__}")
206
-
207
- # Process stream with minimal allocations
208
- for line in line_iterator:
209
- if not line:
210
- continue
211
-
212
- # Handle markers efficiently
213
- if not processing_active and start_marker is not None:
214
- if line.strip() == start_marker:
215
- processing_active = True
216
- continue
217
-
218
- if processing_active and end_marker is not None and line.strip() == end_marker:
219
- processing_active = False
220
- continue
221
-
222
- if processing_active:
223
- # Process chunk with optimized function
224
- processed = _process_chunk(
225
- line, intro_value, to_json, effective_skip_markers,
226
- strip_chars, yield_raw_on_error
227
- )
228
-
229
- if processed is not None:
230
- if content_extractor:
231
- try:
232
- final_content = content_extractor(processed)
233
- if final_content is not None:
234
- yield final_content
235
- except Exception:
236
- # Continue on extraction errors
237
- pass
238
- else:
239
- yield processed
240
-
241
- except Exception as e:
242
- # Log error but don't crash on stream processing exceptions
243
- import sys
244
- print(f"Stream processing error: {str(e)}", file=sys.stderr)
245
-
246
-
247
- from .conversation import Conversation
248
- from .optimizers import Optimizers
249
- from .Extra.autocoder import AutoCoder
1
+ import json
2
+ from typing import Union, Optional, Dict, Any, Iterable, Generator, List, Callable, Literal
3
+ import codecs
4
+
5
+ # Expanded encoding types
6
+ EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
7
+ 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
8
+ 'shift_jis', 'euc-jp', 'euc-kr']
9
+
10
+ def _process_chunk(
11
+ chunk: str,
12
+ intro_value: str,
13
+ to_json: bool,
14
+ skip_markers: List[str],
15
+ strip_chars: Optional[str],
16
+ yield_raw_on_error: bool,
17
+ ) -> Union[str, Dict[str, Any], None]:
18
+ """Internal helper to sanitize and potentially parse a single chunk."""
19
+ if not isinstance(chunk, str):
20
+ return None
21
+
22
+ # Fast path for empty chunks
23
+ if not chunk:
24
+ return None
25
+
26
+ # Use slicing for prefix removal (faster than startswith+slicing)
27
+ sanitized_chunk = chunk
28
+ if intro_value and len(chunk) >= len(intro_value) and chunk[:len(intro_value)] == intro_value:
29
+ sanitized_chunk = chunk[len(intro_value):]
30
+
31
+ # Optimize string stripping operations
32
+ if strip_chars is not None:
33
+ sanitized_chunk = sanitized_chunk.strip(strip_chars)
34
+ else:
35
+ # lstrip() is faster than strip() when we only need leading whitespace removed
36
+ sanitized_chunk = sanitized_chunk.lstrip()
37
+
38
+ # Skip empty chunks and markers
39
+ if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
40
+ return None
41
+
42
+ # JSON parsing with optimized error handling
43
+ if to_json:
44
+ try:
45
+ # Only strip before JSON parsing if needed
46
+ if sanitized_chunk[0] not in '{[' or sanitized_chunk[-1] not in '}]':
47
+ sanitized_chunk = sanitized_chunk.strip()
48
+ return json.loads(sanitized_chunk)
49
+ except (json.JSONDecodeError, Exception):
50
+ return sanitized_chunk if yield_raw_on_error else None
51
+
52
+ return sanitized_chunk
53
+
54
+ def _decode_byte_stream(
55
+ byte_iterator: Iterable[bytes],
56
+ encoding: EncodingType = 'utf-8',
57
+ errors: str = 'replace',
58
+ buffer_size: int = 8192
59
+ ) -> Generator[str, None, None]:
60
+ """
61
+ Realtime byte stream decoder with flexible encoding support.
62
+
63
+ Args:
64
+ byte_iterator: Iterator yielding bytes
65
+ encoding: Character encoding to use
66
+ errors: How to handle encoding errors ('strict', 'ignore', 'replace')
67
+ buffer_size: Size of internal buffer for performance tuning
68
+ """
69
+ # Initialize decoder with the specified encoding
70
+ try:
71
+ decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
72
+ except LookupError:
73
+ # Fallback to utf-8 if the encoding is not supported
74
+ decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
75
+
76
+ # Process byte stream in realtime
77
+ buffer = bytearray(buffer_size)
78
+ buffer_view = memoryview(buffer)
79
+
80
+ for chunk_bytes in byte_iterator:
81
+ if not chunk_bytes:
82
+ continue
83
+
84
+ try:
85
+ # Use buffer for processing if chunk size is appropriate
86
+ if len(chunk_bytes) <= buffer_size:
87
+ buffer[:len(chunk_bytes)] = chunk_bytes
88
+ text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
89
+ else:
90
+ text = decoder.decode(chunk_bytes, final=False)
91
+
92
+ if text:
93
+ yield text
94
+ except UnicodeDecodeError:
95
+ yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
96
+
97
+ # Final flush
98
+ try:
99
+ final_text = decoder.decode(b'', final=True)
100
+ if final_text:
101
+ yield final_text
102
+ except UnicodeDecodeError:
103
+ yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
104
+
105
+ def sanitize_stream(
106
+ data: Union[str, Iterable[str], Iterable[bytes]],
107
+ intro_value: str = "data:",
108
+ to_json: bool = True,
109
+ skip_markers: Optional[List[str]] = None,
110
+ strip_chars: Optional[str] = None,
111
+ start_marker: Optional[str] = None,
112
+ end_marker: Optional[str] = None,
113
+ content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
114
+ yield_raw_on_error: bool = True,
115
+ encoding: EncodingType = 'utf-8',
116
+ encoding_errors: str = 'replace',
117
+ buffer_size: int = 8192,
118
+ ) -> Generator[Any, None, None]:
119
+ """
120
+ Robust realtime stream processor that handles string/byte streams with correct marker extraction/skipping.
121
+ Now handles split markers, partial chunks, and skips lines containing (not just equal to) skip markers.
122
+ """
123
+ effective_skip_markers = skip_markers or []
124
+ processing_active = start_marker is None
125
+ buffer = ""
126
+ found_start = False if start_marker else True
127
+
128
+ # Fast path for single string processing
129
+ if isinstance(data, str):
130
+ processed_item = None
131
+ if processing_active:
132
+ if to_json:
133
+ try:
134
+ data = data.strip()
135
+ if data:
136
+ processed_item = json.loads(data)
137
+ except json.JSONDecodeError:
138
+ processed_item = data if yield_raw_on_error else None
139
+ else:
140
+ processed_item = _process_chunk(
141
+ data, intro_value, False, effective_skip_markers,
142
+ strip_chars, yield_raw_on_error
143
+ )
144
+ if processed_item is not None:
145
+ if content_extractor:
146
+ try:
147
+ final_content = content_extractor(processed_item)
148
+ if final_content is not None:
149
+ yield final_content
150
+ except Exception:
151
+ pass
152
+ else:
153
+ yield processed_item
154
+ return
155
+
156
+ # Stream processing path
157
+ if not hasattr(data, '__iter__'):
158
+ raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
159
+
160
+ try:
161
+ iterator = iter(data)
162
+ first_item = next(iterator, None)
163
+ if first_item is None:
164
+ return
165
+ from itertools import chain
166
+ stream = chain([first_item], iterator)
167
+
168
+ # Determine if we're dealing with bytes or strings
169
+ if isinstance(first_item, bytes):
170
+ line_iterator = _decode_byte_stream(
171
+ stream,
172
+ encoding=encoding,
173
+ errors=encoding_errors,
174
+ buffer_size=buffer_size
175
+ )
176
+ elif isinstance(first_item, str):
177
+ line_iterator = stream
178
+ else:
179
+ raise TypeError(f"Stream must yield strings or bytes, not {type(first_item).__name__}")
180
+
181
+ for line in line_iterator:
182
+ if not line:
183
+ continue
184
+ buffer += line
185
+ while True:
186
+ # Look for start marker if needed
187
+ if not found_start and start_marker:
188
+ idx = buffer.find(start_marker)
189
+ if idx != -1:
190
+ found_start = True
191
+ buffer = buffer[idx + len(start_marker):]
192
+ else:
193
+ # Not found, keep buffering
194
+ buffer = buffer[-max(len(start_marker), 256):] # avoid unbounded growth
195
+ break
196
+ # Look for end marker if needed
197
+ if found_start and end_marker:
198
+ idx = buffer.find(end_marker)
199
+ if idx != -1:
200
+ chunk = buffer[:idx]
201
+ buffer = buffer[idx + len(end_marker):]
202
+ processing_active = False
203
+ else:
204
+ chunk = buffer
205
+ buffer = ""
206
+ processing_active = True
207
+ # Process chunk if we are in active region
208
+ if chunk and processing_active:
209
+ # Split into lines for skip marker logic
210
+ for subline in chunk.splitlines():
211
+ # Remove intro_value prefix if present
212
+ if intro_value and subline.startswith(intro_value):
213
+ subline = subline[len(intro_value):]
214
+ # Strip chars if needed
215
+ if strip_chars is not None:
216
+ subline = subline.strip(strip_chars)
217
+ else:
218
+ subline = subline.lstrip()
219
+ # Skip if matches any skip marker (using 'in')
220
+ if any(marker in subline for marker in effective_skip_markers):
221
+ continue
222
+ # Skip empty
223
+ if not subline:
224
+ continue
225
+ # JSON parse if needed
226
+ if to_json:
227
+ try:
228
+ if subline and (subline[0] in '{[' and subline[-1] in '}]'):
229
+ parsed = json.loads(subline)
230
+ result = parsed
231
+ else:
232
+ result = subline
233
+ except Exception:
234
+ result = subline if yield_raw_on_error else None
235
+ else:
236
+ result = subline
237
+ if result is not None:
238
+ if content_extractor:
239
+ try:
240
+ final_content = content_extractor(result)
241
+ if final_content is not None:
242
+ yield final_content
243
+ except Exception:
244
+ pass
245
+ else:
246
+ yield result
247
+ if not processing_active:
248
+ found_start = False
249
+ if idx == -1:
250
+ break
251
+ elif found_start:
252
+ # No end marker, process all buffered content
253
+ chunk = buffer
254
+ buffer = ""
255
+ if chunk:
256
+ for subline in chunk.splitlines():
257
+ if intro_value and subline.startswith(intro_value):
258
+ subline = subline[len(intro_value):]
259
+ if strip_chars is not None:
260
+ subline = subline.strip(strip_chars)
261
+ else:
262
+ subline = subline.lstrip()
263
+ if any(marker in subline for marker in effective_skip_markers):
264
+ continue
265
+ if not subline:
266
+ continue
267
+ if to_json:
268
+ try:
269
+ if subline and (subline[0] in '{[' and subline[-1] in '}]'):
270
+ parsed = json.loads(subline)
271
+ result = parsed
272
+ else:
273
+ result = subline
274
+ except Exception:
275
+ result = subline if yield_raw_on_error else None
276
+ else:
277
+ result = subline
278
+ if result is not None:
279
+ if content_extractor:
280
+ try:
281
+ final_content = content_extractor(result)
282
+ if final_content is not None:
283
+ yield final_content
284
+ except Exception:
285
+ pass
286
+ else:
287
+ yield result
288
+ break
289
+ else:
290
+ break
291
+ except Exception as e:
292
+ import sys
293
+ print(f"Stream processing error: {str(e)}", file=sys.stderr)
294
+
295
+
296
+ from .conversation import Conversation
297
+ from .optimizers import Optimizers
298
+ from .Extra.autocoder import AutoCoder
250
299
  from .prompt_manager import AwesomePrompts