webscout 8.2.2__py3-none-any.whl → 8.2.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of webscout might be problematic.

Files changed (306)
  1. webscout/AIauto.py +112 -22
  2. webscout/AIbase.py +144 -7
  3. webscout/AIutel.py +249 -131
  4. webscout/Bard.py +579 -206
  5. webscout/DWEBS.py +78 -35
  6. webscout/__init__.py +0 -1
  7. webscout/cli.py +256 -0
  8. webscout/conversation.py +307 -436
  9. webscout/exceptions.py +23 -0
  10. webscout/prompt_manager.py +56 -42
  11. webscout/version.py +1 -1
  12. webscout/webscout_search.py +65 -47
  13. webscout/webscout_search_async.py +81 -126
  14. webscout/yep_search.py +93 -43
  15. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/METADATA +172 -52
  16. webscout-8.2.7.dist-info/RECORD +26 -0
  17. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info}/WHEEL +1 -1
  18. webscout-8.2.7.dist-info/entry_points.txt +3 -0
  19. webscout-8.2.7.dist-info/top_level.txt +1 -0
  20. inferno/__init__.py +0 -6
  21. inferno/__main__.py +0 -9
  22. inferno/cli.py +0 -6
  23. webscout/Extra/GitToolkit/__init__.py +0 -10
  24. webscout/Extra/GitToolkit/gitapi/__init__.py +0 -12
  25. webscout/Extra/GitToolkit/gitapi/repository.py +0 -195
  26. webscout/Extra/GitToolkit/gitapi/user.py +0 -96
  27. webscout/Extra/GitToolkit/gitapi/utils.py +0 -62
  28. webscout/Extra/YTToolkit/YTdownloader.py +0 -957
  29. webscout/Extra/YTToolkit/__init__.py +0 -3
  30. webscout/Extra/YTToolkit/transcriber.py +0 -476
  31. webscout/Extra/YTToolkit/ytapi/__init__.py +0 -6
  32. webscout/Extra/YTToolkit/ytapi/channel.py +0 -307
  33. webscout/Extra/YTToolkit/ytapi/errors.py +0 -13
  34. webscout/Extra/YTToolkit/ytapi/extras.py +0 -45
  35. webscout/Extra/YTToolkit/ytapi/https.py +0 -88
  36. webscout/Extra/YTToolkit/ytapi/patterns.py +0 -61
  37. webscout/Extra/YTToolkit/ytapi/playlist.py +0 -59
  38. webscout/Extra/YTToolkit/ytapi/pool.py +0 -8
  39. webscout/Extra/YTToolkit/ytapi/query.py +0 -40
  40. webscout/Extra/YTToolkit/ytapi/stream.py +0 -63
  41. webscout/Extra/YTToolkit/ytapi/utils.py +0 -62
  42. webscout/Extra/YTToolkit/ytapi/video.py +0 -232
  43. webscout/Extra/__init__.py +0 -7
  44. webscout/Extra/autocoder/__init__.py +0 -9
  45. webscout/Extra/autocoder/autocoder.py +0 -849
  46. webscout/Extra/autocoder/autocoder_utiles.py +0 -332
  47. webscout/Extra/gguf.py +0 -682
  48. webscout/Extra/tempmail/__init__.py +0 -28
  49. webscout/Extra/tempmail/async_utils.py +0 -141
  50. webscout/Extra/tempmail/base.py +0 -161
  51. webscout/Extra/tempmail/cli.py +0 -187
  52. webscout/Extra/tempmail/emailnator.py +0 -84
  53. webscout/Extra/tempmail/mail_tm.py +0 -361
  54. webscout/Extra/tempmail/temp_mail_io.py +0 -292
  55. webscout/Extra/weather.py +0 -194
  56. webscout/Extra/weather_ascii.py +0 -76
  57. webscout/LLM.py +0 -442
  58. webscout/Litlogger/__init__.py +0 -67
  59. webscout/Litlogger/core/__init__.py +0 -6
  60. webscout/Litlogger/core/level.py +0 -23
  61. webscout/Litlogger/core/logger.py +0 -165
  62. webscout/Litlogger/handlers/__init__.py +0 -12
  63. webscout/Litlogger/handlers/console.py +0 -33
  64. webscout/Litlogger/handlers/file.py +0 -143
  65. webscout/Litlogger/handlers/network.py +0 -173
  66. webscout/Litlogger/styles/__init__.py +0 -7
  67. webscout/Litlogger/styles/colors.py +0 -249
  68. webscout/Litlogger/styles/formats.py +0 -458
  69. webscout/Litlogger/styles/text.py +0 -87
  70. webscout/Litlogger/utils/__init__.py +0 -6
  71. webscout/Litlogger/utils/detectors.py +0 -153
  72. webscout/Litlogger/utils/formatters.py +0 -200
  73. webscout/Local/__init__.py +0 -12
  74. webscout/Local/__main__.py +0 -9
  75. webscout/Local/api.py +0 -576
  76. webscout/Local/cli.py +0 -516
  77. webscout/Local/config.py +0 -75
  78. webscout/Local/llm.py +0 -287
  79. webscout/Local/model_manager.py +0 -253
  80. webscout/Local/server.py +0 -721
  81. webscout/Local/utils.py +0 -93
  82. webscout/Provider/AI21.py +0 -177
  83. webscout/Provider/AISEARCH/DeepFind.py +0 -250
  84. webscout/Provider/AISEARCH/ISou.py +0 -256
  85. webscout/Provider/AISEARCH/Perplexity.py +0 -359
  86. webscout/Provider/AISEARCH/__init__.py +0 -10
  87. webscout/Provider/AISEARCH/felo_search.py +0 -228
  88. webscout/Provider/AISEARCH/genspark_search.py +0 -208
  89. webscout/Provider/AISEARCH/hika_search.py +0 -194
  90. webscout/Provider/AISEARCH/iask_search.py +0 -436
  91. webscout/Provider/AISEARCH/monica_search.py +0 -246
  92. webscout/Provider/AISEARCH/scira_search.py +0 -324
  93. webscout/Provider/AISEARCH/webpilotai_search.py +0 -281
  94. webscout/Provider/Aitopia.py +0 -292
  95. webscout/Provider/AllenAI.py +0 -413
  96. webscout/Provider/Andi.py +0 -228
  97. webscout/Provider/Blackboxai.py +0 -229
  98. webscout/Provider/C4ai.py +0 -432
  99. webscout/Provider/ChatGPTClone.py +0 -226
  100. webscout/Provider/ChatGPTES.py +0 -237
  101. webscout/Provider/ChatGPTGratis.py +0 -194
  102. webscout/Provider/Chatify.py +0 -175
  103. webscout/Provider/Cloudflare.py +0 -273
  104. webscout/Provider/Cohere.py +0 -208
  105. webscout/Provider/DeepSeek.py +0 -196
  106. webscout/Provider/Deepinfra.py +0 -297
  107. webscout/Provider/ElectronHub.py +0 -709
  108. webscout/Provider/ExaAI.py +0 -261
  109. webscout/Provider/ExaChat.py +0 -342
  110. webscout/Provider/Free2GPT.py +0 -241
  111. webscout/Provider/GPTWeb.py +0 -193
  112. webscout/Provider/Gemini.py +0 -169
  113. webscout/Provider/GithubChat.py +0 -367
  114. webscout/Provider/Glider.py +0 -211
  115. webscout/Provider/Groq.py +0 -670
  116. webscout/Provider/HF_space/__init__.py +0 -0
  117. webscout/Provider/HF_space/qwen_qwen2.py +0 -206
  118. webscout/Provider/HeckAI.py +0 -233
  119. webscout/Provider/HuggingFaceChat.py +0 -462
  120. webscout/Provider/Hunyuan.py +0 -272
  121. webscout/Provider/Jadve.py +0 -266
  122. webscout/Provider/Koboldai.py +0 -381
  123. webscout/Provider/LambdaChat.py +0 -392
  124. webscout/Provider/Llama.py +0 -200
  125. webscout/Provider/Llama3.py +0 -204
  126. webscout/Provider/Marcus.py +0 -148
  127. webscout/Provider/Netwrck.py +0 -228
  128. webscout/Provider/OLLAMA.py +0 -396
  129. webscout/Provider/OPENAI/__init__.py +0 -25
  130. webscout/Provider/OPENAI/base.py +0 -46
  131. webscout/Provider/OPENAI/c4ai.py +0 -367
  132. webscout/Provider/OPENAI/chatgpt.py +0 -549
  133. webscout/Provider/OPENAI/chatgptclone.py +0 -460
  134. webscout/Provider/OPENAI/deepinfra.py +0 -272
  135. webscout/Provider/OPENAI/e2b.py +0 -1350
  136. webscout/Provider/OPENAI/exaai.py +0 -404
  137. webscout/Provider/OPENAI/exachat.py +0 -433
  138. webscout/Provider/OPENAI/freeaichat.py +0 -352
  139. webscout/Provider/OPENAI/glider.py +0 -316
  140. webscout/Provider/OPENAI/heckai.py +0 -337
  141. webscout/Provider/OPENAI/llmchatco.py +0 -327
  142. webscout/Provider/OPENAI/netwrck.py +0 -348
  143. webscout/Provider/OPENAI/opkfc.py +0 -488
  144. webscout/Provider/OPENAI/scirachat.py +0 -463
  145. webscout/Provider/OPENAI/sonus.py +0 -294
  146. webscout/Provider/OPENAI/standardinput.py +0 -425
  147. webscout/Provider/OPENAI/textpollinations.py +0 -285
  148. webscout/Provider/OPENAI/toolbaz.py +0 -405
  149. webscout/Provider/OPENAI/typegpt.py +0 -346
  150. webscout/Provider/OPENAI/uncovrAI.py +0 -455
  151. webscout/Provider/OPENAI/utils.py +0 -211
  152. webscout/Provider/OPENAI/venice.py +0 -413
  153. webscout/Provider/OPENAI/wisecat.py +0 -381
  154. webscout/Provider/OPENAI/writecream.py +0 -156
  155. webscout/Provider/OPENAI/x0gpt.py +0 -371
  156. webscout/Provider/OPENAI/yep.py +0 -327
  157. webscout/Provider/OpenGPT.py +0 -199
  158. webscout/Provider/Openai.py +0 -496
  159. webscout/Provider/PI.py +0 -344
  160. webscout/Provider/Perplexitylabs.py +0 -415
  161. webscout/Provider/Phind.py +0 -535
  162. webscout/Provider/PizzaGPT.py +0 -198
  163. webscout/Provider/QwenLM.py +0 -254
  164. webscout/Provider/Reka.py +0 -214
  165. webscout/Provider/StandardInput.py +0 -278
  166. webscout/Provider/TTI/AiForce/__init__.py +0 -22
  167. webscout/Provider/TTI/AiForce/async_aiforce.py +0 -224
  168. webscout/Provider/TTI/AiForce/sync_aiforce.py +0 -245
  169. webscout/Provider/TTI/FreeAIPlayground/__init__.py +0 -9
  170. webscout/Provider/TTI/FreeAIPlayground/async_freeaiplayground.py +0 -181
  171. webscout/Provider/TTI/FreeAIPlayground/sync_freeaiplayground.py +0 -180
  172. webscout/Provider/TTI/ImgSys/__init__.py +0 -23
  173. webscout/Provider/TTI/ImgSys/async_imgsys.py +0 -202
  174. webscout/Provider/TTI/ImgSys/sync_imgsys.py +0 -195
  175. webscout/Provider/TTI/MagicStudio/__init__.py +0 -2
  176. webscout/Provider/TTI/MagicStudio/async_magicstudio.py +0 -111
  177. webscout/Provider/TTI/MagicStudio/sync_magicstudio.py +0 -109
  178. webscout/Provider/TTI/Nexra/__init__.py +0 -22
  179. webscout/Provider/TTI/Nexra/async_nexra.py +0 -286
  180. webscout/Provider/TTI/Nexra/sync_nexra.py +0 -258
  181. webscout/Provider/TTI/PollinationsAI/__init__.py +0 -23
  182. webscout/Provider/TTI/PollinationsAI/async_pollinations.py +0 -311
  183. webscout/Provider/TTI/PollinationsAI/sync_pollinations.py +0 -265
  184. webscout/Provider/TTI/__init__.py +0 -12
  185. webscout/Provider/TTI/aiarta/__init__.py +0 -2
  186. webscout/Provider/TTI/aiarta/async_aiarta.py +0 -482
  187. webscout/Provider/TTI/aiarta/sync_aiarta.py +0 -440
  188. webscout/Provider/TTI/artbit/__init__.py +0 -22
  189. webscout/Provider/TTI/artbit/async_artbit.py +0 -155
  190. webscout/Provider/TTI/artbit/sync_artbit.py +0 -148
  191. webscout/Provider/TTI/fastflux/__init__.py +0 -22
  192. webscout/Provider/TTI/fastflux/async_fastflux.py +0 -261
  193. webscout/Provider/TTI/fastflux/sync_fastflux.py +0 -252
  194. webscout/Provider/TTI/huggingface/__init__.py +0 -22
  195. webscout/Provider/TTI/huggingface/async_huggingface.py +0 -199
  196. webscout/Provider/TTI/huggingface/sync_huggingface.py +0 -195
  197. webscout/Provider/TTI/piclumen/__init__.py +0 -23
  198. webscout/Provider/TTI/piclumen/async_piclumen.py +0 -268
  199. webscout/Provider/TTI/piclumen/sync_piclumen.py +0 -233
  200. webscout/Provider/TTI/pixelmuse/__init__.py +0 -4
  201. webscout/Provider/TTI/pixelmuse/async_pixelmuse.py +0 -249
  202. webscout/Provider/TTI/pixelmuse/sync_pixelmuse.py +0 -182
  203. webscout/Provider/TTI/talkai/__init__.py +0 -4
  204. webscout/Provider/TTI/talkai/async_talkai.py +0 -229
  205. webscout/Provider/TTI/talkai/sync_talkai.py +0 -207
  206. webscout/Provider/TTS/__init__.py +0 -7
  207. webscout/Provider/TTS/deepgram.py +0 -156
  208. webscout/Provider/TTS/elevenlabs.py +0 -111
  209. webscout/Provider/TTS/gesserit.py +0 -127
  210. webscout/Provider/TTS/murfai.py +0 -113
  211. webscout/Provider/TTS/parler.py +0 -111
  212. webscout/Provider/TTS/speechma.py +0 -180
  213. webscout/Provider/TTS/streamElements.py +0 -333
  214. webscout/Provider/TTS/utils.py +0 -280
  215. webscout/Provider/TeachAnything.py +0 -187
  216. webscout/Provider/TextPollinationsAI.py +0 -231
  217. webscout/Provider/TwoAI.py +0 -199
  218. webscout/Provider/Venice.py +0 -219
  219. webscout/Provider/VercelAI.py +0 -234
  220. webscout/Provider/WebSim.py +0 -228
  221. webscout/Provider/WiseCat.py +0 -196
  222. webscout/Provider/Writecream.py +0 -211
  223. webscout/Provider/WritingMate.py +0 -197
  224. webscout/Provider/Youchat.py +0 -330
  225. webscout/Provider/__init__.py +0 -198
  226. webscout/Provider/ai4chat.py +0 -202
  227. webscout/Provider/aimathgpt.py +0 -189
  228. webscout/Provider/akashgpt.py +0 -342
  229. webscout/Provider/askmyai.py +0 -158
  230. webscout/Provider/asksteve.py +0 -203
  231. webscout/Provider/bagoodex.py +0 -145
  232. webscout/Provider/cerebras.py +0 -242
  233. webscout/Provider/chatglm.py +0 -205
  234. webscout/Provider/cleeai.py +0 -213
  235. webscout/Provider/copilot.py +0 -428
  236. webscout/Provider/elmo.py +0 -234
  237. webscout/Provider/freeaichat.py +0 -271
  238. webscout/Provider/gaurish.py +0 -244
  239. webscout/Provider/geminiapi.py +0 -208
  240. webscout/Provider/geminiprorealtime.py +0 -160
  241. webscout/Provider/granite.py +0 -187
  242. webscout/Provider/hermes.py +0 -219
  243. webscout/Provider/julius.py +0 -223
  244. webscout/Provider/koala.py +0 -268
  245. webscout/Provider/labyrinth.py +0 -340
  246. webscout/Provider/learnfastai.py +0 -266
  247. webscout/Provider/lepton.py +0 -194
  248. webscout/Provider/llama3mitril.py +0 -180
  249. webscout/Provider/llamatutor.py +0 -192
  250. webscout/Provider/llmchat.py +0 -213
  251. webscout/Provider/llmchatco.py +0 -311
  252. webscout/Provider/meta.py +0 -794
  253. webscout/Provider/multichat.py +0 -325
  254. webscout/Provider/promptrefine.py +0 -193
  255. webscout/Provider/scira_chat.py +0 -277
  256. webscout/Provider/scnet.py +0 -187
  257. webscout/Provider/searchchat.py +0 -293
  258. webscout/Provider/sonus.py +0 -208
  259. webscout/Provider/talkai.py +0 -194
  260. webscout/Provider/toolbaz.py +0 -320
  261. webscout/Provider/turboseek.py +0 -219
  262. webscout/Provider/tutorai.py +0 -252
  263. webscout/Provider/typefully.py +0 -280
  264. webscout/Provider/typegpt.py +0 -232
  265. webscout/Provider/uncovr.py +0 -312
  266. webscout/Provider/x0gpt.py +0 -256
  267. webscout/Provider/yep.py +0 -376
  268. webscout/litagent/__init__.py +0 -29
  269. webscout/litagent/agent.py +0 -455
  270. webscout/litagent/constants.py +0 -60
  271. webscout/litprinter/__init__.py +0 -59
  272. webscout/scout/__init__.py +0 -8
  273. webscout/scout/core/__init__.py +0 -7
  274. webscout/scout/core/crawler.py +0 -140
  275. webscout/scout/core/scout.py +0 -568
  276. webscout/scout/core/search_result.py +0 -96
  277. webscout/scout/core/text_analyzer.py +0 -63
  278. webscout/scout/core/text_utils.py +0 -277
  279. webscout/scout/core/web_analyzer.py +0 -52
  280. webscout/scout/core.py +0 -881
  281. webscout/scout/element.py +0 -460
  282. webscout/scout/parsers/__init__.py +0 -69
  283. webscout/scout/parsers/html5lib_parser.py +0 -172
  284. webscout/scout/parsers/html_parser.py +0 -236
  285. webscout/scout/parsers/lxml_parser.py +0 -178
  286. webscout/scout/utils.py +0 -37
  287. webscout/swiftcli/__init__.py +0 -809
  288. webscout/zeroart/__init__.py +0 -55
  289. webscout/zeroart/base.py +0 -60
  290. webscout/zeroart/effects.py +0 -99
  291. webscout/zeroart/fonts.py +0 -816
  292. webscout-8.2.2.dist-info/RECORD +0 -309
  293. webscout-8.2.2.dist-info/entry_points.txt +0 -5
  294. webscout-8.2.2.dist-info/top_level.txt +0 -3
  295. webstoken/__init__.py +0 -30
  296. webstoken/classifier.py +0 -189
  297. webstoken/keywords.py +0 -216
  298. webstoken/language.py +0 -128
  299. webstoken/ner.py +0 -164
  300. webstoken/normalizer.py +0 -35
  301. webstoken/processor.py +0 -77
  302. webstoken/sentiment.py +0 -206
  303. webstoken/stemmer.py +0 -73
  304. webstoken/tagger.py +0 -60
  305. webstoken/tokenizer.py +0 -158
  306. {webscout-8.2.2.dist-info → webscout-8.2.7.dist-info/licenses}/LICENSE.md +0 -0
webscout/AIutel.py CHANGED
@@ -1,132 +1,250 @@
1
- import json
2
- import platform
3
- import subprocess
4
- from typing import Union
5
-
6
-
7
-
8
- def sanitize_stream(
9
- chunk: str, intro_value: str = "data:", to_json: bool = True
10
- ) -> Union[str, dict]:
11
- """Remove streaming flags
12
-
13
- Args:
14
- chunk (str): Streamig chunk.
15
- intro_value (str, optional): streaming flag. Defaults to "data:".
16
- to_json (bool, optional). Return chunk as dictionary. Defaults to True.
17
-
18
- Returns:
19
- str: Sanitized streaming value.
20
- """
21
-
22
- if chunk.startswith(intro_value):
23
- chunk = chunk[len(intro_value) :]
24
-
25
- return json.loads(chunk) if to_json else chunk
26
-
27
- def run_system_command(
28
- command: str,
29
- exit_on_error: bool = True,
30
- stdout_error: bool = True,
31
- help: str = None,
32
- ):
33
- """Run commands against system
34
- Args:
35
- command (str): shell command
36
- exit_on_error (bool, optional): Exit on error. Defaults to True.
37
- stdout_error (bool, optional): Print out the error. Defaults to True
38
- help (str, optional): Help info in case of exception. Defaults to None.
39
- Returns:
40
- tuple : (is_successful, object[Exception|Subprocess.run])
41
- """
42
- try:
43
- # Run the command and capture the output
44
- result = subprocess.run(
45
- command,
46
- shell=True,
47
- check=True,
48
- text=True,
49
- stdout=subprocess.PIPE,
50
- stderr=subprocess.PIPE,
51
- )
52
- return (True, result)
53
- except subprocess.CalledProcessError as e:
54
- if exit_on_error:
55
- raise Exception(f"Command failed with exit code {e.returncode}") from e
56
- else:
57
- return (False, e)
58
-
59
- class Updates:
60
- """Webscout latest release info"""
61
-
62
- url = "https://api.github.com/repos/OE-LUCIFER/Webscout/releases/latest"
63
-
64
- @property
65
- def latest_version(self):
66
- return self.latest(version=True)
67
-
68
- def executable(self, system: str = platform.system()) -> str:
69
- """Url pointing to executable for particular system
70
-
71
- Args:
72
- system (str, optional): system name. Defaults to platform.system().
73
-
74
- Returns:
75
- str: url
76
- """
77
- for entry in self.latest()["assets"]:
78
- if entry.get("target") == system:
79
- return entry.get("url")
80
-
81
- def latest(self, whole: bool = False, version: bool = False) -> dict:
82
- """Check Webscout latest version info
83
-
84
- Args:
85
- whole (bool, optional): Return whole json response. Defaults to False.
86
- version (bool, optional): return version only. Defaults to False.
87
-
88
- Returns:
89
- bool|dict: version str or whole dict info
90
- """
91
- import requests
92
-
93
- data = requests.get(self.url).json()
94
- if whole:
95
- return data
96
-
97
- elif version:
98
- return data.get("tag_name")
99
-
100
- else:
101
- sorted = dict(
102
- tag_name=data.get("tag_name"),
103
- tarball_url=data.get("tarball_url"),
104
- zipball_url=data.get("zipball_url"),
105
- html_url=data.get("html_url"),
106
- body=data.get("body"),
107
- )
108
- whole_assets = []
109
- for entry in data.get("assets"):
110
- url = entry.get("browser_download_url")
111
- assets = dict(url=url, size=entry.get("size"))
112
- if ".deb" in url:
113
- assets["target"] = "Debian"
114
- elif ".exe" in url:
115
- assets["target"] = "Windows"
116
- elif "macos" in url:
117
- assets["target"] = "Mac"
118
- elif "linux" in url:
119
- assets["target"] = "Linux"
120
-
121
- whole_assets.append(assets)
122
- sorted["assets"] = whole_assets
123
-
124
- return sorted
125
-
126
- from .conversation import Conversation
127
-
128
- from .optimizers import Optimizers
129
-
130
- from .Extra.autocoder import AutoCoder
131
-
1
+ import json
2
+ from typing import Union, Optional, Dict, Any, Iterable, Generator, List, Callable, Literal
3
+ import codecs
4
+
5
+ # Expanded encoding types
6
+ EncodingType = Literal['utf-8', 'utf-16', 'utf-32', 'ascii', 'latin1', 'cp1252', 'iso-8859-1',
7
+ 'iso-8859-2', 'windows-1250', 'windows-1251', 'windows-1252', 'gbk', 'big5',
8
+ 'shift_jis', 'euc-jp', 'euc-kr']
9
+
10
+ def _process_chunk(
11
+ chunk: str,
12
+ intro_value: str,
13
+ to_json: bool,
14
+ skip_markers: List[str],
15
+ strip_chars: Optional[str],
16
+ yield_raw_on_error: bool,
17
+ ) -> Union[str, Dict[str, Any], None]:
18
+ """Internal helper to sanitize and potentially parse a single chunk."""
19
+ if not isinstance(chunk, str):
20
+ return None
21
+
22
+ # Fast path for empty chunks
23
+ if not chunk:
24
+ return None
25
+
26
+ # Use slicing for prefix removal (faster than startswith+slicing)
27
+ sanitized_chunk = chunk
28
+ if intro_value and len(chunk) >= len(intro_value) and chunk[:len(intro_value)] == intro_value:
29
+ sanitized_chunk = chunk[len(intro_value):]
30
+
31
+ # Optimize string stripping operations
32
+ if strip_chars is not None:
33
+ sanitized_chunk = sanitized_chunk.strip(strip_chars)
34
+ else:
35
+ # lstrip() is faster than strip() when we only need leading whitespace removed
36
+ sanitized_chunk = sanitized_chunk.lstrip()
37
+
38
+ # Skip empty chunks and markers
39
+ if not sanitized_chunk or any(marker == sanitized_chunk for marker in skip_markers):
40
+ return None
41
+
42
+ # JSON parsing with optimized error handling
43
+ if to_json:
44
+ try:
45
+ # Only strip before JSON parsing if needed
46
+ if sanitized_chunk[0] not in '{[' or sanitized_chunk[-1] not in '}]':
47
+ sanitized_chunk = sanitized_chunk.strip()
48
+ return json.loads(sanitized_chunk)
49
+ except (json.JSONDecodeError, Exception):
50
+ return sanitized_chunk if yield_raw_on_error else None
51
+
52
+ return sanitized_chunk
53
+
54
+ def _decode_byte_stream(
55
+ byte_iterator: Iterable[bytes],
56
+ encoding: EncodingType = 'utf-8',
57
+ errors: str = 'replace',
58
+ buffer_size: int = 8192
59
+ ) -> Generator[str, None, None]:
60
+ """
61
+ Realtime byte stream decoder with flexible encoding support.
62
+
63
+ Args:
64
+ byte_iterator: Iterator yielding bytes
65
+ encoding: Character encoding to use
66
+ errors: How to handle encoding errors ('strict', 'ignore', 'replace')
67
+ buffer_size: Size of internal buffer for performance tuning
68
+ """
69
+ # Initialize decoder with the specified encoding
70
+ try:
71
+ decoder = codecs.getincrementaldecoder(encoding)(errors=errors)
72
+ except LookupError:
73
+ # Fallback to utf-8 if the encoding is not supported
74
+ decoder = codecs.getincrementaldecoder('utf-8')(errors=errors)
75
+
76
+ # Process byte stream in realtime
77
+ buffer = bytearray(buffer_size)
78
+ buffer_view = memoryview(buffer)
79
+
80
+ for chunk_bytes in byte_iterator:
81
+ if not chunk_bytes:
82
+ continue
83
+
84
+ try:
85
+ # Use buffer for processing if chunk size is appropriate
86
+ if len(chunk_bytes) <= buffer_size:
87
+ buffer[:len(chunk_bytes)] = chunk_bytes
88
+ text = decoder.decode(buffer_view[:len(chunk_bytes)], final=False)
89
+ else:
90
+ text = decoder.decode(chunk_bytes, final=False)
91
+
92
+ if text:
93
+ yield text
94
+ except UnicodeDecodeError:
95
+ yield f"[Encoding Error: Could not decode bytes with {encoding}]\n"
96
+
97
+ # Final flush
98
+ try:
99
+ final_text = decoder.decode(b'', final=True)
100
+ if final_text:
101
+ yield final_text
102
+ except UnicodeDecodeError:
103
+ yield f"[Encoding Error: Could not decode final bytes with {encoding}]\n"
104
+
105
+ def sanitize_stream(
106
+ data: Union[str, Iterable[str], Iterable[bytes]],
107
+ intro_value: str = "data:",
108
+ to_json: bool = True,
109
+ skip_markers: Optional[List[str]] = None,
110
+ strip_chars: Optional[str] = None,
111
+ start_marker: Optional[str] = None,
112
+ end_marker: Optional[str] = None,
113
+ content_extractor: Optional[Callable[[Union[str, Dict[str, Any]]], Optional[Any]]] = None,
114
+ yield_raw_on_error: bool = True,
115
+ encoding: EncodingType = 'utf-8',
116
+ encoding_errors: str = 'replace',
117
+ buffer_size: int = 8192,
118
+ ) -> Generator[Any, None, None]:
119
+ """
120
+ Optimized realtime stream processor that handles string/byte streams with minimal latency.
121
+
122
+ Features:
123
+ - Direct realtime processing of byte streams
124
+ - Optimized string handling and JSON parsing
125
+ - Robust error handling and validation
126
+ - Flexible encoding support with memory-efficient buffering
127
+ - High performance for large streams
128
+
129
+ Args:
130
+ data: Input data (string, string iterator, or bytes iterator)
131
+ intro_value: Prefix to remove from each chunk
132
+ to_json: Whether to parse chunks as JSON
133
+ skip_markers: Markers to skip
134
+ strip_chars: Characters to strip
135
+ start_marker: Processing start marker
136
+ end_marker: Processing end marker
137
+ content_extractor: Function to extract content
138
+ yield_raw_on_error: Yield raw content on JSON errors
139
+ encoding: Character encoding for byte streams
140
+ encoding_errors: How to handle encoding errors
141
+ buffer_size: Size of internal processing buffer
142
+
143
+ Yields:
144
+ Processed chunks (string or dictionary)
145
+ """
146
+ effective_skip_markers = skip_markers or []
147
+ processing_active = start_marker is None
148
+
149
+ # Fast path for single string processing
150
+ if isinstance(data, str):
151
+ processed_item = None
152
+ if processing_active:
153
+ # Optimize JSON parsing for large strings
154
+ if to_json:
155
+ try:
156
+ # Use faster JSON parser for large strings
157
+ data = data.strip()
158
+ if data:
159
+ processed_item = json.loads(data)
160
+ except json.JSONDecodeError:
161
+ processed_item = data if yield_raw_on_error else None
162
+ else:
163
+ processed_item = _process_chunk(
164
+ data, intro_value, False, effective_skip_markers,
165
+ strip_chars, yield_raw_on_error
166
+ )
167
+
168
+ if processed_item is not None:
169
+ if content_extractor:
170
+ try:
171
+ final_content = content_extractor(processed_item)
172
+ if final_content is not None:
173
+ yield final_content
174
+ except Exception:
175
+ pass
176
+ else:
177
+ yield processed_item
178
+ return
179
+
180
+ # Stream processing path
181
+ if not hasattr(data, '__iter__'):
182
+ raise TypeError(f"Input must be a string or an iterable, not {type(data).__name__}")
183
+
184
+ try:
185
+ iterator = iter(data)
186
+ first_item = next(iterator, None)
187
+ if first_item is None:
188
+ return
189
+
190
+ # Efficient streaming with itertools
191
+ from itertools import chain
192
+ stream = chain([first_item], iterator)
193
+
194
+ # Determine if we're dealing with bytes or strings
195
+ if isinstance(first_item, bytes):
196
+ line_iterator = _decode_byte_stream(
197
+ stream,
198
+ encoding=encoding,
199
+ errors=encoding_errors,
200
+ buffer_size=buffer_size
201
+ )
202
+ elif isinstance(first_item, str):
203
+ line_iterator = stream
204
+ else:
205
+ raise TypeError(f"Stream must yield strings or bytes, not {type(first_item).__name__}")
206
+
207
+ # Process stream with minimal allocations
208
+ for line in line_iterator:
209
+ if not line:
210
+ continue
211
+
212
+ # Handle markers efficiently
213
+ if not processing_active and start_marker is not None:
214
+ if line.strip() == start_marker:
215
+ processing_active = True
216
+ continue
217
+
218
+ if processing_active and end_marker is not None and line.strip() == end_marker:
219
+ processing_active = False
220
+ continue
221
+
222
+ if processing_active:
223
+ # Process chunk with optimized function
224
+ processed = _process_chunk(
225
+ line, intro_value, to_json, effective_skip_markers,
226
+ strip_chars, yield_raw_on_error
227
+ )
228
+
229
+ if processed is not None:
230
+ if content_extractor:
231
+ try:
232
+ final_content = content_extractor(processed)
233
+ if final_content is not None:
234
+ yield final_content
235
+ except Exception:
236
+ # Continue on extraction errors
237
+ pass
238
+ else:
239
+ yield processed
240
+
241
+ except Exception as e:
242
+ # Log error but don't crash on stream processing exceptions
243
+ import sys
244
+ print(f"Stream processing error: {str(e)}", file=sys.stderr)
245
+
246
+
247
+ from .conversation import Conversation
248
+ from .optimizers import Optimizers
249
+ from .Extra.autocoder import AutoCoder
132
250
  from .prompt_manager import AwesomePrompts