superqode-0.1.5-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288)
  1. superqode/__init__.py +33 -0
  2. superqode/acp/__init__.py +23 -0
  3. superqode/acp/client.py +913 -0
  4. superqode/acp/permission_screen.py +457 -0
  5. superqode/acp/types.py +480 -0
  6. superqode/acp_discovery.py +856 -0
  7. superqode/agent/__init__.py +22 -0
  8. superqode/agent/edit_strategies.py +334 -0
  9. superqode/agent/loop.py +892 -0
  10. superqode/agent/qe_report_templates.py +39 -0
  11. superqode/agent/system_prompts.py +353 -0
  12. superqode/agent_output.py +721 -0
  13. superqode/agent_stream.py +953 -0
  14. superqode/agents/__init__.py +59 -0
  15. superqode/agents/acp_registry.py +305 -0
  16. superqode/agents/client.py +249 -0
  17. superqode/agents/data/augmentcode.com.toml +51 -0
  18. superqode/agents/data/cagent.dev.toml +51 -0
  19. superqode/agents/data/claude.com.toml +60 -0
  20. superqode/agents/data/codeassistant.dev.toml +51 -0
  21. superqode/agents/data/codex.openai.com.toml +57 -0
  22. superqode/agents/data/fastagent.ai.toml +66 -0
  23. superqode/agents/data/geminicli.com.toml +77 -0
  24. superqode/agents/data/goose.block.xyz.toml +54 -0
  25. superqode/agents/data/junie.jetbrains.com.toml +56 -0
  26. superqode/agents/data/kimi.moonshot.cn.toml +57 -0
  27. superqode/agents/data/llmlingagent.dev.toml +51 -0
  28. superqode/agents/data/molt.bot.toml +49 -0
  29. superqode/agents/data/opencode.ai.toml +60 -0
  30. superqode/agents/data/stakpak.dev.toml +51 -0
  31. superqode/agents/data/vtcode.dev.toml +51 -0
  32. superqode/agents/discovery.py +266 -0
  33. superqode/agents/messaging.py +160 -0
  34. superqode/agents/persona.py +166 -0
  35. superqode/agents/registry.py +421 -0
  36. superqode/agents/schema.py +72 -0
  37. superqode/agents/unified.py +367 -0
  38. superqode/app/__init__.py +111 -0
  39. superqode/app/constants.py +314 -0
  40. superqode/app/css.py +366 -0
  41. superqode/app/models.py +118 -0
  42. superqode/app/suggester.py +125 -0
  43. superqode/app/widgets.py +1591 -0
  44. superqode/app_enhanced.py +399 -0
  45. superqode/app_main.py +17187 -0
  46. superqode/approval.py +312 -0
  47. superqode/atomic.py +296 -0
  48. superqode/commands/__init__.py +1 -0
  49. superqode/commands/acp.py +965 -0
  50. superqode/commands/agents.py +180 -0
  51. superqode/commands/auth.py +278 -0
  52. superqode/commands/config.py +374 -0
  53. superqode/commands/init.py +826 -0
  54. superqode/commands/providers.py +819 -0
  55. superqode/commands/qe.py +1145 -0
  56. superqode/commands/roles.py +380 -0
  57. superqode/commands/serve.py +172 -0
  58. superqode/commands/suggestions.py +127 -0
  59. superqode/commands/superqe.py +460 -0
  60. superqode/config/__init__.py +51 -0
  61. superqode/config/loader.py +812 -0
  62. superqode/config/schema.py +498 -0
  63. superqode/core/__init__.py +111 -0
  64. superqode/core/roles.py +281 -0
  65. superqode/danger.py +386 -0
  66. superqode/data/superqode-template.yaml +1522 -0
  67. superqode/design_system.py +1080 -0
  68. superqode/dialogs/__init__.py +6 -0
  69. superqode/dialogs/base.py +39 -0
  70. superqode/dialogs/model.py +130 -0
  71. superqode/dialogs/provider.py +870 -0
  72. superqode/diff_view.py +919 -0
  73. superqode/enterprise.py +21 -0
  74. superqode/evaluation/__init__.py +25 -0
  75. superqode/evaluation/adapters.py +93 -0
  76. superqode/evaluation/behaviors.py +89 -0
  77. superqode/evaluation/engine.py +209 -0
  78. superqode/evaluation/scenarios.py +96 -0
  79. superqode/execution/__init__.py +36 -0
  80. superqode/execution/linter.py +538 -0
  81. superqode/execution/modes.py +347 -0
  82. superqode/execution/resolver.py +283 -0
  83. superqode/execution/runner.py +642 -0
  84. superqode/file_explorer.py +811 -0
  85. superqode/file_viewer.py +471 -0
  86. superqode/flash.py +183 -0
  87. superqode/guidance/__init__.py +58 -0
  88. superqode/guidance/config.py +203 -0
  89. superqode/guidance/prompts.py +71 -0
  90. superqode/harness/__init__.py +54 -0
  91. superqode/harness/accelerator.py +291 -0
  92. superqode/harness/config.py +319 -0
  93. superqode/harness/validator.py +147 -0
  94. superqode/history.py +279 -0
  95. superqode/integrations/superopt_runner.py +124 -0
  96. superqode/logging/__init__.py +49 -0
  97. superqode/logging/adapters.py +219 -0
  98. superqode/logging/formatter.py +923 -0
  99. superqode/logging/integration.py +341 -0
  100. superqode/logging/sinks.py +170 -0
  101. superqode/logging/unified_log.py +417 -0
  102. superqode/lsp/__init__.py +26 -0
  103. superqode/lsp/client.py +544 -0
  104. superqode/main.py +1069 -0
  105. superqode/mcp/__init__.py +89 -0
  106. superqode/mcp/auth_storage.py +380 -0
  107. superqode/mcp/client.py +1236 -0
  108. superqode/mcp/config.py +319 -0
  109. superqode/mcp/integration.py +337 -0
  110. superqode/mcp/oauth.py +436 -0
  111. superqode/mcp/oauth_callback.py +385 -0
  112. superqode/mcp/types.py +290 -0
  113. superqode/memory/__init__.py +31 -0
  114. superqode/memory/feedback.py +342 -0
  115. superqode/memory/store.py +522 -0
  116. superqode/notifications.py +369 -0
  117. superqode/optimization/__init__.py +5 -0
  118. superqode/optimization/config.py +33 -0
  119. superqode/permissions/__init__.py +25 -0
  120. superqode/permissions/rules.py +488 -0
  121. superqode/plan.py +323 -0
  122. superqode/providers/__init__.py +33 -0
  123. superqode/providers/gateway/__init__.py +165 -0
  124. superqode/providers/gateway/base.py +228 -0
  125. superqode/providers/gateway/litellm_gateway.py +1170 -0
  126. superqode/providers/gateway/openresponses_gateway.py +436 -0
  127. superqode/providers/health.py +297 -0
  128. superqode/providers/huggingface/__init__.py +74 -0
  129. superqode/providers/huggingface/downloader.py +472 -0
  130. superqode/providers/huggingface/endpoints.py +442 -0
  131. superqode/providers/huggingface/hub.py +531 -0
  132. superqode/providers/huggingface/inference.py +394 -0
  133. superqode/providers/huggingface/transformers_runner.py +516 -0
  134. superqode/providers/local/__init__.py +100 -0
  135. superqode/providers/local/base.py +438 -0
  136. superqode/providers/local/discovery.py +418 -0
  137. superqode/providers/local/lmstudio.py +256 -0
  138. superqode/providers/local/mlx.py +457 -0
  139. superqode/providers/local/ollama.py +486 -0
  140. superqode/providers/local/sglang.py +268 -0
  141. superqode/providers/local/tgi.py +260 -0
  142. superqode/providers/local/tool_support.py +477 -0
  143. superqode/providers/local/vllm.py +258 -0
  144. superqode/providers/manager.py +1338 -0
  145. superqode/providers/models.py +1016 -0
  146. superqode/providers/models_dev.py +578 -0
  147. superqode/providers/openresponses/__init__.py +87 -0
  148. superqode/providers/openresponses/converters/__init__.py +17 -0
  149. superqode/providers/openresponses/converters/messages.py +343 -0
  150. superqode/providers/openresponses/converters/tools.py +268 -0
  151. superqode/providers/openresponses/schema/__init__.py +56 -0
  152. superqode/providers/openresponses/schema/models.py +585 -0
  153. superqode/providers/openresponses/streaming/__init__.py +5 -0
  154. superqode/providers/openresponses/streaming/parser.py +338 -0
  155. superqode/providers/openresponses/tools/__init__.py +21 -0
  156. superqode/providers/openresponses/tools/apply_patch.py +352 -0
  157. superqode/providers/openresponses/tools/code_interpreter.py +290 -0
  158. superqode/providers/openresponses/tools/file_search.py +333 -0
  159. superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
  160. superqode/providers/registry.py +716 -0
  161. superqode/providers/usage.py +332 -0
  162. superqode/pure_mode.py +384 -0
  163. superqode/qr/__init__.py +23 -0
  164. superqode/qr/dashboard.py +781 -0
  165. superqode/qr/generator.py +1018 -0
  166. superqode/qr/templates.py +135 -0
  167. superqode/safety/__init__.py +41 -0
  168. superqode/safety/sandbox.py +413 -0
  169. superqode/safety/warnings.py +256 -0
  170. superqode/server/__init__.py +33 -0
  171. superqode/server/lsp_server.py +775 -0
  172. superqode/server/web.py +250 -0
  173. superqode/session/__init__.py +25 -0
  174. superqode/session/persistence.py +580 -0
  175. superqode/session/sharing.py +477 -0
  176. superqode/session.py +475 -0
  177. superqode/sidebar.py +2991 -0
  178. superqode/stream_view.py +648 -0
  179. superqode/styles/__init__.py +3 -0
  180. superqode/superqe/__init__.py +184 -0
  181. superqode/superqe/acp_runner.py +1064 -0
  182. superqode/superqe/constitution/__init__.py +62 -0
  183. superqode/superqe/constitution/evaluator.py +308 -0
  184. superqode/superqe/constitution/loader.py +432 -0
  185. superqode/superqe/constitution/schema.py +250 -0
  186. superqode/superqe/events.py +591 -0
  187. superqode/superqe/frameworks/__init__.py +65 -0
  188. superqode/superqe/frameworks/base.py +234 -0
  189. superqode/superqe/frameworks/e2e.py +263 -0
  190. superqode/superqe/frameworks/executor.py +237 -0
  191. superqode/superqe/frameworks/javascript.py +409 -0
  192. superqode/superqe/frameworks/python.py +373 -0
  193. superqode/superqe/frameworks/registry.py +92 -0
  194. superqode/superqe/mcp_tools/__init__.py +47 -0
  195. superqode/superqe/mcp_tools/core_tools.py +418 -0
  196. superqode/superqe/mcp_tools/registry.py +230 -0
  197. superqode/superqe/mcp_tools/testing_tools.py +167 -0
  198. superqode/superqe/noise.py +89 -0
  199. superqode/superqe/orchestrator.py +778 -0
  200. superqode/superqe/roles.py +609 -0
  201. superqode/superqe/session.py +713 -0
  202. superqode/superqe/skills/__init__.py +57 -0
  203. superqode/superqe/skills/base.py +106 -0
  204. superqode/superqe/skills/core_skills.py +899 -0
  205. superqode/superqe/skills/registry.py +90 -0
  206. superqode/superqe/verifier.py +101 -0
  207. superqode/superqe_cli.py +76 -0
  208. superqode/tool_call.py +358 -0
  209. superqode/tools/__init__.py +93 -0
  210. superqode/tools/agent_tools.py +496 -0
  211. superqode/tools/base.py +324 -0
  212. superqode/tools/batch_tool.py +133 -0
  213. superqode/tools/diagnostics.py +311 -0
  214. superqode/tools/edit_tools.py +653 -0
  215. superqode/tools/enhanced_base.py +515 -0
  216. superqode/tools/file_tools.py +269 -0
  217. superqode/tools/file_tracking.py +45 -0
  218. superqode/tools/lsp_tools.py +610 -0
  219. superqode/tools/network_tools.py +350 -0
  220. superqode/tools/permissions.py +400 -0
  221. superqode/tools/question_tool.py +324 -0
  222. superqode/tools/search_tools.py +598 -0
  223. superqode/tools/shell_tools.py +259 -0
  224. superqode/tools/todo_tools.py +121 -0
  225. superqode/tools/validation.py +80 -0
  226. superqode/tools/web_tools.py +639 -0
  227. superqode/tui.py +1152 -0
  228. superqode/tui_integration.py +875 -0
  229. superqode/tui_widgets/__init__.py +27 -0
  230. superqode/tui_widgets/widgets/__init__.py +18 -0
  231. superqode/tui_widgets/widgets/progress.py +185 -0
  232. superqode/tui_widgets/widgets/tool_display.py +188 -0
  233. superqode/undo_manager.py +574 -0
  234. superqode/utils/__init__.py +5 -0
  235. superqode/utils/error_handling.py +323 -0
  236. superqode/utils/fuzzy.py +257 -0
  237. superqode/widgets/__init__.py +477 -0
  238. superqode/widgets/agent_collab.py +390 -0
  239. superqode/widgets/agent_store.py +936 -0
  240. superqode/widgets/agent_switcher.py +395 -0
  241. superqode/widgets/animation_manager.py +284 -0
  242. superqode/widgets/code_context.py +356 -0
  243. superqode/widgets/command_palette.py +412 -0
  244. superqode/widgets/connection_status.py +537 -0
  245. superqode/widgets/conversation_history.py +470 -0
  246. superqode/widgets/diff_indicator.py +155 -0
  247. superqode/widgets/enhanced_status_bar.py +385 -0
  248. superqode/widgets/enhanced_toast.py +476 -0
  249. superqode/widgets/file_browser.py +809 -0
  250. superqode/widgets/file_reference.py +585 -0
  251. superqode/widgets/issue_timeline.py +340 -0
  252. superqode/widgets/leader_key.py +264 -0
  253. superqode/widgets/mode_switcher.py +445 -0
  254. superqode/widgets/model_picker.py +234 -0
  255. superqode/widgets/permission_preview.py +1205 -0
  256. superqode/widgets/prompt.py +358 -0
  257. superqode/widgets/provider_connect.py +725 -0
  258. superqode/widgets/pty_shell.py +587 -0
  259. superqode/widgets/qe_dashboard.py +321 -0
  260. superqode/widgets/resizable_sidebar.py +377 -0
  261. superqode/widgets/response_changes.py +218 -0
  262. superqode/widgets/response_display.py +528 -0
  263. superqode/widgets/rich_tool_display.py +613 -0
  264. superqode/widgets/sidebar_panels.py +1180 -0
  265. superqode/widgets/slash_complete.py +356 -0
  266. superqode/widgets/split_view.py +612 -0
  267. superqode/widgets/status_bar.py +273 -0
  268. superqode/widgets/superqode_display.py +786 -0
  269. superqode/widgets/thinking_display.py +815 -0
  270. superqode/widgets/throbber.py +87 -0
  271. superqode/widgets/toast.py +206 -0
  272. superqode/widgets/unified_output.py +1073 -0
  273. superqode/workspace/__init__.py +75 -0
  274. superqode/workspace/artifacts.py +472 -0
  275. superqode/workspace/coordinator.py +353 -0
  276. superqode/workspace/diff_tracker.py +429 -0
  277. superqode/workspace/git_guard.py +373 -0
  278. superqode/workspace/git_snapshot.py +526 -0
  279. superqode/workspace/manager.py +750 -0
  280. superqode/workspace/snapshot.py +357 -0
  281. superqode/workspace/watcher.py +535 -0
  282. superqode/workspace/worktree.py +440 -0
  283. superqode-0.1.5.dist-info/METADATA +204 -0
  284. superqode-0.1.5.dist-info/RECORD +288 -0
  285. superqode-0.1.5.dist-info/WHEEL +5 -0
  286. superqode-0.1.5.dist-info/entry_points.txt +3 -0
  287. superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
  288. superqode-0.1.5.dist-info/top_level.txt +1 -0
superqode/providers/huggingface/transformers_runner.py
@@ -0,0 +1,516 @@
+ """Local transformers runner for pure Python inference.
+
+ This module provides the ability to run HuggingFace models locally
+ using the transformers library without requiring Ollama or other
+ external servers.
+
+ Requires optional dependencies:
+     pip install superqode[transformers]
+
+ Or manually:
+     pip install transformers accelerate torch
+ """
+
+ import asyncio
+ import gc
+ import os
+ from dataclasses import dataclass, field
+ from typing import Any, Dict, List, Optional, Union
+
+
+ @dataclass
+ class TransformersConfig:
+     """Configuration for transformers model loading.
+
+     Attributes:
+         quantization: Quantization mode ("4bit", "8bit", None)
+         device_map: Device mapping strategy ("auto", "cpu", "cuda", etc.)
+         torch_dtype: Data type ("float16", "bfloat16", "float32", "auto")
+         max_memory: Max memory per device (e.g., {"cuda:0": "10GB"})
+         trust_remote_code: Allow executing model's custom code
+         use_flash_attention: Enable Flash Attention 2 if available
+         low_cpu_mem_usage: Reduce CPU memory during loading
+     """
+
+     quantization: Optional[str] = None
+     device_map: str = "auto"
+     torch_dtype: str = "auto"
+     max_memory: Optional[Dict[str, str]] = None
+     trust_remote_code: bool = False
+     use_flash_attention: bool = True
+     low_cpu_mem_usage: bool = True
+
+
+ @dataclass
+ class GenerationResult:
+     """Result from text generation.
+
+     Attributes:
+         content: Generated text
+         model_id: Model used
+         input_tokens: Number of input tokens
+         output_tokens: Number of generated tokens
+         time_seconds: Generation time
+         tokens_per_second: Generation speed
+         error: Error message if failed
+     """
+
+     content: str = ""
+     model_id: str = ""
+     input_tokens: int = 0
+     output_tokens: int = 0
+     time_seconds: float = 0.0
+     tokens_per_second: float = 0.0
+     error: str = ""
+
+
+ @dataclass
+ class LoadedModel:
+     """Information about a loaded model.
+
+     Attributes:
+         model_id: HuggingFace model ID
+         model: The loaded model object
+         tokenizer: The loaded tokenizer
+         config: Loading configuration used
+         memory_usage_gb: Estimated GPU memory usage
+     """
+
+     model_id: str
+     model: Any = None
+     tokenizer: Any = None
+     config: TransformersConfig = field(default_factory=TransformersConfig)
+     memory_usage_gb: float = 0.0
+
+
+ class TransformersRunner:
+     """Run HuggingFace models locally using transformers.
+
+     This class provides a pure Python way to run models without
+     external servers like Ollama. It handles model loading, caching,
+     and generation with support for:
+
+     - 4-bit and 8-bit quantization via bitsandbytes
+     - Automatic device mapping (CPU/GPU)
+     - Flash Attention 2 when available
+     - Memory-efficient loading
+
+     Example:
+         runner = TransformersRunner()
+         await runner.load_model("microsoft/Phi-3.5-mini-instruct")
+
+         response = await runner.generate(
+             messages=[{"role": "user", "content": "Hello!"}]
+         )
+         print(response.content)
+
+         await runner.unload()
+     """
+
+     def __init__(self):
+         """Initialize the transformers runner."""
+         self._loaded: Optional[LoadedModel] = None
+         self._dependencies_checked = False
+         self._available_deps: Dict[str, bool] = {}
+
+     @property
+     def is_loaded(self) -> bool:
+         """Check if a model is currently loaded."""
+         return self._loaded is not None and self._loaded.model is not None
+
+     @property
+     def loaded_model_id(self) -> Optional[str]:
+         """Get the ID of the currently loaded model."""
+         return self._loaded.model_id if self._loaded else None
+
+     def check_dependencies(self) -> Dict[str, bool]:
+         """Check which transformers dependencies are available.
+
+         Returns:
+             Dict mapping dependency name to availability.
+         """
+         if self._dependencies_checked:
+             return self._available_deps
+
+         deps = {
+             "transformers": False,
+             "torch": False,
+             "accelerate": False,
+             "bitsandbytes": False,
+             "flash_attn": False,
+         }
+
+         try:
+             import transformers
+
+             deps["transformers"] = True
+         except ImportError:
+             pass
+
+         try:
+             import torch
+
+             deps["torch"] = True
+         except ImportError:
+             pass
+
+         try:
+             import accelerate
+
+             deps["accelerate"] = True
+         except ImportError:
+             pass
+
+         try:
+             import bitsandbytes
+
+             deps["bitsandbytes"] = True
+         except ImportError:
+             pass
+
+         try:
+             import flash_attn
+
+             deps["flash_attn"] = True
+         except ImportError:
+             pass
+
+         self._available_deps = deps
+         self._dependencies_checked = True
+         return deps
+
+     def is_available(self) -> bool:
+         """Check if transformers runner can be used.
+
+         Returns:
+             True if required dependencies are available.
+         """
+         deps = self.check_dependencies()
+         return deps["transformers"] and deps["torch"]
+
+     def get_device_info(self) -> Dict[str, Any]:
+         """Get information about available compute devices.
+
+         Returns:
+             Dict with device information.
+         """
+         deps = self.check_dependencies()
+
+         if not deps["torch"]:
+             return {
+                 "available": False,
+                 "error": "PyTorch not installed",
+             }
+
+         import torch
+
+         info = {
+             "available": True,
+             "cuda_available": torch.cuda.is_available(),
+             "mps_available": hasattr(torch.backends, "mps") and torch.backends.mps.is_available(),
+             "cpu_threads": torch.get_num_threads(),
+         }
+
+         if info["cuda_available"]:
+             info["cuda_device_count"] = torch.cuda.device_count()
+             info["cuda_device_name"] = torch.cuda.get_device_name(0)
+             info["cuda_memory_gb"] = torch.cuda.get_device_properties(0).total_memory / (1024**3)
+
+         return info
+
+     async def load_model(
+         self, model_id: str, config: Optional[TransformersConfig] = None, force: bool = False
+     ) -> bool:
+         """Load a model for inference.
+
+         Args:
+             model_id: HuggingFace model ID.
+             config: Loading configuration.
+             force: Force reload even if model is already loaded.
+
+         Returns:
+             True if loading succeeded.
+         """
+         # Check if already loaded
+         if self.is_loaded and self._loaded.model_id == model_id and not force:
+             return True
+
+         # Unload existing model
+         if self.is_loaded:
+             await self.unload()
+
+         deps = self.check_dependencies()
+         if not deps["transformers"] or not deps["torch"]:
+             return False
+
+         config = config or TransformersConfig()
+
+         # Run loading in executor to not block
+         loop = asyncio.get_event_loop()
+         loaded = await loop.run_in_executor(None, lambda: self._load_model_sync(model_id, config))
+
+         if loaded:
+             self._loaded = loaded
+             return True
+
+         return False
+
+     def _load_model_sync(self, model_id: str, config: TransformersConfig) -> Optional[LoadedModel]:
+         """Synchronous model loading."""
+         try:
+             import torch
+             from transformers import AutoModelForCausalLM, AutoTokenizer
+
+             # Get HF token
+             token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
+
+             # Build loading kwargs
+             model_kwargs: Dict[str, Any] = {
+                 "device_map": config.device_map,
+                 "low_cpu_mem_usage": config.low_cpu_mem_usage,
+                 "trust_remote_code": config.trust_remote_code,
+             }
+
+             if token:
+                 model_kwargs["token"] = token
+
+             # Handle torch dtype
+             if config.torch_dtype == "auto":
+                 model_kwargs["torch_dtype"] = "auto"
+             elif config.torch_dtype == "float16":
+                 model_kwargs["torch_dtype"] = torch.float16
+             elif config.torch_dtype == "bfloat16":
+                 model_kwargs["torch_dtype"] = torch.bfloat16
+             elif config.torch_dtype == "float32":
+                 model_kwargs["torch_dtype"] = torch.float32
+
+             # Handle quantization
+             if config.quantization and self._available_deps.get("bitsandbytes"):
+                 from transformers import BitsAndBytesConfig
+
+                 if config.quantization == "4bit":
+                     model_kwargs["quantization_config"] = BitsAndBytesConfig(
+                         load_in_4bit=True,
+                         bnb_4bit_compute_dtype=torch.float16,
+                         bnb_4bit_quant_type="nf4",
+                         bnb_4bit_use_double_quant=True,
+                     )
+                 elif config.quantization == "8bit":
+                     model_kwargs["quantization_config"] = BitsAndBytesConfig(
+                         load_in_8bit=True,
+                     )
+
+             # Handle max memory
+             if config.max_memory:
+                 model_kwargs["max_memory"] = config.max_memory
+
+             # Handle flash attention
+             if config.use_flash_attention and self._available_deps.get("flash_attn"):
+                 model_kwargs["attn_implementation"] = "flash_attention_2"
+
+             # Load tokenizer
+             tokenizer = AutoTokenizer.from_pretrained(
+                 model_id,
+                 token=token,
+                 trust_remote_code=config.trust_remote_code,
+             )
+
+             # Load model
+             model = AutoModelForCausalLM.from_pretrained(model_id, **model_kwargs)
+
+             # Estimate memory usage
+             memory_gb = 0.0
+             if torch.cuda.is_available():
+                 memory_gb = torch.cuda.memory_allocated() / (1024**3)
+
+             return LoadedModel(
+                 model_id=model_id,
+                 model=model,
+                 tokenizer=tokenizer,
+                 config=config,
+                 memory_usage_gb=memory_gb,
+             )
+
+         except Exception as e:
+             print(f"Error loading model: {e}")
+             return None
+
+     async def generate(
+         self,
+         messages: List[Dict[str, str]],
+         max_tokens: int = 2048,
+         temperature: float = 0.7,
+         top_p: float = 0.9,
+         top_k: int = 50,
+         stop: Optional[List[str]] = None,
+     ) -> GenerationResult:
+         """Generate text from messages.
+
+         Args:
+             messages: Chat messages in OpenAI format.
+             max_tokens: Maximum tokens to generate.
+             temperature: Sampling temperature.
+             top_p: Nucleus sampling threshold.
+             top_k: Top-k sampling.
+             stop: Stop sequences.
+
+         Returns:
+             GenerationResult with generated text.
+         """
+         if not self.is_loaded:
+             return GenerationResult(error="No model loaded")
+
+         loop = asyncio.get_event_loop()
+         return await loop.run_in_executor(
+             None, lambda: self._generate_sync(messages, max_tokens, temperature, top_p, top_k, stop)
+         )
+
+     def _generate_sync(
+         self,
+         messages: List[Dict[str, str]],
+         max_tokens: int,
+         temperature: float,
+         top_p: float,
+         top_k: int,
+         stop: Optional[List[str]],
+     ) -> GenerationResult:
+         """Synchronous generation."""
+         import time
+         import torch
+
+         try:
+             model = self._loaded.model
+             tokenizer = self._loaded.tokenizer
+             model_id = self._loaded.model_id
+
+             # Apply chat template
+             if hasattr(tokenizer, "apply_chat_template"):
+                 prompt = tokenizer.apply_chat_template(
+                     messages,
+                     tokenize=False,
+                     add_generation_prompt=True,
+                 )
+             else:
+                 # Fallback for models without chat template
+                 prompt = (
+                     "\n".join(f"{m['role']}: {m['content']}" for m in messages) + "\nassistant:"
+                 )
+
+             # Tokenize
+             inputs = tokenizer(prompt, return_tensors="pt")
+             input_length = inputs["input_ids"].shape[1]
+
+             # Move to model device
+             device = next(model.parameters()).device
+             inputs = {k: v.to(device) for k, v in inputs.items()}
+
+             # Build generation kwargs
+             gen_kwargs = {
+                 "max_new_tokens": max_tokens,
+                 "do_sample": temperature > 0,
+                 "pad_token_id": tokenizer.pad_token_id or tokenizer.eos_token_id,
+             }
+
+             if temperature > 0:
+                 gen_kwargs["temperature"] = temperature
+                 gen_kwargs["top_p"] = top_p
+                 gen_kwargs["top_k"] = top_k
+
+             # Handle stop sequences
+             if stop:
+                 stop_ids = [tokenizer.encode(s, add_special_tokens=False) for s in stop]
+                 # Flatten for stopping_criteria would be complex, skip for now
+
+             # Generate
+             start_time = time.time()
+
+             with torch.no_grad():
+                 outputs = model.generate(**inputs, **gen_kwargs)
+
+             gen_time = time.time() - start_time
+
+             # Decode output
+             output_tokens = outputs[0][input_length:]
+             output_length = len(output_tokens)
+
+             generated_text = tokenizer.decode(output_tokens, skip_special_tokens=True)
+
+             # Calculate speed
+             tokens_per_sec = output_length / gen_time if gen_time > 0 else 0
+
+             return GenerationResult(
+                 content=generated_text.strip(),
+                 model_id=model_id,
+                 input_tokens=input_length,
+                 output_tokens=output_length,
+                 time_seconds=gen_time,
+                 tokens_per_second=tokens_per_sec,
+             )
+
+         except Exception as e:
+             return GenerationResult(
+                 model_id=self._loaded.model_id if self._loaded else "", error=str(e)
+             )
+
+     async def unload(self) -> None:
+         """Unload the current model and free memory."""
+         if not self.is_loaded:
+             return
+
+         loop = asyncio.get_event_loop()
+         await loop.run_in_executor(None, self._unload_sync)
+
+     def _unload_sync(self) -> None:
+         """Synchronous unload."""
+         if self._loaded:
+             # Delete model references
+             if self._loaded.model is not None:
+                 del self._loaded.model
+             if self._loaded.tokenizer is not None:
+                 del self._loaded.tokenizer
+
+             self._loaded = None
+
+         # Force garbage collection
+         gc.collect()
+
+         # Clear CUDA cache if available
+         try:
+             import torch
+
+             if torch.cuda.is_available():
+                 torch.cuda.empty_cache()
+         except Exception:
+             pass
+
+     def get_loaded_info(self) -> Optional[Dict[str, Any]]:
+         """Get information about the currently loaded model.
+
+         Returns:
+             Dict with model info, or None if no model loaded.
+         """
+         if not self.is_loaded:
+             return None
+
+         return {
+             "model_id": self._loaded.model_id,
+             "memory_usage_gb": self._loaded.memory_usage_gb,
+             "quantization": self._loaded.config.quantization,
+             "device_map": self._loaded.config.device_map,
+         }
+
+
+ # Singleton instance
+ _runner_instance: Optional[TransformersRunner] = None
+
+
+ def get_transformers_runner() -> TransformersRunner:
+     """Get the global TransformersRunner instance.
+
+     Returns:
+         TransformersRunner instance.
+     """
+     global _runner_instance
+     if _runner_instance is None:
+         _runner_instance = TransformersRunner()
+     return _runner_instance
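
End to end, the module above is used roughly as follows. This is a minimal sketch, not part of the package: it assumes the wheel is installed with its optional extras (pip install superqode[transformers]) and reuses the model ID from the class docstring; every call below is a name defined in the hunk above.

    import asyncio

    from superqode.providers.huggingface.transformers_runner import (
        TransformersConfig,
        get_transformers_runner,
    )

    async def main() -> None:
        runner = get_transformers_runner()  # process-wide singleton
        if not runner.is_available():  # requires transformers + torch importable
            raise SystemExit("pip install superqode[transformers]")

        # 4-bit NF4 loading is applied only when bitsandbytes is installed;
        # otherwise _load_model_sync silently skips the quantization_config.
        config = TransformersConfig(quantization="4bit", torch_dtype="auto")
        if not await runner.load_model("microsoft/Phi-3.5-mini-instruct", config=config):
            raise SystemExit("model failed to load")

        result = await runner.generate(
            messages=[{"role": "user", "content": "Hello!"}],
            max_tokens=256,
        )
        print(result.error or result.content)
        print(f"{result.tokens_per_second:.1f} tok/s")

        await runner.unload()  # drops references, runs gc, empties the CUDA cache

    asyncio.run(main())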
superqode/providers/local/__init__.py
@@ -0,0 +1,100 @@
+ """Local LLM provider clients and utilities.
+
+ This module provides clients for self-hosted LLM servers including:
+ - Ollama
+ - LM Studio
+ - vLLM
+ - SGLang
+ - MLX-LM
+ - TGI (Text Generation Inference)
+ - llama.cpp server
+ - Generic OpenAI-compatible servers
+ """
+
+ from superqode.providers.local.base import (
+     LocalProviderType,
+     Quantization,
+     LocalModel,
+     ProviderStatus,
+     ToolTestResult,
+     GenerationConfig,
+     LocalProviderClient,
+     MODEL_FAMILIES,
+     TOOL_CAPABLE_FAMILIES,
+     detect_model_family,
+     detect_quantization,
+     likely_supports_tools,
+ )
+ from superqode.providers.local.ollama import OllamaClient, get_ollama_client
+ from superqode.providers.local.vllm import VLLMClient, get_vllm_client
+ from superqode.providers.local.sglang import SGLangClient, get_sglang_client
+ from superqode.providers.local.mlx import MLXClient, get_mlx_client
+ from superqode.providers.local.tgi import TGIClient, get_tgi_client
+ from superqode.providers.local.lmstudio import LMStudioClient, get_lmstudio_client
+ from superqode.providers.local.discovery import (
+     DiscoveredProvider,
+     LocalProviderDiscovery,
+     get_discovery_service,
+     quick_scan,
+     DEFAULT_PORTS,
+     ALL_PORTS,
+ )
+ from superqode.providers.local.tool_support import (
+     ToolCapabilityInfo,
+     TOOL_CAPABLE_MODELS,
+     TOOL_QUIRKS,
+     NO_TOOL_SUPPORT,
+     get_tool_capability_info,
+     test_tool_calling,
+     get_recommended_coding_models,
+     estimate_tool_support,
+ )
+
+ __all__ = [
+     # Enums
+     "LocalProviderType",
+     "Quantization",
+     # Data classes
+     "LocalModel",
+     "ProviderStatus",
+     "ToolTestResult",
+     "GenerationConfig",
+     "DiscoveredProvider",
+     # Base class
+     "LocalProviderClient",
+     # Clients
+     "OllamaClient",
+     "get_ollama_client",
+     "VLLMClient",
+     "get_vllm_client",
+     "SGLangClient",
+     "get_sglang_client",
+     "MLXClient",
+     "get_mlx_client",
+     "TGIClient",
+     "get_tgi_client",
+     "LMStudioClient",
+     "get_lmstudio_client",
+     # Discovery
+     "LocalProviderDiscovery",
+     "get_discovery_service",
+     "quick_scan",
+     "DEFAULT_PORTS",
+     "ALL_PORTS",
+     # Constants
+     "MODEL_FAMILIES",
+     "TOOL_CAPABLE_FAMILIES",
+     # Utilities
+     "detect_model_family",
+     "detect_quantization",
+     "likely_supports_tools",
+     # Tool support
+     "ToolCapabilityInfo",
+     "TOOL_CAPABLE_MODELS",
+     "TOOL_QUIRKS",
+     "NO_TOOL_SUPPORT",
+     "get_tool_capability_info",
+     "test_tool_calling",
+     "get_recommended_coding_models",
+     "estimate_tool_support",
+ ]
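
The hunk above only re-exports names, so no call signatures are visible in this diff. The sketch below is therefore a hypothetical reading of the discovery and tool-support helpers: it assumes quick_scan() is a coroutine taking no required arguments and that likely_supports_tools() accepts a model-name string; both assumptions come from the names alone, not from shown code.

    import asyncio

    from superqode.providers.local import likely_supports_tools, quick_scan

    async def main() -> None:
        # Assumed behavior: probe the default local server ports (Ollama,
        # LM Studio, vLLM, ...) and return DiscoveredProvider records.
        for provider in await quick_scan():
            print(provider)

        # Assumed behavior: a name-based heuristic over TOOL_CAPABLE_FAMILIES;
        # the model tag here is only an example.
        print(likely_supports_tools("qwen2.5-coder:7b"))

    asyncio.run(main())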