superqode 0.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288)
  1. superqode/__init__.py +33 -0
  2. superqode/acp/__init__.py +23 -0
  3. superqode/acp/client.py +913 -0
  4. superqode/acp/permission_screen.py +457 -0
  5. superqode/acp/types.py +480 -0
  6. superqode/acp_discovery.py +856 -0
  7. superqode/agent/__init__.py +22 -0
  8. superqode/agent/edit_strategies.py +334 -0
  9. superqode/agent/loop.py +892 -0
  10. superqode/agent/qe_report_templates.py +39 -0
  11. superqode/agent/system_prompts.py +353 -0
  12. superqode/agent_output.py +721 -0
  13. superqode/agent_stream.py +953 -0
  14. superqode/agents/__init__.py +59 -0
  15. superqode/agents/acp_registry.py +305 -0
  16. superqode/agents/client.py +249 -0
  17. superqode/agents/data/augmentcode.com.toml +51 -0
  18. superqode/agents/data/cagent.dev.toml +51 -0
  19. superqode/agents/data/claude.com.toml +60 -0
  20. superqode/agents/data/codeassistant.dev.toml +51 -0
  21. superqode/agents/data/codex.openai.com.toml +57 -0
  22. superqode/agents/data/fastagent.ai.toml +66 -0
  23. superqode/agents/data/geminicli.com.toml +77 -0
  24. superqode/agents/data/goose.block.xyz.toml +54 -0
  25. superqode/agents/data/junie.jetbrains.com.toml +56 -0
  26. superqode/agents/data/kimi.moonshot.cn.toml +57 -0
  27. superqode/agents/data/llmlingagent.dev.toml +51 -0
  28. superqode/agents/data/molt.bot.toml +49 -0
  29. superqode/agents/data/opencode.ai.toml +60 -0
  30. superqode/agents/data/stakpak.dev.toml +51 -0
  31. superqode/agents/data/vtcode.dev.toml +51 -0
  32. superqode/agents/discovery.py +266 -0
  33. superqode/agents/messaging.py +160 -0
  34. superqode/agents/persona.py +166 -0
  35. superqode/agents/registry.py +421 -0
  36. superqode/agents/schema.py +72 -0
  37. superqode/agents/unified.py +367 -0
  38. superqode/app/__init__.py +111 -0
  39. superqode/app/constants.py +314 -0
  40. superqode/app/css.py +366 -0
  41. superqode/app/models.py +118 -0
  42. superqode/app/suggester.py +125 -0
  43. superqode/app/widgets.py +1591 -0
  44. superqode/app_enhanced.py +399 -0
  45. superqode/app_main.py +17187 -0
  46. superqode/approval.py +312 -0
  47. superqode/atomic.py +296 -0
  48. superqode/commands/__init__.py +1 -0
  49. superqode/commands/acp.py +965 -0
  50. superqode/commands/agents.py +180 -0
  51. superqode/commands/auth.py +278 -0
  52. superqode/commands/config.py +374 -0
  53. superqode/commands/init.py +826 -0
  54. superqode/commands/providers.py +819 -0
  55. superqode/commands/qe.py +1145 -0
  56. superqode/commands/roles.py +380 -0
  57. superqode/commands/serve.py +172 -0
  58. superqode/commands/suggestions.py +127 -0
  59. superqode/commands/superqe.py +460 -0
  60. superqode/config/__init__.py +51 -0
  61. superqode/config/loader.py +812 -0
  62. superqode/config/schema.py +498 -0
  63. superqode/core/__init__.py +111 -0
  64. superqode/core/roles.py +281 -0
  65. superqode/danger.py +386 -0
  66. superqode/data/superqode-template.yaml +1522 -0
  67. superqode/design_system.py +1080 -0
  68. superqode/dialogs/__init__.py +6 -0
  69. superqode/dialogs/base.py +39 -0
  70. superqode/dialogs/model.py +130 -0
  71. superqode/dialogs/provider.py +870 -0
  72. superqode/diff_view.py +919 -0
  73. superqode/enterprise.py +21 -0
  74. superqode/evaluation/__init__.py +25 -0
  75. superqode/evaluation/adapters.py +93 -0
  76. superqode/evaluation/behaviors.py +89 -0
  77. superqode/evaluation/engine.py +209 -0
  78. superqode/evaluation/scenarios.py +96 -0
  79. superqode/execution/__init__.py +36 -0
  80. superqode/execution/linter.py +538 -0
  81. superqode/execution/modes.py +347 -0
  82. superqode/execution/resolver.py +283 -0
  83. superqode/execution/runner.py +642 -0
  84. superqode/file_explorer.py +811 -0
  85. superqode/file_viewer.py +471 -0
  86. superqode/flash.py +183 -0
  87. superqode/guidance/__init__.py +58 -0
  88. superqode/guidance/config.py +203 -0
  89. superqode/guidance/prompts.py +71 -0
  90. superqode/harness/__init__.py +54 -0
  91. superqode/harness/accelerator.py +291 -0
  92. superqode/harness/config.py +319 -0
  93. superqode/harness/validator.py +147 -0
  94. superqode/history.py +279 -0
  95. superqode/integrations/superopt_runner.py +124 -0
  96. superqode/logging/__init__.py +49 -0
  97. superqode/logging/adapters.py +219 -0
  98. superqode/logging/formatter.py +923 -0
  99. superqode/logging/integration.py +341 -0
  100. superqode/logging/sinks.py +170 -0
  101. superqode/logging/unified_log.py +417 -0
  102. superqode/lsp/__init__.py +26 -0
  103. superqode/lsp/client.py +544 -0
  104. superqode/main.py +1069 -0
  105. superqode/mcp/__init__.py +89 -0
  106. superqode/mcp/auth_storage.py +380 -0
  107. superqode/mcp/client.py +1236 -0
  108. superqode/mcp/config.py +319 -0
  109. superqode/mcp/integration.py +337 -0
  110. superqode/mcp/oauth.py +436 -0
  111. superqode/mcp/oauth_callback.py +385 -0
  112. superqode/mcp/types.py +290 -0
  113. superqode/memory/__init__.py +31 -0
  114. superqode/memory/feedback.py +342 -0
  115. superqode/memory/store.py +522 -0
  116. superqode/notifications.py +369 -0
  117. superqode/optimization/__init__.py +5 -0
  118. superqode/optimization/config.py +33 -0
  119. superqode/permissions/__init__.py +25 -0
  120. superqode/permissions/rules.py +488 -0
  121. superqode/plan.py +323 -0
  122. superqode/providers/__init__.py +33 -0
  123. superqode/providers/gateway/__init__.py +165 -0
  124. superqode/providers/gateway/base.py +228 -0
  125. superqode/providers/gateway/litellm_gateway.py +1170 -0
  126. superqode/providers/gateway/openresponses_gateway.py +436 -0
  127. superqode/providers/health.py +297 -0
  128. superqode/providers/huggingface/__init__.py +74 -0
  129. superqode/providers/huggingface/downloader.py +472 -0
  130. superqode/providers/huggingface/endpoints.py +442 -0
  131. superqode/providers/huggingface/hub.py +531 -0
  132. superqode/providers/huggingface/inference.py +394 -0
  133. superqode/providers/huggingface/transformers_runner.py +516 -0
  134. superqode/providers/local/__init__.py +100 -0
  135. superqode/providers/local/base.py +438 -0
  136. superqode/providers/local/discovery.py +418 -0
  137. superqode/providers/local/lmstudio.py +256 -0
  138. superqode/providers/local/mlx.py +457 -0
  139. superqode/providers/local/ollama.py +486 -0
  140. superqode/providers/local/sglang.py +268 -0
  141. superqode/providers/local/tgi.py +260 -0
  142. superqode/providers/local/tool_support.py +477 -0
  143. superqode/providers/local/vllm.py +258 -0
  144. superqode/providers/manager.py +1338 -0
  145. superqode/providers/models.py +1016 -0
  146. superqode/providers/models_dev.py +578 -0
  147. superqode/providers/openresponses/__init__.py +87 -0
  148. superqode/providers/openresponses/converters/__init__.py +17 -0
  149. superqode/providers/openresponses/converters/messages.py +343 -0
  150. superqode/providers/openresponses/converters/tools.py +268 -0
  151. superqode/providers/openresponses/schema/__init__.py +56 -0
  152. superqode/providers/openresponses/schema/models.py +585 -0
  153. superqode/providers/openresponses/streaming/__init__.py +5 -0
  154. superqode/providers/openresponses/streaming/parser.py +338 -0
  155. superqode/providers/openresponses/tools/__init__.py +21 -0
  156. superqode/providers/openresponses/tools/apply_patch.py +352 -0
  157. superqode/providers/openresponses/tools/code_interpreter.py +290 -0
  158. superqode/providers/openresponses/tools/file_search.py +333 -0
  159. superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
  160. superqode/providers/registry.py +716 -0
  161. superqode/providers/usage.py +332 -0
  162. superqode/pure_mode.py +384 -0
  163. superqode/qr/__init__.py +23 -0
  164. superqode/qr/dashboard.py +781 -0
  165. superqode/qr/generator.py +1018 -0
  166. superqode/qr/templates.py +135 -0
  167. superqode/safety/__init__.py +41 -0
  168. superqode/safety/sandbox.py +413 -0
  169. superqode/safety/warnings.py +256 -0
  170. superqode/server/__init__.py +33 -0
  171. superqode/server/lsp_server.py +775 -0
  172. superqode/server/web.py +250 -0
  173. superqode/session/__init__.py +25 -0
  174. superqode/session/persistence.py +580 -0
  175. superqode/session/sharing.py +477 -0
  176. superqode/session.py +475 -0
  177. superqode/sidebar.py +2991 -0
  178. superqode/stream_view.py +648 -0
  179. superqode/styles/__init__.py +3 -0
  180. superqode/superqe/__init__.py +184 -0
  181. superqode/superqe/acp_runner.py +1064 -0
  182. superqode/superqe/constitution/__init__.py +62 -0
  183. superqode/superqe/constitution/evaluator.py +308 -0
  184. superqode/superqe/constitution/loader.py +432 -0
  185. superqode/superqe/constitution/schema.py +250 -0
  186. superqode/superqe/events.py +591 -0
  187. superqode/superqe/frameworks/__init__.py +65 -0
  188. superqode/superqe/frameworks/base.py +234 -0
  189. superqode/superqe/frameworks/e2e.py +263 -0
  190. superqode/superqe/frameworks/executor.py +237 -0
  191. superqode/superqe/frameworks/javascript.py +409 -0
  192. superqode/superqe/frameworks/python.py +373 -0
  193. superqode/superqe/frameworks/registry.py +92 -0
  194. superqode/superqe/mcp_tools/__init__.py +47 -0
  195. superqode/superqe/mcp_tools/core_tools.py +418 -0
  196. superqode/superqe/mcp_tools/registry.py +230 -0
  197. superqode/superqe/mcp_tools/testing_tools.py +167 -0
  198. superqode/superqe/noise.py +89 -0
  199. superqode/superqe/orchestrator.py +778 -0
  200. superqode/superqe/roles.py +609 -0
  201. superqode/superqe/session.py +713 -0
  202. superqode/superqe/skills/__init__.py +57 -0
  203. superqode/superqe/skills/base.py +106 -0
  204. superqode/superqe/skills/core_skills.py +899 -0
  205. superqode/superqe/skills/registry.py +90 -0
  206. superqode/superqe/verifier.py +101 -0
  207. superqode/superqe_cli.py +76 -0
  208. superqode/tool_call.py +358 -0
  209. superqode/tools/__init__.py +93 -0
  210. superqode/tools/agent_tools.py +496 -0
  211. superqode/tools/base.py +324 -0
  212. superqode/tools/batch_tool.py +133 -0
  213. superqode/tools/diagnostics.py +311 -0
  214. superqode/tools/edit_tools.py +653 -0
  215. superqode/tools/enhanced_base.py +515 -0
  216. superqode/tools/file_tools.py +269 -0
  217. superqode/tools/file_tracking.py +45 -0
  218. superqode/tools/lsp_tools.py +610 -0
  219. superqode/tools/network_tools.py +350 -0
  220. superqode/tools/permissions.py +400 -0
  221. superqode/tools/question_tool.py +324 -0
  222. superqode/tools/search_tools.py +598 -0
  223. superqode/tools/shell_tools.py +259 -0
  224. superqode/tools/todo_tools.py +121 -0
  225. superqode/tools/validation.py +80 -0
  226. superqode/tools/web_tools.py +639 -0
  227. superqode/tui.py +1152 -0
  228. superqode/tui_integration.py +875 -0
  229. superqode/tui_widgets/__init__.py +27 -0
  230. superqode/tui_widgets/widgets/__init__.py +18 -0
  231. superqode/tui_widgets/widgets/progress.py +185 -0
  232. superqode/tui_widgets/widgets/tool_display.py +188 -0
  233. superqode/undo_manager.py +574 -0
  234. superqode/utils/__init__.py +5 -0
  235. superqode/utils/error_handling.py +323 -0
  236. superqode/utils/fuzzy.py +257 -0
  237. superqode/widgets/__init__.py +477 -0
  238. superqode/widgets/agent_collab.py +390 -0
  239. superqode/widgets/agent_store.py +936 -0
  240. superqode/widgets/agent_switcher.py +395 -0
  241. superqode/widgets/animation_manager.py +284 -0
  242. superqode/widgets/code_context.py +356 -0
  243. superqode/widgets/command_palette.py +412 -0
  244. superqode/widgets/connection_status.py +537 -0
  245. superqode/widgets/conversation_history.py +470 -0
  246. superqode/widgets/diff_indicator.py +155 -0
  247. superqode/widgets/enhanced_status_bar.py +385 -0
  248. superqode/widgets/enhanced_toast.py +476 -0
  249. superqode/widgets/file_browser.py +809 -0
  250. superqode/widgets/file_reference.py +585 -0
  251. superqode/widgets/issue_timeline.py +340 -0
  252. superqode/widgets/leader_key.py +264 -0
  253. superqode/widgets/mode_switcher.py +445 -0
  254. superqode/widgets/model_picker.py +234 -0
  255. superqode/widgets/permission_preview.py +1205 -0
  256. superqode/widgets/prompt.py +358 -0
  257. superqode/widgets/provider_connect.py +725 -0
  258. superqode/widgets/pty_shell.py +587 -0
  259. superqode/widgets/qe_dashboard.py +321 -0
  260. superqode/widgets/resizable_sidebar.py +377 -0
  261. superqode/widgets/response_changes.py +218 -0
  262. superqode/widgets/response_display.py +528 -0
  263. superqode/widgets/rich_tool_display.py +613 -0
  264. superqode/widgets/sidebar_panels.py +1180 -0
  265. superqode/widgets/slash_complete.py +356 -0
  266. superqode/widgets/split_view.py +612 -0
  267. superqode/widgets/status_bar.py +273 -0
  268. superqode/widgets/superqode_display.py +786 -0
  269. superqode/widgets/thinking_display.py +815 -0
  270. superqode/widgets/throbber.py +87 -0
  271. superqode/widgets/toast.py +206 -0
  272. superqode/widgets/unified_output.py +1073 -0
  273. superqode/workspace/__init__.py +75 -0
  274. superqode/workspace/artifacts.py +472 -0
  275. superqode/workspace/coordinator.py +353 -0
  276. superqode/workspace/diff_tracker.py +429 -0
  277. superqode/workspace/git_guard.py +373 -0
  278. superqode/workspace/git_snapshot.py +526 -0
  279. superqode/workspace/manager.py +750 -0
  280. superqode/workspace/snapshot.py +357 -0
  281. superqode/workspace/watcher.py +535 -0
  282. superqode/workspace/worktree.py +440 -0
  283. superqode-0.1.5.dist-info/METADATA +204 -0
  284. superqode-0.1.5.dist-info/RECORD +288 -0
  285. superqode-0.1.5.dist-info/WHEEL +5 -0
  286. superqode-0.1.5.dist-info/entry_points.txt +3 -0
  287. superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
  288. superqode-0.1.5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,394 @@
1
+ """HuggingFace Inference API client with streaming support.
2
+
3
+ This module provides access to the HuggingFace Inference API (serverless)
4
+ for text generation with any compatible model.
5
+ """
6
+
7
+ import asyncio
8
+ import json
9
+ import os
10
+ from dataclasses import dataclass, field
11
+ from typing import Any, AsyncIterator, Dict, List, Optional, Union
12
+ from urllib.error import HTTPError, URLError
13
+ from urllib.request import Request, urlopen
14
+
15
+
16
# HuggingFace Inference API endpoints.
# The classic serverless endpoint accepts raw text-generation payloads;
# the router endpoint exposes an OpenAI-compatible chat-completions API.
HF_INFERENCE_API = "https://api-inference.huggingface.co/models"
HF_ROUTER_API = "https://router.huggingface.co/hf"  # New router for free inference
19
+
20
+
21
@dataclass
class InferenceResponse:
    """Normalized result of a HuggingFace Inference API call."""

    # Generated text content (empty on failure).
    content: str = ""
    # Model ID that produced the response.
    model: str = ""
    # Reason generation stopped (e.g. "stop", "length").
    finish_reason: str = ""
    # Token usage counters (prompt/completion/total tokens).
    usage: Dict[str, int] = field(default_factory=dict)
    # Tool calls emitted by the model, if any.
    tool_calls: List[Dict] = field(default_factory=list)
    # Error message when the request failed; empty on success.
    error: str = ""
40
+
41
+
42
# Curated model IDs known to work well on the serverless Inference API,
# grouped by intended use case.
RECOMMENDED_MODELS = {
    # Strong general-purpose instruct models.
    "general": [
        "meta-llama/Llama-3.3-70B-Instruct",
        "Qwen/Qwen2.5-72B-Instruct",
        "mistralai/Mixtral-8x7B-Instruct-v0.1",
        "microsoft/Phi-3.5-mini-instruct",
    ],
    # Code-specialized models.
    "coding": [
        "Qwen/Qwen2.5-Coder-32B-Instruct",
        "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
        "codellama/CodeLlama-34b-Instruct-hf",
        "bigcode/starcoder2-15b-instruct-v0.1",
    ],
    # Small models with low latency / resource needs.
    "small": [
        "microsoft/Phi-3.5-mini-instruct",
        "google/gemma-2-2b-it",
        "Qwen/Qwen2.5-3B-Instruct",
        "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
    ],
    # Conversational / chat-tuned models.
    "chat": [
        "meta-llama/Llama-3.2-3B-Instruct",
        "HuggingFaceH4/zephyr-7b-beta",
        "openchat/openchat-3.5-0106",
        "NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO",
    ],
}
69
+
70
+
71
class HFInferenceClient:
    """HuggingFace Inference API client.

    Provides access to HF's serverless inference API for text generation.
    Supports both the free tier and the Pro tier, the legacy
    text-generation endpoint, and the newer OpenAI-compatible router
    endpoint.

    Environment:
        HF_TOKEN / HUGGING_FACE_HUB_TOKEN: token for authentication
            (optional but recommended — avoids the anonymous rate limit).
        HF_INFERENCE_ENDPOINT: custom inference endpoint base URL (optional).
    """

    def __init__(
        self, token: Optional[str] = None, endpoint: Optional[str] = None, use_router: bool = True
    ):
        """Initialize the Inference API client.

        Args:
            token: HF token. Falls back to HF_TOKEN, then HUGGING_FACE_HUB_TOKEN.
            endpoint: Custom inference endpoint. Falls back to HF_INFERENCE_ENDPOINT.
            use_router: Use the router API (OpenAI-compatible) for better availability.
        """
        self._token = (
            token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        )
        self._custom_endpoint = endpoint or os.environ.get("HF_INFERENCE_ENDPOINT")
        self._use_router = use_router

    @property
    def is_authenticated(self) -> bool:
        """Whether a non-empty token is available for the Authorization header."""
        return bool(self._token)

    def get_endpoint(self, model_id: str) -> str:
        """Get the API endpoint for a model.

        A custom endpoint takes precedence over the router; the legacy
        serverless endpoint is the final fallback.

        Args:
            model_id: Model ID (e.g. "org/name").

        Returns:
            Full API endpoint URL.
        """
        if self._custom_endpoint:
            return f"{self._custom_endpoint}/{model_id}"

        if self._use_router:
            return f"{HF_ROUTER_API}/{model_id}/v1/chat/completions"

        return f"{HF_INFERENCE_API}/{model_id}"

    def _request(
        self, endpoint: str, data: Dict[str, Any], timeout: float = 120.0
    ) -> Dict[str, Any]:
        """Make a blocking POST request to the Inference API.

        Args:
            endpoint: Full API endpoint URL.
            data: JSON-serializable request body.
            timeout: Request timeout in seconds.

        Returns:
            Parsed JSON response.

        Raises:
            urllib.error.HTTPError: On non-2xx responses.
            urllib.error.URLError: On connection failures.
        """
        headers = {
            "Content-Type": "application/json",
            "Accept": "application/json",
        }
        if self._token:
            headers["Authorization"] = f"Bearer {self._token}"

        body = json.dumps(data).encode("utf-8")
        request = Request(endpoint, data=body, headers=headers, method="POST")

        with urlopen(request, timeout=timeout) as response:
            return json.loads(response.read().decode("utf-8"))

    async def _async_request(
        self, endpoint: str, data: Dict[str, Any], timeout: float = 120.0
    ) -> Dict[str, Any]:
        """Run the blocking ``_request`` in the default thread-pool executor.

        Uses ``asyncio.get_running_loop()``; ``get_event_loop()`` is
        deprecated inside coroutines since Python 3.10.
        """
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, lambda: self._request(endpoint, data, timeout))

    async def chat(
        self,
        messages: List[Dict[str, str]],
        model: str = "meta-llama/Llama-3.3-70B-Instruct",
        max_tokens: int = 2048,
        temperature: float = 0.7,
        top_p: float = 0.9,
        tools: Optional[List[Dict]] = None,
        tool_choice: Optional[str] = None,
        stream: bool = False,
    ) -> InferenceResponse:
        """Send a chat completion request.

        Args:
            messages: Chat messages in OpenAI format.
            model: Model ID to use.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.
            top_p: Nucleus sampling threshold.
            tools: Tool definitions for function calling.
            tool_choice: Tool choice mode ("auto", "none", "required").
            stream: Accepted for interface compatibility; streaming is not
                yet implemented and the request is always sent non-streaming.

        Returns:
            InferenceResponse with generated content, or with ``error`` set
            on failure (this method does not raise).
        """
        endpoint = self.get_endpoint(model)

        # Build request payload
        payload: Dict[str, Any] = {
            "model": model,
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "top_p": top_p,
            "stream": False,  # Streaming handled separately
        }

        if tools:
            payload["tools"] = tools

        if tool_choice:
            payload["tool_choice"] = tool_choice

        try:
            response = await self._async_request(endpoint, payload)
            return self._parse_chat_response(response, model)

        except HTTPError as e:
            # Surface the server's error body when it is readable.
            error_body = ""
            try:
                error_body = e.read().decode("utf-8")
            except Exception:
                pass

            return InferenceResponse(model=model, error=f"HTTP {e.code}: {error_body or e.reason}")

        except Exception as e:
            return InferenceResponse(model=model, error=str(e))

    async def generate(
        self,
        prompt: str,
        model: str = "meta-llama/Llama-3.3-70B-Instruct",
        max_tokens: int = 2048,
        temperature: float = 0.7,
        stop: Optional[List[str]] = None,
    ) -> InferenceResponse:
        """Send a text generation request (non-chat format).

        This uses the older text generation API format for models that
        don't support chat templates; it always targets the legacy
        serverless endpoint rather than the router.

        Args:
            prompt: Text prompt.
            model: Model ID.
            max_tokens: Maximum new tokens to generate.
            temperature: Sampling temperature.
            stop: Stop sequences.

        Returns:
            InferenceResponse with generated text, or with ``error`` set on
            failure (this method does not raise).
        """
        # Use direct inference API for non-chat models
        endpoint = f"{HF_INFERENCE_API}/{model}"

        parameters: Dict[str, Any] = {
            "max_new_tokens": max_tokens,
            "temperature": temperature,
            "return_full_text": False,
        }
        if stop:
            parameters["stop_sequences"] = stop

        payload: Dict[str, Any] = {"inputs": prompt, "parameters": parameters}

        try:
            response = await self._async_request(endpoint, payload)

            # Successful text generation returns a list of candidates.
            if isinstance(response, list) and len(response) > 0:
                text = response[0].get("generated_text", "")
                return InferenceResponse(content=text, model=model)

            # The API reports failures (e.g. model loading) as a dict
            # with an "error" key — surface it instead of a generic message.
            if isinstance(response, dict) and "error" in response:
                return InferenceResponse(model=model, error=str(response["error"]))

            return InferenceResponse(model=model, error="Unexpected response format")

        except Exception as e:
            return InferenceResponse(model=model, error=str(e))

    async def check_model_status(self, model: str) -> Dict[str, Any]:
        """Check the status of a model on the Inference API.

        Issues a minimal 1-token chat request and classifies the outcome.

        Args:
            model: Model ID.

        Returns:
            Dict with at least an ``available`` bool; may also carry
            ``loading``, ``rate_limited``, and ``error`` keys.
        """
        try:
            response = await self.chat(
                messages=[{"role": "user", "content": "Hi"}],
                model=model,
                max_tokens=1,
            )

            if response.error:
                # Check for common error patterns
                lowered = response.error.lower()
                if "loading" in lowered:
                    return {
                        "available": False,
                        "loading": True,
                        "error": "Model is loading",
                    }
                if "rate limit" in lowered:
                    return {
                        "available": True,
                        "rate_limited": True,
                        "error": response.error,
                    }
                return {
                    "available": False,
                    "error": response.error,
                }

            return {
                "available": True,
                "loading": False,
            }

        except Exception as e:
            return {
                "available": False,
                "error": str(e),
            }

    async def list_available_models(self) -> List[str]:
        """Get list of recommended available models.

        Returns:
            De-duplicated list (insertion order preserved) of model IDs
            known to work well with the Inference API.
        """
        all_models = [m for models in RECOMMENDED_MODELS.values() for m in models]
        # dict.fromkeys removes duplicates while preserving first-seen order.
        return list(dict.fromkeys(all_models))

    def get_recommended_models(self, category: str = "general") -> List[str]:
        """Get recommended models for a category.

        Args:
            category: Model category (general, coding, small, chat).
                Unknown categories fall back to "general".

        Returns:
            List of recommended model IDs.
        """
        return RECOMMENDED_MODELS.get(category, RECOMMENDED_MODELS["general"])

    def _parse_chat_response(self, response: Dict[str, Any], model: str) -> InferenceResponse:
        """Parse an OpenAI-compatible chat completion response."""
        choices = response.get("choices", [])

        if not choices:
            # No choices usually means an API-level error payload. HF may
            # return "error" as either a dict ({"message": ...}) or a bare
            # string (e.g. "Model ... is currently loading") — handle both
            # (the old dict-only code raised AttributeError on strings).
            if "error" in response:
                err = response["error"]
                message = err.get("message", str(err)) if isinstance(err, dict) else str(err)
                return InferenceResponse(model=model, error=message)
            return InferenceResponse(model=model, error="No response choices")

        choice = choices[0]
        message = choice.get("message", {})

        # "content" may be explicitly null when the model only emits tool
        # calls; coerce null values to the dataclass's declared types.
        content = message.get("content") or ""
        tool_calls = message.get("tool_calls") or []
        finish_reason = choice.get("finish_reason", "")

        # Parse usage (may be missing or null)
        usage = response.get("usage") or {}

        return InferenceResponse(
            content=content,
            model=model,
            finish_reason=finish_reason,
            usage={
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "total_tokens": usage.get("total_tokens", 0),
            },
            tool_calls=tool_calls,
        )
379
+
380
+
381
# Module-level singleton, created lazily on first use.
_inference_client: Optional[HFInferenceClient] = None


def get_hf_inference_client() -> HFInferenceClient:
    """Get the global HF Inference API client instance.

    The client is constructed on the first call and reused afterwards.

    Returns:
        HFInferenceClient instance.
    """
    global _inference_client
    if _inference_client is None:
        _inference_client = HFInferenceClient()
    return _inference_client