superqode 0.1.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- superqode/__init__.py +33 -0
- superqode/acp/__init__.py +23 -0
- superqode/acp/client.py +913 -0
- superqode/acp/permission_screen.py +457 -0
- superqode/acp/types.py +480 -0
- superqode/acp_discovery.py +856 -0
- superqode/agent/__init__.py +22 -0
- superqode/agent/edit_strategies.py +334 -0
- superqode/agent/loop.py +892 -0
- superqode/agent/qe_report_templates.py +39 -0
- superqode/agent/system_prompts.py +353 -0
- superqode/agent_output.py +721 -0
- superqode/agent_stream.py +953 -0
- superqode/agents/__init__.py +59 -0
- superqode/agents/acp_registry.py +305 -0
- superqode/agents/client.py +249 -0
- superqode/agents/data/augmentcode.com.toml +51 -0
- superqode/agents/data/cagent.dev.toml +51 -0
- superqode/agents/data/claude.com.toml +60 -0
- superqode/agents/data/codeassistant.dev.toml +51 -0
- superqode/agents/data/codex.openai.com.toml +57 -0
- superqode/agents/data/fastagent.ai.toml +66 -0
- superqode/agents/data/geminicli.com.toml +77 -0
- superqode/agents/data/goose.block.xyz.toml +54 -0
- superqode/agents/data/junie.jetbrains.com.toml +56 -0
- superqode/agents/data/kimi.moonshot.cn.toml +57 -0
- superqode/agents/data/llmlingagent.dev.toml +51 -0
- superqode/agents/data/molt.bot.toml +49 -0
- superqode/agents/data/opencode.ai.toml +60 -0
- superqode/agents/data/stakpak.dev.toml +51 -0
- superqode/agents/data/vtcode.dev.toml +51 -0
- superqode/agents/discovery.py +266 -0
- superqode/agents/messaging.py +160 -0
- superqode/agents/persona.py +166 -0
- superqode/agents/registry.py +421 -0
- superqode/agents/schema.py +72 -0
- superqode/agents/unified.py +367 -0
- superqode/app/__init__.py +111 -0
- superqode/app/constants.py +314 -0
- superqode/app/css.py +366 -0
- superqode/app/models.py +118 -0
- superqode/app/suggester.py +125 -0
- superqode/app/widgets.py +1591 -0
- superqode/app_enhanced.py +399 -0
- superqode/app_main.py +17187 -0
- superqode/approval.py +312 -0
- superqode/atomic.py +296 -0
- superqode/commands/__init__.py +1 -0
- superqode/commands/acp.py +965 -0
- superqode/commands/agents.py +180 -0
- superqode/commands/auth.py +278 -0
- superqode/commands/config.py +374 -0
- superqode/commands/init.py +826 -0
- superqode/commands/providers.py +819 -0
- superqode/commands/qe.py +1145 -0
- superqode/commands/roles.py +380 -0
- superqode/commands/serve.py +172 -0
- superqode/commands/suggestions.py +127 -0
- superqode/commands/superqe.py +460 -0
- superqode/config/__init__.py +51 -0
- superqode/config/loader.py +812 -0
- superqode/config/schema.py +498 -0
- superqode/core/__init__.py +111 -0
- superqode/core/roles.py +281 -0
- superqode/danger.py +386 -0
- superqode/data/superqode-template.yaml +1522 -0
- superqode/design_system.py +1080 -0
- superqode/dialogs/__init__.py +6 -0
- superqode/dialogs/base.py +39 -0
- superqode/dialogs/model.py +130 -0
- superqode/dialogs/provider.py +870 -0
- superqode/diff_view.py +919 -0
- superqode/enterprise.py +21 -0
- superqode/evaluation/__init__.py +25 -0
- superqode/evaluation/adapters.py +93 -0
- superqode/evaluation/behaviors.py +89 -0
- superqode/evaluation/engine.py +209 -0
- superqode/evaluation/scenarios.py +96 -0
- superqode/execution/__init__.py +36 -0
- superqode/execution/linter.py +538 -0
- superqode/execution/modes.py +347 -0
- superqode/execution/resolver.py +283 -0
- superqode/execution/runner.py +642 -0
- superqode/file_explorer.py +811 -0
- superqode/file_viewer.py +471 -0
- superqode/flash.py +183 -0
- superqode/guidance/__init__.py +58 -0
- superqode/guidance/config.py +203 -0
- superqode/guidance/prompts.py +71 -0
- superqode/harness/__init__.py +54 -0
- superqode/harness/accelerator.py +291 -0
- superqode/harness/config.py +319 -0
- superqode/harness/validator.py +147 -0
- superqode/history.py +279 -0
- superqode/integrations/superopt_runner.py +124 -0
- superqode/logging/__init__.py +49 -0
- superqode/logging/adapters.py +219 -0
- superqode/logging/formatter.py +923 -0
- superqode/logging/integration.py +341 -0
- superqode/logging/sinks.py +170 -0
- superqode/logging/unified_log.py +417 -0
- superqode/lsp/__init__.py +26 -0
- superqode/lsp/client.py +544 -0
- superqode/main.py +1069 -0
- superqode/mcp/__init__.py +89 -0
- superqode/mcp/auth_storage.py +380 -0
- superqode/mcp/client.py +1236 -0
- superqode/mcp/config.py +319 -0
- superqode/mcp/integration.py +337 -0
- superqode/mcp/oauth.py +436 -0
- superqode/mcp/oauth_callback.py +385 -0
- superqode/mcp/types.py +290 -0
- superqode/memory/__init__.py +31 -0
- superqode/memory/feedback.py +342 -0
- superqode/memory/store.py +522 -0
- superqode/notifications.py +369 -0
- superqode/optimization/__init__.py +5 -0
- superqode/optimization/config.py +33 -0
- superqode/permissions/__init__.py +25 -0
- superqode/permissions/rules.py +488 -0
- superqode/plan.py +323 -0
- superqode/providers/__init__.py +33 -0
- superqode/providers/gateway/__init__.py +165 -0
- superqode/providers/gateway/base.py +228 -0
- superqode/providers/gateway/litellm_gateway.py +1170 -0
- superqode/providers/gateway/openresponses_gateway.py +436 -0
- superqode/providers/health.py +297 -0
- superqode/providers/huggingface/__init__.py +74 -0
- superqode/providers/huggingface/downloader.py +472 -0
- superqode/providers/huggingface/endpoints.py +442 -0
- superqode/providers/huggingface/hub.py +531 -0
- superqode/providers/huggingface/inference.py +394 -0
- superqode/providers/huggingface/transformers_runner.py +516 -0
- superqode/providers/local/__init__.py +100 -0
- superqode/providers/local/base.py +438 -0
- superqode/providers/local/discovery.py +418 -0
- superqode/providers/local/lmstudio.py +256 -0
- superqode/providers/local/mlx.py +457 -0
- superqode/providers/local/ollama.py +486 -0
- superqode/providers/local/sglang.py +268 -0
- superqode/providers/local/tgi.py +260 -0
- superqode/providers/local/tool_support.py +477 -0
- superqode/providers/local/vllm.py +258 -0
- superqode/providers/manager.py +1338 -0
- superqode/providers/models.py +1016 -0
- superqode/providers/models_dev.py +578 -0
- superqode/providers/openresponses/__init__.py +87 -0
- superqode/providers/openresponses/converters/__init__.py +17 -0
- superqode/providers/openresponses/converters/messages.py +343 -0
- superqode/providers/openresponses/converters/tools.py +268 -0
- superqode/providers/openresponses/schema/__init__.py +56 -0
- superqode/providers/openresponses/schema/models.py +585 -0
- superqode/providers/openresponses/streaming/__init__.py +5 -0
- superqode/providers/openresponses/streaming/parser.py +338 -0
- superqode/providers/openresponses/tools/__init__.py +21 -0
- superqode/providers/openresponses/tools/apply_patch.py +352 -0
- superqode/providers/openresponses/tools/code_interpreter.py +290 -0
- superqode/providers/openresponses/tools/file_search.py +333 -0
- superqode/providers/openresponses/tools/mcp_adapter.py +252 -0
- superqode/providers/registry.py +716 -0
- superqode/providers/usage.py +332 -0
- superqode/pure_mode.py +384 -0
- superqode/qr/__init__.py +23 -0
- superqode/qr/dashboard.py +781 -0
- superqode/qr/generator.py +1018 -0
- superqode/qr/templates.py +135 -0
- superqode/safety/__init__.py +41 -0
- superqode/safety/sandbox.py +413 -0
- superqode/safety/warnings.py +256 -0
- superqode/server/__init__.py +33 -0
- superqode/server/lsp_server.py +775 -0
- superqode/server/web.py +250 -0
- superqode/session/__init__.py +25 -0
- superqode/session/persistence.py +580 -0
- superqode/session/sharing.py +477 -0
- superqode/session.py +475 -0
- superqode/sidebar.py +2991 -0
- superqode/stream_view.py +648 -0
- superqode/styles/__init__.py +3 -0
- superqode/superqe/__init__.py +184 -0
- superqode/superqe/acp_runner.py +1064 -0
- superqode/superqe/constitution/__init__.py +62 -0
- superqode/superqe/constitution/evaluator.py +308 -0
- superqode/superqe/constitution/loader.py +432 -0
- superqode/superqe/constitution/schema.py +250 -0
- superqode/superqe/events.py +591 -0
- superqode/superqe/frameworks/__init__.py +65 -0
- superqode/superqe/frameworks/base.py +234 -0
- superqode/superqe/frameworks/e2e.py +263 -0
- superqode/superqe/frameworks/executor.py +237 -0
- superqode/superqe/frameworks/javascript.py +409 -0
- superqode/superqe/frameworks/python.py +373 -0
- superqode/superqe/frameworks/registry.py +92 -0
- superqode/superqe/mcp_tools/__init__.py +47 -0
- superqode/superqe/mcp_tools/core_tools.py +418 -0
- superqode/superqe/mcp_tools/registry.py +230 -0
- superqode/superqe/mcp_tools/testing_tools.py +167 -0
- superqode/superqe/noise.py +89 -0
- superqode/superqe/orchestrator.py +778 -0
- superqode/superqe/roles.py +609 -0
- superqode/superqe/session.py +713 -0
- superqode/superqe/skills/__init__.py +57 -0
- superqode/superqe/skills/base.py +106 -0
- superqode/superqe/skills/core_skills.py +899 -0
- superqode/superqe/skills/registry.py +90 -0
- superqode/superqe/verifier.py +101 -0
- superqode/superqe_cli.py +76 -0
- superqode/tool_call.py +358 -0
- superqode/tools/__init__.py +93 -0
- superqode/tools/agent_tools.py +496 -0
- superqode/tools/base.py +324 -0
- superqode/tools/batch_tool.py +133 -0
- superqode/tools/diagnostics.py +311 -0
- superqode/tools/edit_tools.py +653 -0
- superqode/tools/enhanced_base.py +515 -0
- superqode/tools/file_tools.py +269 -0
- superqode/tools/file_tracking.py +45 -0
- superqode/tools/lsp_tools.py +610 -0
- superqode/tools/network_tools.py +350 -0
- superqode/tools/permissions.py +400 -0
- superqode/tools/question_tool.py +324 -0
- superqode/tools/search_tools.py +598 -0
- superqode/tools/shell_tools.py +259 -0
- superqode/tools/todo_tools.py +121 -0
- superqode/tools/validation.py +80 -0
- superqode/tools/web_tools.py +639 -0
- superqode/tui.py +1152 -0
- superqode/tui_integration.py +875 -0
- superqode/tui_widgets/__init__.py +27 -0
- superqode/tui_widgets/widgets/__init__.py +18 -0
- superqode/tui_widgets/widgets/progress.py +185 -0
- superqode/tui_widgets/widgets/tool_display.py +188 -0
- superqode/undo_manager.py +574 -0
- superqode/utils/__init__.py +5 -0
- superqode/utils/error_handling.py +323 -0
- superqode/utils/fuzzy.py +257 -0
- superqode/widgets/__init__.py +477 -0
- superqode/widgets/agent_collab.py +390 -0
- superqode/widgets/agent_store.py +936 -0
- superqode/widgets/agent_switcher.py +395 -0
- superqode/widgets/animation_manager.py +284 -0
- superqode/widgets/code_context.py +356 -0
- superqode/widgets/command_palette.py +412 -0
- superqode/widgets/connection_status.py +537 -0
- superqode/widgets/conversation_history.py +470 -0
- superqode/widgets/diff_indicator.py +155 -0
- superqode/widgets/enhanced_status_bar.py +385 -0
- superqode/widgets/enhanced_toast.py +476 -0
- superqode/widgets/file_browser.py +809 -0
- superqode/widgets/file_reference.py +585 -0
- superqode/widgets/issue_timeline.py +340 -0
- superqode/widgets/leader_key.py +264 -0
- superqode/widgets/mode_switcher.py +445 -0
- superqode/widgets/model_picker.py +234 -0
- superqode/widgets/permission_preview.py +1205 -0
- superqode/widgets/prompt.py +358 -0
- superqode/widgets/provider_connect.py +725 -0
- superqode/widgets/pty_shell.py +587 -0
- superqode/widgets/qe_dashboard.py +321 -0
- superqode/widgets/resizable_sidebar.py +377 -0
- superqode/widgets/response_changes.py +218 -0
- superqode/widgets/response_display.py +528 -0
- superqode/widgets/rich_tool_display.py +613 -0
- superqode/widgets/sidebar_panels.py +1180 -0
- superqode/widgets/slash_complete.py +356 -0
- superqode/widgets/split_view.py +612 -0
- superqode/widgets/status_bar.py +273 -0
- superqode/widgets/superqode_display.py +786 -0
- superqode/widgets/thinking_display.py +815 -0
- superqode/widgets/throbber.py +87 -0
- superqode/widgets/toast.py +206 -0
- superqode/widgets/unified_output.py +1073 -0
- superqode/workspace/__init__.py +75 -0
- superqode/workspace/artifacts.py +472 -0
- superqode/workspace/coordinator.py +353 -0
- superqode/workspace/diff_tracker.py +429 -0
- superqode/workspace/git_guard.py +373 -0
- superqode/workspace/git_snapshot.py +526 -0
- superqode/workspace/manager.py +750 -0
- superqode/workspace/snapshot.py +357 -0
- superqode/workspace/watcher.py +535 -0
- superqode/workspace/worktree.py +440 -0
- superqode-0.1.5.dist-info/METADATA +204 -0
- superqode-0.1.5.dist-info/RECORD +288 -0
- superqode-0.1.5.dist-info/WHEEL +5 -0
- superqode-0.1.5.dist-info/entry_points.txt +3 -0
- superqode-0.1.5.dist-info/licenses/LICENSE +648 -0
- superqode-0.1.5.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,442 @@
|
|
|
1
|
+
"""HuggingFace Inference Endpoints client for dedicated deployments.
|
|
2
|
+
|
|
3
|
+
This module provides access to HuggingFace Inference Endpoints,
|
|
4
|
+
allowing users to connect to their dedicated model deployments.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import json
|
|
9
|
+
import os
|
|
10
|
+
from dataclasses import dataclass, field
|
|
11
|
+
from datetime import datetime
|
|
12
|
+
from enum import Enum
|
|
13
|
+
from typing import Any, Dict, List, Optional
|
|
14
|
+
from urllib.error import HTTPError, URLError
|
|
15
|
+
from urllib.request import Request, urlopen
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Base URL of the HuggingFace Inference Endpoints API (v2). Request paths
# are appended directly to this string when building request URLs.
HF_ENDPOINTS_API = "https://api.endpoints.huggingface.cloud/v2"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
class EndpointState(Enum):
    """Possible states for an Inference Endpoint.

    Lookup by value tolerates differences in case and in ``-``/``_``
    separators, so ``EndpointState("scaledtozero")`` and
    ``EndpointState("scaled_to_zero")`` both resolve to
    :attr:`SCALED_TO_ZERO`. Values that match no member still raise
    ``ValueError``.
    """

    PENDING = "pending"
    INITIALIZING = "initializing"
    UPDATING = "updating"
    RUNNING = "running"
    PAUSED = "paused"
    FAILED = "failed"
    # camelCase value: an exact lookup with a lower-cased string would miss
    # it, which is why ``_missing_`` below normalises before comparing.
    SCALED_TO_ZERO = "scaledToZero"

    @classmethod
    def _missing_(cls, value):
        """Resolve values that differ from a member only by case/separators.

        ``Enum`` calls this hook when an exact value lookup fails.

        Args:
            value: The value that was looked up.

        Returns:
            The matching member, or ``None`` so that ``Enum`` raises its
            usual ``ValueError``.
        """
        if not isinstance(value, str):
            return None
        wanted = value.replace("-", "").replace("_", "").lower()
        for member in cls:
            if member.value.replace("_", "").lower() == wanted:
                return member
        return None
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class EndpointType(Enum):
    """Access level of an Inference Endpoint."""

    # Requires a HF token.
    PROTECTED = "protected"
    # Anyone can access.
    PUBLIC = "public"
    # Private VPC.
    PRIVATE = "private"
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
@dataclass
class InferenceEndpoint:
    """A single HF Inference Endpoint deployment.

    Attributes:
        name: Endpoint name.
        model_id: ID of the deployed model.
        url: URL to send inference requests to.
        state: Current endpoint state.
        type: Endpoint type (protected, public, private).
        instance_type: Hardware instance type.
        instance_size: Instance size configuration.
        region: Deployment region.
        created_at: Creation timestamp, if known.
        updated_at: Last update timestamp, if known.
        revision: Model revision/commit.
        framework: ML framework (pytorch, etc.).
        task: Task type (text-generation, etc.).
        scaling: Scaling configuration.
    """

    # Required identification.
    name: str
    model_id: str

    # Access and lifecycle.
    url: str = ""
    state: EndpointState = EndpointState.PENDING
    type: EndpointType = EndpointType.PROTECTED

    # Hardware and placement.
    instance_type: str = ""
    instance_size: str = ""
    region: str = ""

    # Timestamps.
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    # Model details.
    revision: str = ""
    framework: str = ""
    task: str = ""

    # Each instance gets its own dict.
    scaling: Dict[str, Any] = field(default_factory=dict)

    @property
    def is_running(self) -> bool:
        """Whether the endpoint state is ``RUNNING``."""
        current = self.state
        return current == EndpointState.RUNNING

    @property
    def is_paused(self) -> bool:
        """Whether the endpoint is paused or scaled to zero."""
        idle_states = (EndpointState.PAUSED, EndpointState.SCALED_TO_ZERO)
        return self.state in idle_states
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class EndpointResponse:
    """Result of a request sent to an Inference Endpoint.

    Attributes:
        content: Generated text.
        model: Model ID.
        usage: Token usage counters.
        error: Error message if the request failed; empty otherwise.
    """

    # Generated text.
    content: str = ""
    # Model ID.
    model: str = ""
    # Each instance gets its own dict.
    usage: Dict[str, int] = field(default_factory=dict)
    # Empty when no error was recorded.
    error: str = ""
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class HFEndpointsClient:
    """HuggingFace Inference Endpoints client.

    Provides access to dedicated Inference Endpoints for production deployments.

    Environment:
        HF_TOKEN: Required for accessing endpoints API and protected endpoints.
        HUGGING_FACE_HUB_TOKEN: Consulted when HF_TOKEN is not set.
    """

    def __init__(self, token: Optional[str] = None, namespace: Optional[str] = None):
        """Initialize the Endpoints client.

        Args:
            token: HF token. Falls back to the HF_TOKEN, then the
                HUGGING_FACE_HUB_TOKEN environment variable.
            namespace: HF organization/username namespace used when a call
                does not pass one explicitly.
        """
        self._token = (
            token or os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
        )
        self._namespace = namespace

    @property
    def is_authenticated(self) -> bool:
        """Check if we have a non-empty token."""
        return bool(self._token)

    @staticmethod
    def _quote(segment: str) -> str:
        """Percent-encode a value for use as one URL path segment or query value."""
        # Local import keeps this class independent of the module import block.
        from urllib.parse import quote

        return quote(segment, safe="")

    def _request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Make a blocking request to the Endpoints API.

        Args:
            method: HTTP method.
            endpoint: API path, appended to ``HF_ENDPOINTS_API``.
            data: Request body; sent as JSON when non-empty.
            timeout: Request timeout in seconds.

        Returns:
            Decoded JSON response.

        Raises:
            Errors raised by ``urlopen`` (e.g. ``HTTPError``, ``URLError``)
            and by JSON decoding are propagated to the caller.
        """
        url = f"{HF_ENDPOINTS_API}{endpoint}"

        headers = {
            "Accept": "application/json",
        }

        if self._token:
            headers["Authorization"] = f"Bearer {self._token}"

        body = None
        if data:
            headers["Content-Type"] = "application/json"
            body = json.dumps(data).encode("utf-8")

        request = Request(url, data=body, headers=headers, method=method)

        with urlopen(request, timeout=timeout) as response:
            return json.loads(response.read().decode("utf-8"))

    async def _async_request(
        self, method: str, endpoint: str, data: Optional[Dict] = None, timeout: float = 30.0
    ) -> Any:
        """Run :meth:`_request` in the default executor and await its result."""
        # get_running_loop() is the supported way to obtain the loop from
        # inside a coroutine; get_event_loop() is deprecated for this use.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(
            None, lambda: self._request(method, endpoint, data, timeout)
        )

    async def list_endpoints(self, namespace: Optional[str] = None) -> List[InferenceEndpoint]:
        """List all Inference Endpoints.

        Args:
            namespace: Filter by namespace (organization/username). Falls
                back to the namespace given at construction.

        Returns:
            List of InferenceEndpoint objects. Empty when there is no token,
            on HTTP 401, or when the request or parsing fails for a
            non-HTTP reason.

        Raises:
            HTTPError: For HTTP error statuses other than 401.
        """
        if not self.is_authenticated:
            return []

        try:
            ns = namespace or self._namespace
            endpoint_url = "/endpoint"
            if ns:
                # Encode so unusual characters cannot alter the query string.
                endpoint_url = f"/endpoint?namespace={self._quote(ns)}"

            response = await self._async_request("GET", endpoint_url)

            # The payload is either a bare list or an object wrapping the
            # list under "items"; anything else yields no endpoints.
            if isinstance(response, dict):
                items = response.get("items") or []
            else:
                items = response or []

            return [self._parse_endpoint(item) for item in items]

        except HTTPError as e:
            if e.code == 401:
                return []  # Not authenticated
            raise
        except Exception:
            # Deliberate best effort: network or parsing failures are
            # reported as "no endpoints" rather than raised.
            return []

    async def get_endpoint(
        self, name: str, namespace: Optional[str] = None
    ) -> Optional[InferenceEndpoint]:
        """Get a specific Inference Endpoint.

        Args:
            name: Endpoint name.
            namespace: Namespace (organization/username). Falls back to the
                namespace given at construction; when neither is set the
                endpoint is searched for in :meth:`list_endpoints`.

        Returns:
            InferenceEndpoint or None if not found.

        Raises:
            HTTPError: For HTTP error statuses other than 404 (or, on the
                list-based path, other than 401).
        """
        if not self.is_authenticated:
            return None

        ns = namespace or self._namespace
        if not ns:
            # Without a namespace the direct URL cannot be built.
            endpoints = await self.list_endpoints()
            return next((ep for ep in endpoints if ep.name == name), None)

        try:
            path = f"/endpoint/{self._quote(ns)}/{self._quote(name)}"
            response = await self._async_request("GET", path)
            return self._parse_endpoint(response)
        except HTTPError as e:
            if e.code == 404:
                return None
            raise
        except Exception:
            # Deliberate best effort, mirroring list_endpoints.
            return None

    async def get_endpoint_status(
        self, name: str, namespace: Optional[str] = None
    ) -> Dict[str, Any]:
        """Get the status of an Inference Endpoint.

        Args:
            name: Endpoint name.
            namespace: Namespace.

        Returns:
            Status dictionary. When the endpoint is not found it holds
            ``available`` (False) and ``error``; otherwise ``available``,
            ``state``, ``url``, ``model`` and ``paused``.
        """
        endpoint = await self.get_endpoint(name, namespace)

        if not endpoint:
            return {
                "available": False,
                "error": "Endpoint not found",
            }

        return {
            "available": endpoint.is_running,
            "state": endpoint.state.value,
            "url": endpoint.url,
            "model": endpoint.model_id,
            "paused": endpoint.is_paused,
        }

    async def chat(
        self,
        endpoint_url: str,
        messages: List[Dict[str, str]],
        max_tokens: int = 2048,
        temperature: float = 0.7,
        tools: Optional[List[Dict]] = None,
    ) -> EndpointResponse:
        """Send a chat completion request to an endpoint.

        The configured token, if any, is sent as a Bearer header to
        ``endpoint_url``.

        Args:
            endpoint_url: Full endpoint URL; ``/v1/chat/completions`` is
                appended to it.
            messages: Chat messages.
            max_tokens: Maximum tokens to generate.
            temperature: Sampling temperature.
            tools: Tool definitions for function calling.

        Returns:
            EndpointResponse with generated content. Failures are reported
            through its ``error`` field instead of being raised.
        """
        payload: Dict[str, Any] = {
            "messages": messages,
            "max_tokens": max_tokens,
            "temperature": temperature,
            "stream": False,
        }

        if tools:
            payload["tools"] = tools

        try:
            # Endpoints use OpenAI-compatible format
            chat_url = endpoint_url.rstrip("/") + "/v1/chat/completions"

            headers = {
                "Content-Type": "application/json",
                "Accept": "application/json",
            }

            if self._token:
                headers["Authorization"] = f"Bearer {self._token}"

            body = json.dumps(payload).encode("utf-8")
            request = Request(chat_url, data=body, headers=headers, method="POST")

            def do_request():
                with urlopen(request, timeout=120.0) as response:
                    return json.loads(response.read().decode("utf-8"))

            loop = asyncio.get_running_loop()
            response = await loop.run_in_executor(None, do_request)

            return self._parse_chat_response(response)

        except HTTPError as e:
            error_body = ""
            try:
                error_body = e.read().decode("utf-8", errors="replace")
            except Exception:
                # Best effort: fall back to the HTTP reason phrase below.
                pass

            return EndpointResponse(error=f"HTTP {e.code}: {error_body or e.reason}")

        except Exception as e:
            return EndpointResponse(error=str(e))

    @staticmethod
    def _parse_state(raw: Any) -> EndpointState:
        """Map a raw state string to an EndpointState, defaulting to PENDING.

        Comparison ignores case and ``-``/``_`` separators so that
        camelCase values such as ``scaledToZero`` are recognised.
        """
        if isinstance(raw, str):
            wanted = raw.replace("-", "").replace("_", "").lower()
            for member in EndpointState:
                if member.value.replace("_", "").lower() == wanted:
                    return member
        return EndpointState.PENDING

    @staticmethod
    def _parse_type(raw: Any) -> EndpointType:
        """Map a raw type string to an EndpointType, defaulting to PROTECTED."""
        if isinstance(raw, str):
            try:
                return EndpointType(raw.lower())
            except ValueError:
                pass
        return EndpointType.PROTECTED

    @staticmethod
    def _parse_timestamp(raw: Any) -> Optional[datetime]:
        """Parse an ISO-8601 timestamp string; return None when it cannot be parsed."""
        if not isinstance(raw, str):
            return None
        try:
            # A trailing "Z" is rewritten to an explicit UTC offset first.
            return datetime.fromisoformat(raw.replace("Z", "+00:00"))
        except ValueError:
            return None

    def _parse_endpoint(self, data: Dict[str, Any]) -> InferenceEndpoint:
        """Parse endpoint data from API response.

        Args:
            data: One endpoint object from the API response.

        Returns:
            InferenceEndpoint populated from ``data``; missing or null
            fields fall back to the dataclass defaults.
        """
        # ``or {}`` also covers keys that are present but null.
        status = data.get("status") or {}
        model_info = data.get("model") or {}
        compute = data.get("compute") or {}
        provider = data.get("provider") or {}

        return InferenceEndpoint(
            name=data.get("name") or "",
            model_id=model_info.get("repository", ""),
            url=status.get("url") or "",
            state=self._parse_state(status.get("state")),
            type=self._parse_type(data.get("type")),
            instance_type=compute.get("instanceType", ""),
            instance_size=compute.get("instanceSize", ""),
            region=provider.get("region", ""),
            created_at=self._parse_timestamp(data.get("createdAt")),
            updated_at=self._parse_timestamp(data.get("updatedAt")),
            revision=model_info.get("revision", ""),
            framework=model_info.get("framework", ""),
            task=model_info.get("task", ""),
            scaling=compute.get("scaling") or {},
        )

    def _parse_chat_response(self, response: Dict[str, Any]) -> EndpointResponse:
        """Parse a chat completion response.

        Args:
            response: Decoded JSON body of the chat completion reply.

        Returns:
            EndpointResponse carrying the first choice's content and the
            token usage, or an ``error`` when there are no choices.
        """
        choices = response.get("choices") or []

        if not choices:
            err = response.get("error")
            if err:
                # The error may be an object with a "message" or a plain
                # string; both are reported verbatim.
                if isinstance(err, dict):
                    return EndpointResponse(error=err.get("message", str(err)))
                return EndpointResponse(error=str(err))
            return EndpointResponse(error="No response choices")

        message = choices[0].get("message") or {}
        # Content can be null (e.g. a tool-call-only reply); keep it a str.
        content = message.get("content") or ""

        usage = response.get("usage") or {}

        return EndpointResponse(
            content=content,
            model=response.get("model", ""),
            usage={
                "prompt_tokens": usage.get("prompt_tokens", 0),
                "completion_tokens": usage.get("completion_tokens", 0),
                "total_tokens": usage.get("total_tokens", 0),
            },
        )
|
|
427
|
+
|
|
428
|
+
|
|
429
|
+
# Shared client instance, created lazily by get_hf_endpoints_client().
_endpoints_client: Optional[HFEndpointsClient] = None


def get_hf_endpoints_client() -> HFEndpointsClient:
    """Return the shared HF Endpoints client, creating it on first use.

    Returns:
        The module-level HFEndpointsClient instance.
    """
    global _endpoints_client
    client = _endpoints_client
    if client is None:
        client = HFEndpointsClient()
        _endpoints_client = client
    return client
|