switch-sdk 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- switch_sdk-0.3.0/PKG-INFO +294 -0
- switch_sdk-0.3.0/README.md +262 -0
- switch_sdk-0.3.0/pyproject.toml +60 -0
- switch_sdk-0.3.0/setup.cfg +4 -0
- switch_sdk-0.3.0/src/switch_sdk/__init__.py +84 -0
- switch_sdk-0.3.0/src/switch_sdk/client.py +519 -0
- switch_sdk-0.3.0/src/switch_sdk/context.py +51 -0
- switch_sdk-0.3.0/src/switch_sdk/errors.py +41 -0
- switch_sdk-0.3.0/src/switch_sdk/executorch_runtime.py +376 -0
- switch_sdk-0.3.0/src/switch_sdk/local_models.py +359 -0
- switch_sdk-0.3.0/src/switch_sdk/local_runtime.py +34 -0
- switch_sdk-0.3.0/src/switch_sdk/models.py +346 -0
- switch_sdk-0.3.0/src/switch_sdk/py.typed +1 -0
- switch_sdk-0.3.0/src/switch_sdk/telemetry.py +98 -0
- switch_sdk-0.3.0/src/switch_sdk.egg-info/PKG-INFO +294 -0
- switch_sdk-0.3.0/src/switch_sdk.egg-info/SOURCES.txt +22 -0
- switch_sdk-0.3.0/src/switch_sdk.egg-info/dependency_links.txt +1 -0
- switch_sdk-0.3.0/src/switch_sdk.egg-info/requires.txt +17 -0
- switch_sdk-0.3.0/src/switch_sdk.egg-info/top_level.txt +1 -0
- switch_sdk-0.3.0/tests/test_client.py +305 -0
- switch_sdk-0.3.0/tests/test_executorch_runtime.py +69 -0
- switch_sdk-0.3.0/tests/test_local_models.py +77 -0
- switch_sdk-0.3.0/tests/test_region_switching_live.py +110 -0
- switch_sdk-0.3.0/tests/test_telemetry.py +56 -0
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: switch-sdk
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Python SDK for Shift managed AI routing, telemetry, and local-first execution
|
|
5
|
+
Author: Shift
|
|
6
|
+
License-Expression: LicenseRef-Proprietary
|
|
7
|
+
Keywords: ai,llm,routing,telemetry,sustainability,executorch
|
|
8
|
+
Classifier: Development Status :: 3 - Alpha
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: Programming Language :: Python :: 3
|
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Typing :: Typed
|
|
18
|
+
Requires-Python: >=3.10
|
|
19
|
+
Description-Content-Type: text/markdown
|
|
20
|
+
Requires-Dist: httpx>=0.27.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
23
|
+
Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
|
|
24
|
+
Requires-Dist: respx>=0.21.0; extra == "dev"
|
|
25
|
+
Provides-Extra: local
|
|
26
|
+
Requires-Dist: torch>=2.2.0; python_version < "3.14" and extra == "local"
|
|
27
|
+
Requires-Dist: transformers>=4.41.0; python_version < "3.14" and extra == "local"
|
|
28
|
+
Requires-Dist: optimum-executorch>=1.1.0; python_version < "3.14" and extra == "local"
|
|
29
|
+
Provides-Extra: publish
|
|
30
|
+
Requires-Dist: build>=1.2.2; extra == "publish"
|
|
31
|
+
Requires-Dist: twine>=5.1.1; extra == "publish"
|
|
32
|
+
|
|
33
|
+
# switch-sdk
|
|
34
|
+
|
|
35
|
+
Python SDK for the Shift (Switch gateway) managed API.
|
|
36
|
+
|
|
37
|
+
## Install
|
|
38
|
+
|
|
39
|
+
```bash
|
|
40
|
+
pip install switch-sdk
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
For local development:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
pip install -e ".[dev]"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
For local ExecuTorch runtime work:
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
pip install -e ".[dev,local]"
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
For packaging and publishing:
|
|
56
|
+
|
|
57
|
+
```bash
|
|
58
|
+
pip install -e ".[publish]"
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Note: ExecuTorch wheels are not available for Python 3.14 yet. Use Python 3.10-3.13 (3.11 works well).
|
|
62
|
+
|
|
63
|
+
## Required values
|
|
64
|
+
|
|
65
|
+
- `base_url`: your gateway URL, for example `http://localhost:8000`
|
|
66
|
+
- `api_key`: your plain project key (for example `aura_...`), not the SHA256 hash
|
|
67
|
+
|
|
68
|
+
Environment shortcuts are supported:
|
|
69
|
+
|
|
70
|
+
- `SHIFT_BASE_URL` (fallback: `SWITCH_BASE_URL`)
|
|
71
|
+
- `SHIFT_API_KEY` (fallbacks: `SWITCH_API_KEY`, `API_KEY`)
|
|
72
|
+
|
|
73
|
+
## Quick start
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import asyncio
|
|
77
|
+
from switch_sdk import SwitchClient, ChatMessage
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
async def main() -> None:
|
|
81
|
+
async with SwitchClient.from_env() as client:
|
|
82
|
+
completion = await client.chat(
|
|
83
|
+
model="gpt-5",
|
|
84
|
+
messages=[ChatMessage(role="user", content="Reply with: SDK_OK")],
|
|
85
|
+
residency="US",
|
|
86
|
+
sla="realtime",
|
|
87
|
+
capability_flags={"force_cloud": True, "preferred_region": "eastus"},
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
print(completion.choices[0].message.content)
|
|
91
|
+
print(completion.switch_meta["route"]["target"]["region"])
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
asyncio.run(main())
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Set env vars first:
|
|
98
|
+
|
|
99
|
+
```bash
|
|
100
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
101
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
## Hybrid local-first mode (ExecuTorch-ready)
|
|
105
|
+
|
|
106
|
+
`chat_hybrid()` tries local execution first, then falls back to cloud when needed.
|
|
107
|
+
Local models are cached on disk and downloaded only once per model version.
|
|
108
|
+
|
|
109
|
+
```python
|
|
110
|
+
import asyncio
|
|
111
|
+
from switch_sdk import ChatMessage, LocalModelManager, SwitchClient
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
manifest = [
|
|
115
|
+
{
|
|
116
|
+
"model_id": "smollm2-135m",
|
|
117
|
+
"task": "chat",
|
|
118
|
+
"download_url": "https://your-model-host/smollm2-135m.pte",
|
|
119
|
+
"sha256": "replace_with_sha256",
|
|
120
|
+
"size_mb": 550,
|
|
121
|
+
"min_ram_gb": 4,
|
|
122
|
+
"max_prompt_chars": 280,
|
|
123
|
+
"rank": 10,
|
|
124
|
+
},
|
|
125
|
+
]
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
async def main() -> None:
|
|
129
|
+
manager = LocalModelManager(cache_dir="~/.shift/models", manifest=manifest)
|
|
130
|
+
# Optional: real ExecuTorch adapter (requires deps below)
|
|
131
|
+
from switch_sdk import build_executorch_text_runtime
|
|
132
|
+
local_runtime = build_executorch_text_runtime(
|
|
133
|
+
tokenizer_source="HuggingFaceTB/SmolLM2-135M-Instruct",
|
|
134
|
+
max_new_tokens=96,
|
|
135
|
+
prefer_optimum=True,
|
|
136
|
+
)
|
|
137
|
+
|
|
138
|
+
async with SwitchClient(
|
|
139
|
+
base_url="http://localhost:8000",
|
|
140
|
+
api_key="aura_your_plain_project_key",
|
|
141
|
+
local_model_manager=manager,
|
|
142
|
+
local_runtime=local_runtime,
|
|
143
|
+
) as client:
|
|
144
|
+
completion = await client.chat_hybrid(
|
|
145
|
+
model="auto",
|
|
146
|
+
messages=[ChatMessage(role="user", content="Reply exactly: LOCAL_OK")],
|
|
147
|
+
capability_flags={"auto_model": True},
|
|
148
|
+
)
|
|
149
|
+
print(completion.model)
|
|
150
|
+
print(completion.choices[0].message.content)
|
|
151
|
+
print(completion.switch_meta)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
asyncio.run(main())
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
Notes:
|
|
158
|
+
- Default local runtime is a stub (for wiring/tests).
|
|
159
|
+
- `build_executorch_text_runtime(...)` provides a real adapter that prefers Optimum ExecuTorch and falls back to raw `executorch.runtime`.
|
|
160
|
+
- Cache path format: `~/.shift/models/<model_id>/<version>/model.pte`
|
|
161
|
+
- LRU eviction is applied when cache exceeds `max_cache_gb`.
|
|
162
|
+
|
|
163
|
+
Install local runtime dependencies:
|
|
164
|
+
|
|
165
|
+
```bash
|
|
166
|
+
pip install -e ".[local]"
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
Ready-made demo manifest:
|
|
170
|
+
|
|
171
|
+
- `examples/local_manifest_smollm2_135m.json` (relative to the switch-sdk project root)
|
|
172
|
+
|
|
173
|
+
Runtime callable contract:
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
from switch_sdk import ChatMessage, LocalModelHandle
|
|
177
|
+
|
|
178
|
+
async def my_executorch_runtime(messages: list[ChatMessage], handle: LocalModelHandle) -> str:
|
|
179
|
+
# Load/use handle.path (.pte) with your ExecuTorch integration.
|
|
180
|
+
# Return assistant text.
|
|
181
|
+
return "LOCAL_EXECUTORCH_OK"
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## Routing-only call
|
|
185
|
+
|
|
186
|
+
```python
|
|
187
|
+
decision = await client.route(
|
|
188
|
+
model="gpt-5",
|
|
189
|
+
residency="US",
|
|
190
|
+
sla="realtime",
|
|
191
|
+
capability_flags={"force_cloud": True},
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
print(decision.target.region)
|
|
195
|
+
print(decision.scores)
|
|
196
|
+
print(decision.candidate_breakdown)
|
|
197
|
+
```
|
|
198
|
+
|
|
199
|
+
## Dashboard + carbon endpoints
|
|
200
|
+
|
|
201
|
+
```python
|
|
202
|
+
summary = await client.get_dashboard_summary()
|
|
203
|
+
feed = await client.get_dashboard_feed(limit=20)
|
|
204
|
+
carbon = await client.get_live_carbon()
|
|
205
|
+
|
|
206
|
+
print(summary.summary.total_requests)
|
|
207
|
+
print(len(feed.items))
|
|
208
|
+
print(carbon.provider, carbon.regions.get("eastus"))
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Custom telemetry event
|
|
212
|
+
|
|
213
|
+
```python
|
|
214
|
+
from switch_sdk import TelemetryEvent
|
|
215
|
+
|
|
216
|
+
await client.track_event(
|
|
217
|
+
TelemetryEvent(
|
|
218
|
+
event_type="sdk_custom",
|
|
219
|
+
request_id="custom-123",
|
|
220
|
+
model="gpt-5",
|
|
221
|
+
metadata={"feature": "my_feature"},
|
|
222
|
+
)
|
|
223
|
+
)
|
|
224
|
+
|
|
225
|
+
await client.flush_telemetry()
|
|
226
|
+
```
|
|
227
|
+
|
|
228
|
+
## Error handling
|
|
229
|
+
|
|
230
|
+
```python
|
|
231
|
+
from switch_sdk import SwitchAPIError, SwitchNetworkError, SwitchTimeoutError
|
|
232
|
+
|
|
233
|
+
try:
|
|
234
|
+
await client.route(model="gpt-5")
|
|
235
|
+
except SwitchAPIError as exc:
|
|
236
|
+
print(exc.status_code, exc.detail)
|
|
237
|
+
except SwitchTimeoutError:
|
|
238
|
+
print("Request timed out")
|
|
239
|
+
except SwitchNetworkError as exc:
|
|
240
|
+
print(f"Network issue: {exc}")
|
|
241
|
+
```
|
|
242
|
+
|
|
243
|
+
## Notes
|
|
244
|
+
|
|
245
|
+
- The SDK is async-first.
|
|
246
|
+
- Use `async with SwitchClient(...)` so telemetry flushes cleanly on exit.
|
|
247
|
+
- Retries/backoff are built in for transient failures.
|
|
248
|
+
- Telemetry is best-effort and never blocks successful chat/route calls.
|
|
249
|
+
|
|
250
|
+
## Live switching checks
|
|
251
|
+
|
|
252
|
+
Automatic east/west region-switch verification script:
|
|
253
|
+
|
|
254
|
+
```bash
|
|
255
|
+
cd switch-sdk
|
|
256
|
+
.venv/bin/python examples/test_region_switching.py \
|
|
257
|
+
--base-url http://localhost:8000 \
|
|
258
|
+
--api-key aura_your_plain_project_key \
|
|
259
|
+
--east-region eastus \
|
|
260
|
+
--west-region westus \
|
|
261
|
+
--central-region centralus \
|
|
262
|
+
--check-chat
|
|
263
|
+
```
|
|
264
|
+
|
|
265
|
+
## From-env example script
|
|
266
|
+
|
|
267
|
+
```bash
|
|
268
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
269
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
270
|
+
python examples/test_from_env.py
|
|
271
|
+
```
|
|
272
|
+
|
|
273
|
+
## Local ExecuTorch sanity check
|
|
274
|
+
|
|
275
|
+
Force local execution and fail if local runtime does not work:
|
|
276
|
+
|
|
277
|
+
```bash
|
|
278
|
+
cd switch-sdk
|
|
279
|
+
.venv311/bin/python examples/test_hybrid_local.py \
|
|
280
|
+
--base-url http://localhost:8000 \
|
|
281
|
+
--api-key dummy_local_only \
|
|
282
|
+
--manifest-path examples/local_manifest_smollm2_135m.json \
|
|
283
|
+
--executorch \
|
|
284
|
+
--prefer-runtime \
|
|
285
|
+
--tokenizer-source HuggingFaceTB/SmolLM2-135M-Instruct \
|
|
286
|
+
--no-download \
|
|
287
|
+
--no-cloud-fallback
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
Expected: output JSON includes `"source": "sdk-local"` in `switch_meta`.
|
|
291
|
+
|
|
292
|
+
## Release
|
|
293
|
+
|
|
294
|
+
See `RELEASING.md` in the switch-sdk project root for TestPyPI and PyPI release steps.
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# switch-sdk
|
|
2
|
+
|
|
3
|
+
Python SDK for the Shift (Switch gateway) managed API.
|
|
4
|
+
|
|
5
|
+
## Install
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install switch-sdk
|
|
9
|
+
```
|
|
10
|
+
|
|
11
|
+
For local development:
|
|
12
|
+
|
|
13
|
+
```bash
|
|
14
|
+
pip install -e ".[dev]"
|
|
15
|
+
```
|
|
16
|
+
|
|
17
|
+
For local ExecuTorch runtime work:
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
pip install -e ".[dev,local]"
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
For packaging and publishing:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
pip install -e ".[publish]"
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
Note: ExecuTorch wheels are not available for Python 3.14 yet. Use Python 3.10-3.13 (3.11 works well).
|
|
30
|
+
|
|
31
|
+
## Required values
|
|
32
|
+
|
|
33
|
+
- `base_url`: your gateway URL, for example `http://localhost:8000`
|
|
34
|
+
- `api_key`: your plain project key (for example `aura_...`), not the SHA256 hash
|
|
35
|
+
|
|
36
|
+
Environment shortcuts are supported:
|
|
37
|
+
|
|
38
|
+
- `SHIFT_BASE_URL` (fallback: `SWITCH_BASE_URL`)
|
|
39
|
+
- `SHIFT_API_KEY` (fallbacks: `SWITCH_API_KEY`, `API_KEY`)
|
|
40
|
+
|
|
41
|
+
## Quick start
|
|
42
|
+
|
|
43
|
+
```python
|
|
44
|
+
import asyncio
|
|
45
|
+
from switch_sdk import SwitchClient, ChatMessage
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
async def main() -> None:
|
|
49
|
+
async with SwitchClient.from_env() as client:
|
|
50
|
+
completion = await client.chat(
|
|
51
|
+
model="gpt-5",
|
|
52
|
+
messages=[ChatMessage(role="user", content="Reply with: SDK_OK")],
|
|
53
|
+
residency="US",
|
|
54
|
+
sla="realtime",
|
|
55
|
+
capability_flags={"force_cloud": True, "preferred_region": "eastus"},
|
|
56
|
+
)
|
|
57
|
+
|
|
58
|
+
print(completion.choices[0].message.content)
|
|
59
|
+
print(completion.switch_meta["route"]["target"]["region"])
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
asyncio.run(main())
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
Set env vars first:
|
|
66
|
+
|
|
67
|
+
```bash
|
|
68
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
69
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
## Hybrid local-first mode (ExecuTorch-ready)
|
|
73
|
+
|
|
74
|
+
`chat_hybrid()` tries local execution first, then falls back to cloud when needed.
|
|
75
|
+
Local models are cached on disk and downloaded only once per model version.
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
import asyncio
|
|
79
|
+
from switch_sdk import ChatMessage, LocalModelManager, SwitchClient
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
manifest = [
|
|
83
|
+
{
|
|
84
|
+
"model_id": "smollm2-135m",
|
|
85
|
+
"task": "chat",
|
|
86
|
+
"download_url": "https://your-model-host/smollm2-135m.pte",
|
|
87
|
+
"sha256": "replace_with_sha256",
|
|
88
|
+
"size_mb": 550,
|
|
89
|
+
"min_ram_gb": 4,
|
|
90
|
+
"max_prompt_chars": 280,
|
|
91
|
+
"rank": 10,
|
|
92
|
+
},
|
|
93
|
+
]
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
async def main() -> None:
|
|
97
|
+
manager = LocalModelManager(cache_dir="~/.shift/models", manifest=manifest)
|
|
98
|
+
# Optional: real ExecuTorch adapter (requires deps below)
|
|
99
|
+
from switch_sdk import build_executorch_text_runtime
|
|
100
|
+
local_runtime = build_executorch_text_runtime(
|
|
101
|
+
tokenizer_source="HuggingFaceTB/SmolLM2-135M-Instruct",
|
|
102
|
+
max_new_tokens=96,
|
|
103
|
+
prefer_optimum=True,
|
|
104
|
+
)
|
|
105
|
+
|
|
106
|
+
async with SwitchClient(
|
|
107
|
+
base_url="http://localhost:8000",
|
|
108
|
+
api_key="aura_your_plain_project_key",
|
|
109
|
+
local_model_manager=manager,
|
|
110
|
+
local_runtime=local_runtime,
|
|
111
|
+
) as client:
|
|
112
|
+
completion = await client.chat_hybrid(
|
|
113
|
+
model="auto",
|
|
114
|
+
messages=[ChatMessage(role="user", content="Reply exactly: LOCAL_OK")],
|
|
115
|
+
capability_flags={"auto_model": True},
|
|
116
|
+
)
|
|
117
|
+
print(completion.model)
|
|
118
|
+
print(completion.choices[0].message.content)
|
|
119
|
+
print(completion.switch_meta)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
asyncio.run(main())
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
Notes:
|
|
126
|
+
- Default local runtime is a stub (for wiring/tests).
|
|
127
|
+
- `build_executorch_text_runtime(...)` provides a real adapter that prefers Optimum ExecuTorch and falls back to raw `executorch.runtime`.
|
|
128
|
+
- Cache path format: `~/.shift/models/<model_id>/<version>/model.pte`
|
|
129
|
+
- LRU eviction is applied when cache exceeds `max_cache_gb`.
|
|
130
|
+
|
|
131
|
+
Install local runtime dependencies:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
pip install -e ".[local]"
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
Ready-made demo manifest:
|
|
138
|
+
|
|
139
|
+
- `examples/local_manifest_smollm2_135m.json` (relative to the switch-sdk project root)
|
|
140
|
+
|
|
141
|
+
Runtime callable contract:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from switch_sdk import ChatMessage, LocalModelHandle
|
|
145
|
+
|
|
146
|
+
async def my_executorch_runtime(messages: list[ChatMessage], handle: LocalModelHandle) -> str:
|
|
147
|
+
# Load/use handle.path (.pte) with your ExecuTorch integration.
|
|
148
|
+
# Return assistant text.
|
|
149
|
+
return "LOCAL_EXECUTORCH_OK"
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Routing-only call
|
|
153
|
+
|
|
154
|
+
```python
|
|
155
|
+
decision = await client.route(
|
|
156
|
+
model="gpt-5",
|
|
157
|
+
residency="US",
|
|
158
|
+
sla="realtime",
|
|
159
|
+
capability_flags={"force_cloud": True},
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
print(decision.target.region)
|
|
163
|
+
print(decision.scores)
|
|
164
|
+
print(decision.candidate_breakdown)
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Dashboard + carbon endpoints
|
|
168
|
+
|
|
169
|
+
```python
|
|
170
|
+
summary = await client.get_dashboard_summary()
|
|
171
|
+
feed = await client.get_dashboard_feed(limit=20)
|
|
172
|
+
carbon = await client.get_live_carbon()
|
|
173
|
+
|
|
174
|
+
print(summary.summary.total_requests)
|
|
175
|
+
print(len(feed.items))
|
|
176
|
+
print(carbon.provider, carbon.regions.get("eastus"))
|
|
177
|
+
```
|
|
178
|
+
|
|
179
|
+
## Custom telemetry event
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from switch_sdk import TelemetryEvent
|
|
183
|
+
|
|
184
|
+
await client.track_event(
|
|
185
|
+
TelemetryEvent(
|
|
186
|
+
event_type="sdk_custom",
|
|
187
|
+
request_id="custom-123",
|
|
188
|
+
model="gpt-5",
|
|
189
|
+
metadata={"feature": "my_feature"},
|
|
190
|
+
)
|
|
191
|
+
)
|
|
192
|
+
|
|
193
|
+
await client.flush_telemetry()
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Error handling
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from switch_sdk import SwitchAPIError, SwitchNetworkError, SwitchTimeoutError
|
|
200
|
+
|
|
201
|
+
try:
|
|
202
|
+
await client.route(model="gpt-5")
|
|
203
|
+
except SwitchAPIError as exc:
|
|
204
|
+
print(exc.status_code, exc.detail)
|
|
205
|
+
except SwitchTimeoutError:
|
|
206
|
+
print("Request timed out")
|
|
207
|
+
except SwitchNetworkError as exc:
|
|
208
|
+
print(f"Network issue: {exc}")
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
## Notes
|
|
212
|
+
|
|
213
|
+
- The SDK is async-first.
|
|
214
|
+
- Use `async with SwitchClient(...)` so telemetry flushes cleanly on exit.
|
|
215
|
+
- Retries/backoff are built in for transient failures.
|
|
216
|
+
- Telemetry is best-effort and never blocks successful chat/route calls.
|
|
217
|
+
|
|
218
|
+
## Live switching checks
|
|
219
|
+
|
|
220
|
+
Automatic east/west region-switch verification script:
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
cd switch-sdk
|
|
224
|
+
.venv/bin/python examples/test_region_switching.py \
|
|
225
|
+
--base-url http://localhost:8000 \
|
|
226
|
+
--api-key aura_your_plain_project_key \
|
|
227
|
+
--east-region eastus \
|
|
228
|
+
--west-region westus \
|
|
229
|
+
--central-region centralus \
|
|
230
|
+
--check-chat
|
|
231
|
+
```
|
|
232
|
+
|
|
233
|
+
## From-env example script
|
|
234
|
+
|
|
235
|
+
```bash
|
|
236
|
+
export SHIFT_BASE_URL=http://localhost:8000
|
|
237
|
+
export SHIFT_API_KEY=aura_your_plain_project_key
|
|
238
|
+
python examples/test_from_env.py
|
|
239
|
+
```
|
|
240
|
+
|
|
241
|
+
## Local ExecuTorch sanity check
|
|
242
|
+
|
|
243
|
+
Force local execution and fail if local runtime does not work:
|
|
244
|
+
|
|
245
|
+
```bash
|
|
246
|
+
cd switch-sdk
|
|
247
|
+
.venv311/bin/python examples/test_hybrid_local.py \
|
|
248
|
+
--base-url http://localhost:8000 \
|
|
249
|
+
--api-key dummy_local_only \
|
|
250
|
+
--manifest-path examples/local_manifest_smollm2_135m.json \
|
|
251
|
+
--executorch \
|
|
252
|
+
--prefer-runtime \
|
|
253
|
+
--tokenizer-source HuggingFaceTB/SmolLM2-135M-Instruct \
|
|
254
|
+
--no-download \
|
|
255
|
+
--no-cloud-fallback
|
|
256
|
+
```
|
|
257
|
+
|
|
258
|
+
Expected: output JSON includes `"source": "sdk-local"` in `switch_meta`.
|
|
259
|
+
|
|
260
|
+
## Release
|
|
261
|
+
|
|
262
|
+
See `RELEASING.md` in the switch-sdk project root for TestPyPI and PyPI release steps.
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "switch-sdk"
|
|
7
|
+
version = "0.3.0"
|
|
8
|
+
description = "Python SDK for Shift managed AI routing, telemetry, and local-first execution"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
authors = [{ name = "Shift" }]
|
|
12
|
+
license = "LicenseRef-Proprietary"
|
|
13
|
+
keywords = ["ai", "llm", "routing", "telemetry", "sustainability", "executorch"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3",
|
|
18
|
+
"Programming Language :: Python :: 3.10",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Programming Language :: Python :: 3.14",
|
|
23
|
+
"Operating System :: OS Independent",
|
|
24
|
+
"Typing :: Typed",
|
|
25
|
+
]
|
|
26
|
+
dependencies = [
|
|
27
|
+
"httpx>=0.27.0"
|
|
28
|
+
]
|
|
29
|
+
|
|
30
|
+
[project.optional-dependencies]
|
|
31
|
+
dev = [
|
|
32
|
+
"pytest>=8.0.0",
|
|
33
|
+
"pytest-asyncio>=0.23.0",
|
|
34
|
+
"respx>=0.21.0",
|
|
35
|
+
]
|
|
36
|
+
local = [
|
|
37
|
+
"torch>=2.2.0; python_version < '3.14'",
|
|
38
|
+
"transformers>=4.41.0; python_version < '3.14'",
|
|
39
|
+
"optimum-executorch>=1.1.0; python_version < '3.14'",
|
|
40
|
+
]
|
|
41
|
+
publish = [
|
|
42
|
+
"build>=1.2.2",
|
|
43
|
+
"twine>=5.1.1",
|
|
44
|
+
]
|
|
45
|
+
|
|
46
|
+
[tool.setuptools]
|
|
47
|
+
package-dir = {"" = "src"}
|
|
48
|
+
|
|
49
|
+
[tool.setuptools.packages.find]
|
|
50
|
+
where = ["src"]
|
|
51
|
+
|
|
52
|
+
[tool.setuptools.package-data]
|
|
53
|
+
switch_sdk = ["py.typed"]
|
|
54
|
+
|
|
55
|
+
[tool.pytest.ini_options]
|
|
56
|
+
asyncio_mode = "auto"
|
|
57
|
+
pythonpath = ["src"]
|
|
58
|
+
markers = [
|
|
59
|
+
"integration: live tests that hit a running Shift gateway",
|
|
60
|
+
]
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
from importlib.metadata import PackageNotFoundError, version
|
|
2
|
+
|
|
3
|
+
from switch_sdk.client import SwitchClient
|
|
4
|
+
from switch_sdk.context import switch_trace, trace_execution
|
|
5
|
+
from switch_sdk.errors import (
|
|
6
|
+
SwitchAPIError,
|
|
7
|
+
SwitchClientNotStartedError,
|
|
8
|
+
SwitchLocalModelError,
|
|
9
|
+
SwitchNetworkError,
|
|
10
|
+
SwitchSDKError,
|
|
11
|
+
SwitchTimeoutError,
|
|
12
|
+
)
|
|
13
|
+
from switch_sdk.executorch_runtime import (
|
|
14
|
+
ExecuTorchRuntimeConfig,
|
|
15
|
+
ExecuTorchTextRuntime,
|
|
16
|
+
build_executorch_text_runtime,
|
|
17
|
+
)
|
|
18
|
+
from switch_sdk.local_models import HardwareProfile, LocalModelHandle, LocalModelManager, LocalModelSpec
|
|
19
|
+
from switch_sdk.local_runtime import LocalChatRuntime, default_stub_local_runtime
|
|
20
|
+
from switch_sdk.models import (
|
|
21
|
+
CarbonLiveResponse,
|
|
22
|
+
CandidateScoreBreakdown,
|
|
23
|
+
ChatChoice,
|
|
24
|
+
ChatChoiceMessage,
|
|
25
|
+
ChatCompletion,
|
|
26
|
+
ChatMessage,
|
|
27
|
+
ChatRequest,
|
|
28
|
+
ChatUsage,
|
|
29
|
+
DashboardFeedItem,
|
|
30
|
+
DashboardFeedResponse,
|
|
31
|
+
DashboardSummary,
|
|
32
|
+
DashboardSummaryResponse,
|
|
33
|
+
RouteDecision,
|
|
34
|
+
RouteRequest,
|
|
35
|
+
RouteTarget,
|
|
36
|
+
RoutingWeights,
|
|
37
|
+
TelemetryEvent,
|
|
38
|
+
TelemetryIngestResult,
|
|
39
|
+
)
|
|
40
|
+
|
|
41
|
+
try:
|
|
42
|
+
__version__ = version('switch-sdk')
|
|
43
|
+
except PackageNotFoundError:
|
|
44
|
+
__version__ = '0.0.0'
|
|
45
|
+
|
|
46
|
+
__all__ = [
|
|
47
|
+
'__version__',
|
|
48
|
+
'CarbonLiveResponse',
|
|
49
|
+
'CandidateScoreBreakdown',
|
|
50
|
+
'ChatChoice',
|
|
51
|
+
'ChatChoiceMessage',
|
|
52
|
+
'ChatCompletion',
|
|
53
|
+
'ChatMessage',
|
|
54
|
+
'ChatRequest',
|
|
55
|
+
'ChatUsage',
|
|
56
|
+
'DashboardFeedItem',
|
|
57
|
+
'DashboardFeedResponse',
|
|
58
|
+
'DashboardSummary',
|
|
59
|
+
'DashboardSummaryResponse',
|
|
60
|
+
'RouteDecision',
|
|
61
|
+
'RouteRequest',
|
|
62
|
+
'RouteTarget',
|
|
63
|
+
'RoutingWeights',
|
|
64
|
+
'SwitchAPIError',
|
|
65
|
+
'SwitchClient',
|
|
66
|
+
'SwitchClientNotStartedError',
|
|
67
|
+
'SwitchLocalModelError',
|
|
68
|
+
'SwitchNetworkError',
|
|
69
|
+
'SwitchSDKError',
|
|
70
|
+
'SwitchTimeoutError',
|
|
71
|
+
'TelemetryEvent',
|
|
72
|
+
'TelemetryIngestResult',
|
|
73
|
+
'ExecuTorchRuntimeConfig',
|
|
74
|
+
'ExecuTorchTextRuntime',
|
|
75
|
+
'build_executorch_text_runtime',
|
|
76
|
+
'HardwareProfile',
|
|
77
|
+
'LocalModelSpec',
|
|
78
|
+
'LocalModelHandle',
|
|
79
|
+
'LocalModelManager',
|
|
80
|
+
'LocalChatRuntime',
|
|
81
|
+
'default_stub_local_runtime',
|
|
82
|
+
'switch_trace',
|
|
83
|
+
'trace_execution',
|
|
84
|
+
]
|