terradev-cli 3.5.3__tar.gz → 3.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- terradev_cli-3.7.0/PKG-INFO +658 -0
- terradev_cli-3.7.0/README.md +607 -0
- terradev_cli-3.7.0/core/helm_generator.py +923 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/stripe_manager.py +90 -9
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/training_orchestrator.py +162 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/kubernetes_enhanced.py +249 -13
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/kubernetes_service.py +4 -4
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/setup.py +4 -4
- terradev_cli-3.7.0/terradev_cli/__init__.py +1 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0/terradev_cli}/cli.py +264 -193
- terradev_cli-3.7.0/terradev_cli.egg-info/PKG-INFO +658 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/terradev_cli.egg-info/SOURCES.txt +2 -1
- terradev_cli-3.7.0/terradev_cli.egg-info/entry_points.txt +2 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/terradev_cli.egg-info/top_level.txt +1 -1
- terradev_cli-3.5.3/PKG-INFO +0 -337
- terradev_cli-3.5.3/README.md +0 -286
- terradev_cli-3.5.3/core/helm_generator.py +0 -769
- terradev_cli-3.5.3/terradev_cli.egg-info/PKG-INFO +0 -337
- terradev_cli-3.5.3/terradev_cli.egg-info/entry_points.txt +0 -2
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/__init__.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/auth.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/checkpoint_manager.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/config.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/cost_scaler.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/cost_tracker.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/dag_executor.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/data_governance.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/dataset_stager.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/deployment_router.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/drift_detector.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/egress_optimizer.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/gitops_manager.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/gpu_topology.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/helm_generator_old.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/hf_cli_integration.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/hf_smart_templates.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/hf_spaces.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/inference_router.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/job_state_manager.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/manifest_cache.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/model_orchestrator.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/parallel_provisioner.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/preflight_validator.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/price_discovery.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/price_discovery_mock.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/price_intelligence.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/quick_start.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/rate_limiter.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_router.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/__init__.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/base_signal.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/complexity_signal.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/domain_signal.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/keyword_signal.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/language_signal.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/modality_signal.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/orchestrator.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/semantic_signals/safety_signal.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/ssh_key_manager.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/telemetry.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/telemetry_backup.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/terradev_engine.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/tier_manager.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/trace_viewer.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/training_monitor.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/core/warm_pool_manager.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/integrations/__init__.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/integrations/datadog_integration.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/integrations/prometheus_integration.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/integrations/wandb_integration.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/__init__.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/dvc_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/guardrails_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/huggingface_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/kserve_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/kubernetes_enhanced_fixed.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/langchain_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/langgraph_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/langsmith_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/lmcache_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/mlflow_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/ollama_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/phoenix_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/qdrant_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/ray_enhanced.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/ray_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/sglang_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/vllm_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/wandb_enhanced.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/ml_services/wandb_service.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/__init__.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/alibaba_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/aws_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/azure_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/base_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/baseten_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/coreweave_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/crusoe_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/demo_mode.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/digitalocean_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/fluidstack_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/gcp_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/hetzner_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/huggingface_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/hyperstack_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/inferx_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/lambda_labs_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/oracle_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/ovhcloud_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/provider_factory.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/real_pricing.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/runpod_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/siliconflow_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/tensordock_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/providers/vastai_provider.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/setup.cfg +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/terradev_cli.egg-info/dependency_links.txt +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/terradev_cli.egg-info/not-zip-safe +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/terradev_cli.egg-info/requires.txt +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/utils/__init__.py +0 -0
- {terradev_cli-3.5.3 → terradev_cli-3.7.0}/utils/formatters.py +0 -0
|
@@ -0,0 +1,658 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: terradev-cli
|
|
3
|
+
Version: 3.7.0
|
|
4
|
+
Summary: Cross-cloud GPU provisioning with Arize Phoenix observability, NeMo Guardrails safety, Qdrant vector DB, vLLM/SGLang inference, and Terraform-powered parallel provisioning
|
|
5
|
+
Home-page: https://github.com/theoddden/terradev
|
|
6
|
+
Author: Terradev Team
|
|
7
|
+
Author-email: team@terradev.com
|
|
8
|
+
Classifier: Development Status :: 4 - Beta
|
|
9
|
+
Classifier: Intended Audience :: Developers
|
|
10
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
11
|
+
Classifier: Programming Language :: Python :: 3
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.8
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Requires-Python: >=3.8
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
Requires-Dist: click>=8.0.0
|
|
19
|
+
Requires-Dist: aiohttp>=3.9.0
|
|
20
|
+
Requires-Dist: pyyaml>=6.0
|
|
21
|
+
Requires-Dist: requests>=2.31.0
|
|
22
|
+
Requires-Dist: stripe>=7.0.0
|
|
23
|
+
Provides-Extra: aws
|
|
24
|
+
Requires-Dist: boto3>=1.34.0; extra == "aws"
|
|
25
|
+
Provides-Extra: gcp
|
|
26
|
+
Requires-Dist: google-cloud-compute>=1.8.0; extra == "gcp"
|
|
27
|
+
Provides-Extra: azure
|
|
28
|
+
Requires-Dist: azure-mgmt-compute>=29.0.0; extra == "azure"
|
|
29
|
+
Requires-Dist: azure-identity>=1.12.0; extra == "azure"
|
|
30
|
+
Provides-Extra: oracle
|
|
31
|
+
Requires-Dist: oci>=2.118.0; extra == "oracle"
|
|
32
|
+
Provides-Extra: hf
|
|
33
|
+
Requires-Dist: huggingface-hub>=0.19.0; extra == "hf"
|
|
34
|
+
Provides-Extra: all
|
|
35
|
+
Requires-Dist: boto3>=1.34.0; extra == "all"
|
|
36
|
+
Requires-Dist: google-cloud-compute>=1.8.0; extra == "all"
|
|
37
|
+
Requires-Dist: azure-mgmt-compute>=29.0.0; extra == "all"
|
|
38
|
+
Requires-Dist: azure-identity>=1.12.0; extra == "all"
|
|
39
|
+
Requires-Dist: oci>=2.118.0; extra == "all"
|
|
40
|
+
Requires-Dist: huggingface-hub>=0.19.0; extra == "all"
|
|
41
|
+
Dynamic: author
|
|
42
|
+
Dynamic: author-email
|
|
43
|
+
Dynamic: classifier
|
|
44
|
+
Dynamic: description
|
|
45
|
+
Dynamic: description-content-type
|
|
46
|
+
Dynamic: home-page
|
|
47
|
+
Dynamic: provides-extra
|
|
48
|
+
Dynamic: requires-dist
|
|
49
|
+
Dynamic: requires-python
|
|
50
|
+
Dynamic: summary
|
|
51
|
+
|
|
52
|
+
# Terradev CLI v3.7.0
|
|
53
|
+
|
|
54
|
+
**Compare GPU prices across 20 clouds. Provision the cheapest one in one command. Arize Phoenix observability, NeMo Guardrails safety, Qdrant vector DB, and automatic vLLM optimization included.**
|
|
55
|
+
|
|
56
|
+
<p align="center">
|
|
57
|
+
<img src="https://raw.githubusercontent.com/theoddden/Terradev/main/demo/terradev-demo.gif" alt="Terradev CLI Demo" width="800">
|
|
58
|
+
</p>
|
|
59
|
+
|
|
60
|
+
## 🚀 What's New in v3.7.0
|
|
61
|
+
|
|
62
|
+
### 🧠 Automatic vLLM Workload-Based Optimization
|
|
63
|
+
The most advanced vLLM optimization system - automatically analyzes your workload patterns and optimizes the 6 critical knobs most teams never touch:
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
# Auto-optimize based on your actual workload
|
|
67
|
+
terradev vllm auto-optimize -s workload.json -m meta-llama/Llama-2-7b-hf -g 4
|
|
68
|
+
|
|
69
|
+
# Analyze running server for optimization
|
|
70
|
+
terradev vllm analyze -e http://localhost:8000
|
|
71
|
+
|
|
72
|
+
# Generate optimized configurations
|
|
73
|
+
terradev vllm optimize -m mistralai/Mistral-7B-v0.1 -t throughput -o helm
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
**Performance Gains:**
|
|
77
|
+
- **2-8x throughput improvements** typical
|
|
78
|
+
- **10-20% latency reduction** for sensitive workloads
|
|
79
|
+
- **Zero manual tuning required** - intelligent workload analysis
|
|
80
|
+
- **Dynamic adaptation** - adjusts as patterns change
|
|
81
|
+
|
|
82
|
+
### 🔧 The 6 Critical vLLM Knobs
|
|
83
|
+
1. **`--max-num-batched-tokens`** - 2048→16384 (8x throughput)
|
|
84
|
+
2. **`--gpu-memory-utilization`** - 0.90→0.95 (5% more VRAM)
|
|
85
|
+
3. **`--max-num-seqs`** - 256/1024→512-2048 (prevent queuing)
|
|
86
|
+
4. **`--enable-prefix-caching`** - OFF→ON (free throughput win)
|
|
87
|
+
5. **`--enable-chunked-prefill`** - OFF→ON (better prefill)
|
|
88
|
+
6. **CPU Core Allocation** - 2 + #GPUs (prevent starvation)
|
|
89
|
+
|
|
90
|
+
### 🎯 Smart Workload Analysis
|
|
91
|
+
- **QPS Pattern Recognition** - High traffic → larger batches
|
|
92
|
+
- **Prompt Length Analysis** - Long prompts → chunked prefill
|
|
93
|
+
- **Concurrency Detection** - Multi-user → higher sequence limits
|
|
94
|
+
- **Latency Sensitivity** - Balanced vs aggressive optimization
|
|
95
|
+
- **Memory Pressure** - Conservative vs aggressive GPU utilization
|
|
96
|
+
|
|
97
|
+
## GitOps Automation
|
|
98
|
+
|
|
99
|
+
Production-ready GitOps workflows based on real-world Kubernetes experience:
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
# Initialize GitOps repository
|
|
103
|
+
terradev gitops init --provider github --repo my-org/infra --tool argocd --cluster production
|
|
104
|
+
|
|
105
|
+
# Bootstrap GitOps tool on cluster
|
|
106
|
+
terradev gitops bootstrap --tool argocd --cluster production
|
|
107
|
+
|
|
108
|
+
# Sync cluster with Git repository
|
|
109
|
+
terradev gitops sync --cluster production --environment prod
|
|
110
|
+
|
|
111
|
+
# Validate configuration
|
|
112
|
+
terradev gitops validate --dry-run --cluster production
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### GitOps Features
|
|
116
|
+
- **Multi-Provider Support**: GitHub, GitLab, Bitbucket, Azure DevOps
|
|
117
|
+
- **Tool Integration**: ArgoCD and Flux CD support
|
|
118
|
+
- **Repository Structure**: Automated GitOps repository setup
|
|
119
|
+
- **Policy as Code**: Gatekeeper/Kyverno policy templates
|
|
120
|
+
- **Multi-Environment**: Dev, staging, production environments
|
|
121
|
+
- **Resource Management**: Automated quotas and network policies
|
|
122
|
+
- **Validation**: Dry-run and apply validation
|
|
123
|
+
- **Security**: Best practices and compliance policies
|
|
124
|
+
|
|
125
|
+
### GitOps Repository Structure
|
|
126
|
+
```
|
|
127
|
+
my-infra/
|
|
128
|
+
├── clusters/
|
|
129
|
+
│ ├── dev/
|
|
130
|
+
│ ├── staging/
|
|
131
|
+
│ └── prod/
|
|
132
|
+
├── apps/
|
|
133
|
+
├── infra/
|
|
134
|
+
├── policies/
|
|
135
|
+
└── monitoring/
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## HuggingFace Spaces Integration
|
|
139
|
+
|
|
140
|
+
Deploy any HuggingFace model to Spaces with one command:
|
|
141
|
+
|
|
142
|
+
```bash
|
|
143
|
+
# Install HF Spaces support
|
|
144
|
+
pip install terradev-cli[hf]
|
|
145
|
+
|
|
146
|
+
# Set your HF token
|
|
147
|
+
export HF_TOKEN=your_huggingface_token
|
|
148
|
+
|
|
149
|
+
# Deploy Llama 2 with one click
|
|
150
|
+
terradev hf-space my-llama --model-id meta-llama/Llama-2-7b-hf --template llm
|
|
151
|
+
|
|
152
|
+
# Deploy custom model with GPU
|
|
153
|
+
terradev hf-space my-model --model-id microsoft/DialoGPT-medium \
|
|
154
|
+
--hardware a10g-large --sdk gradio
|
|
155
|
+
|
|
156
|
+
# Result:
|
|
157
|
+
# Space URL: https://huggingface.co/spaces/username/my-llama
|
|
158
|
+
# 100k+ researchers can now access your model!
|
|
159
|
+
```
|
|
160
|
+
|
|
161
|
+
### HF Spaces Features
|
|
162
|
+
- **One-Click Deployment**: No manual configuration required
|
|
163
|
+
- **Template-Based**: LLM, embedding, and image model templates
|
|
164
|
+
- **Multi-Hardware**: CPU-basic to A100-large GPU tiers
|
|
165
|
+
- **Auto-Generated Apps**: Gradio, Streamlit, and Docker support
|
|
166
|
+
- **Revenue Streams**: Hardware upgrades, private spaces, template licensing
|
|
167
|
+
|
|
168
|
+
### Available Templates
|
|
169
|
+
```bash
|
|
170
|
+
# LLM Template (A10G GPU)
|
|
171
|
+
terradev hf-space my-llama --model-id meta-llama/Llama-2-7b-hf --template llm
|
|
172
|
+
|
|
173
|
+
# Embedding Template (CPU-upgrade)
|
|
174
|
+
terradev hf-space my-embeddings --model-id sentence-transformers/all-MiniLM-L6-v2 --template embedding
|
|
175
|
+
|
|
176
|
+
# Image Model Template (T4 GPU)
|
|
177
|
+
terradev hf-space my-image --model-id runwayml/stable-diffusion-v1-5 --template image
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
## MoE Cluster Templates (NEW in v3.2.0)
|
|
181
|
+
|
|
182
|
+
Production-ready cluster configs optimized for Mixture-of-Experts models — the dominant architecture for every major 2026 release (GLM-5, Qwen 3.5, Mistral Large 3, DeepSeek V4, Llama 5).
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Deploy any MoE model with one command
|
|
186
|
+
terradev provision --task clusters/moe-template/task.yaml \
|
|
187
|
+
--set model_id=zai-org/GLM-5-FP8 --set tp_size=8
|
|
188
|
+
|
|
189
|
+
# Or Qwen 3.5 flagship
|
|
190
|
+
terradev provision --task clusters/moe-template/task.yaml \
|
|
191
|
+
--set model_id=Qwen/Qwen3.5-397B-A17B
|
|
192
|
+
|
|
193
|
+
# Kubernetes
|
|
194
|
+
kubectl apply -f clusters/moe-template/k8s/
|
|
195
|
+
|
|
196
|
+
# Helm
|
|
197
|
+
helm upgrade --install moe-inf ./helm/terradev \
|
|
198
|
+
-f clusters/moe-template/helm/values-moe.yaml \
|
|
199
|
+
--set model.id=zai-org/GLM-5-FP8
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### MoE Template Features
|
|
203
|
+
- **Any MoE Model**: Parameterized for GLM-5, Qwen 3.5, Mistral Large 3, DeepSeek V4, Llama 5
|
|
204
|
+
- **NVLink Topology**: Enforced single-node TP with NUMA alignment
|
|
205
|
+
- **vLLM + SGLang**: Both serving backends supported
|
|
206
|
+
- **FP8 Quantization**: Half the VRAM of BF16 on H100/H200
|
|
207
|
+
- **GPU-Aware Autoscaling**: HPA on DCGM metrics and vLLM queue depth
|
|
208
|
+
- **Multi-Cloud**: RunPod, Vast.ai, Lambda, AWS, CoreWeave
|
|
209
|
+
|
|
210
|
+
### Auto-Applied Cost Optimizations (v3.5.2)
|
|
211
|
+
|
|
212
|
+
Every MoE deployment automatically includes vLLM optimizations that reduce your inference costs — no configuration needed:
|
|
213
|
+
|
|
214
|
+
| Optimization | What it does | Impact |
|
|
215
|
+
|---|---|---|
|
|
216
|
+
| **FlashInfer Fused Attention** | Persistent kernel fuses RMSNorm + QKV + RoPE + attention — eliminates ~50% memory pipeline bubbles between kernel launches | ~1.7x memory bandwidth utilization |
|
|
217
|
+
| **LMCache Distributed KV Cache** | Shares KV cache across vLLM instances via Redis — eliminates redundant prefill computation | 3-10x TTFT reduction |
|
|
218
|
+
| **KV Cache Offloading** | Spills KV cache to CPU DRAM so the GPU never recomputes prefills | Up to 9x throughput |
|
|
219
|
+
| **MTP Speculative Decoding** | Small draft predictions verified in batch by the full model | Up to 2.8x generation speed |
|
|
220
|
+
| **Sleep Mode** | Idle models hibernate to CPU RAM instead of holding GPU memory | 18-200x faster than cold restart |
|
|
221
|
+
| **Expert Parallel Load Balancer** | Rebalances MoE expert routing at runtime based on actual traffic | Eliminates GPU hotspots |
|
|
222
|
+
| **DeepEP + DeepGEMM** | Optimized all-to-all and GEMM kernels for MoE expert computation | Lower per-token latency |
|
|
223
|
+
|
|
224
|
+
### Multi-LoRA Serving (v3.5.0)
|
|
225
|
+
|
|
226
|
+
Serve **N fine-tuned models on one base MoE model, on one GPU set**. Uses vLLM's `fused_moe_lora` kernel (454% higher output tokens/sec, 87% lower TTFT). Supported: GPT-OSS, Qwen3-MoE, DeepSeek, Llama MoE.
|
|
227
|
+
|
|
228
|
+
```bash
|
|
229
|
+
# Deploy base model normally — optimizations are automatic
|
|
230
|
+
terradev provision --task clusters/moe-template/task.yaml \
|
|
231
|
+
--set model_id=Qwen/Qwen3.5-397B-A17B
|
|
232
|
+
|
|
233
|
+
# Hot-load customer adapters onto the running endpoint
|
|
234
|
+
terradev lora add -e http://<endpoint>:8000 -n customer-a -p /adapters/customer-a
|
|
235
|
+
terradev lora add -e http://<endpoint>:8000 -n customer-b -p /adapters/customer-b
|
|
236
|
+
|
|
237
|
+
# Each adapter is a model name in the OpenAI-compatible API
|
|
238
|
+
curl http://<endpoint>:8000/v1/chat/completions \
|
|
239
|
+
-d '{"model": "customer-a", "messages": [...]}'
|
|
240
|
+
|
|
241
|
+
# List / remove adapters
|
|
242
|
+
terradev lora list -e http://<endpoint>:8000
|
|
243
|
+
terradev lora remove -e http://<endpoint>:8000 -n customer-b
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
See [`clusters/moe-template/`](clusters/moe-template/) for full docs and [`clusters/glm-5/`](clusters/glm-5/) for a model-specific example.
|
|
247
|
+
|
|
248
|
+
## Production Resilience + Training Pipeline (NEW in v3.4.0)
|
|
249
|
+
|
|
250
|
+
v3.4.0 makes Terradev production-ready in **any environment** — including sandboxed runtimes like Claude Code — by eliminating hard-crash import failures and adding a complete training orchestration pipeline.
|
|
251
|
+
|
|
252
|
+
### v3.4.0 Features
|
|
253
|
+
|
|
254
|
+
- **Lazy Provider Loading**: `ProviderFactory` no longer eagerly imports all cloud SDKs. Each provider is loaded on first use — missing `boto3` won't crash the CLI if you're only using RunPod.
|
|
255
|
+
- **Graceful Dependency Fallbacks**: `stripe`, `numpy`, `boto3` wrapped in `try/except` with clear error messages. The CLI boots and runs every local command even with zero optional deps installed.
|
|
256
|
+
- **stdlib NumPy Shim**: `price_discovery` and `cost_optimizer` fall back to Python's `statistics` module when NumPy is absent.
|
|
257
|
+
- **Training Orchestrator**: DAG-parallel training launch across multi-node GPU clusters via `torchrun`, `deepspeed`, `accelerate`, or `megatron`.
|
|
258
|
+
- **Training Monitor**: Real-time GPU utilization, memory, temperature, and cost tracking per node.
|
|
259
|
+
- **Checkpoint Manager**: DAG-parallel shard writes with manifest assembly, remote upload, and state DB tracking.
|
|
260
|
+
- **Preflight Validator**: Pre-launch checks for GPU availability, NCCL, RDMA, and driver versions across all nodes.
|
|
261
|
+
- **Job State Manager**: SQLite-backed job lifecycle (created → running → completed/failed) with checkpoint history.
|
|
262
|
+
- **Provision-to-Train Bridge**: `terradev train --from-provision latest` resolves IPs from your last `provision` command automatically.
|
|
263
|
+
- **294 Tests Passing**: Comprehensive test suite covering core modules, provider contracts, and CLI smoke tests.
|
|
264
|
+
|
|
265
|
+
```bash
|
|
266
|
+
# Full training pipeline
|
|
267
|
+
terradev provision -g H100 -n 4 --parallel 6 # Provision 4x H100s
|
|
268
|
+
terradev train --script train.py --from-provision latest # Launch on provisioned nodes
|
|
269
|
+
terradev train-status # Check all training jobs
|
|
270
|
+
terradev checkpoint list --job my-job # List checkpoints
|
|
271
|
+
terradev monitor --job my-job # Live GPU metrics
|
|
272
|
+
```
|
|
273
|
+
|
|
274
|
+
## Ray Serve LLM + Expert Parallelism (NEW in v3.3.0)
|
|
275
|
+
|
|
276
|
+
v3.3.0 adds first-class support for **Wide Expert Parallelism (EP)**, **disaggregated Prefill/Decode serving**, and **NIXL KV cache transfer** — the production stack for serving 600B+ MoE models at scale.
|
|
277
|
+
|
|
278
|
+
```bash
|
|
279
|
+
# Deploy GLM-5 with Wide-EP across 32 GPUs (TP=1, DP=32)
|
|
280
|
+
terradev ml ray --deploy-wide-ep \
|
|
281
|
+
--model zai-org/GLM-5-FP8 \
|
|
282
|
+
--tp-size 1 --dp-size 32
|
|
283
|
+
|
|
284
|
+
# Disaggregated Prefill/Decode with NIXL KV transfer
|
|
285
|
+
terradev ml ray --deploy-pd \
|
|
286
|
+
--model zai-org/GLM-5-FP8 \
|
|
287
|
+
--prefill-tp 8 --decode-tp 1 --decode-dp 24
|
|
288
|
+
|
|
289
|
+
# SGLang serving with EP + EPLB + DBO
|
|
290
|
+
terradev ml sglang --start --instance-ip <IP> \
|
|
291
|
+
--model zai-org/GLM-5-FP8 \
|
|
292
|
+
--tp-size 1 --dp-size 8 \
|
|
293
|
+
--enable-expert-parallel --enable-eplb --enable-dbo
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
### v3.3.0 Features
|
|
297
|
+
- **Ray Serve LLM Integration**: `build_dp_deployment` and `build_pd_openai_app` for Wide-EP and disaggregated P/D serving via Ray Serve
|
|
298
|
+
- **Expert Parallelism (EP)**: Distribute MoE experts across GPUs — serve 744B models on 8 GPUs where pure TP would OOM
|
|
299
|
+
- **Expert Parallel Load Balancer (EPLB)**: Runtime expert rebalancing based on actual token routing patterns
|
|
300
|
+
- **Dual-Batch Overlap (DBO)**: Overlap compute with all-to-all communication for higher throughput
|
|
301
|
+
- **DeepEP + DeepGEMM**: Environment variables auto-configured for optimized MoE kernels
|
|
302
|
+
- **NIXL KV Connector**: Zero-copy GPU-to-GPU KV cache transfer over RDMA/NVLink for disaggregated serving
|
|
303
|
+
- **MoE-Aware Orchestrator**: Memory estimation uses weight vs active parameter distinction (744B total, 40B active)
|
|
304
|
+
- **EP Group Routing**: Inference router tracks expert ranges per rank and routes to the GPU hosting target experts
|
|
305
|
+
- **SGLang Lifecycle**: Real SSH/systemd server management matching vLLM — `start_server`, `stop_server`, `install_on_instance`
|
|
306
|
+
- **Transport-Aware P/D Routing**: Prefers NIXL+RDMA > NIXL > LMCache for KV cache handoff scoring
|
|
307
|
+
|
|
308
|
+
### MoE Memory Model
|
|
309
|
+
|
|
310
|
+
| Model | Total Params | Active Params | FP8 Weight | Per-GPU (EP=8) |
|
|
311
|
+
|-------|-------------|---------------|------------|----------------|
|
|
312
|
+
| GLM-5 | 744B | 40B | ~380GB | ~55GB |
|
|
313
|
+
| DeepSeek V3 | 671B | 37B | ~340GB | ~50GB |
|
|
314
|
+
| Qwen 3.5 | 397B | 17B | ~200GB | ~32GB |
|
|
315
|
+
|
|
316
|
+
## Installation
|
|
317
|
+
|
|
318
|
+
```bash
|
|
319
|
+
pip install terradev-cli==3.7.0
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
With vLLM optimization and HF Spaces support:
|
|
323
|
+
```bash
|
|
324
|
+
pip install terradev-cli[vllm] # vLLM optimization features
|
|
325
|
+
pip install terradev-cli[hf] # HuggingFace Spaces deployment
|
|
326
|
+
pip install terradev-cli[all] # All cloud providers + ML services + HF Spaces + vLLM
|
|
327
|
+
```
|
|
328
|
+
|
|
329
|
+
## Quick Start
|
|
330
|
+
|
|
331
|
+
```bash
|
|
332
|
+
# 1. Get setup instructions for any provider
|
|
333
|
+
terradev setup runpod --quick
|
|
334
|
+
terradev setup aws --quick
|
|
335
|
+
|
|
336
|
+
# 2. Configure your cloud credentials (BYOAuth — you own your keys)
|
|
337
|
+
terradev configure --provider runpod
|
|
338
|
+
terradev configure --provider aws
|
|
339
|
+
terradev configure --provider vastai
|
|
340
|
+
|
|
341
|
+
# 3. Deploy to HuggingFace Spaces (NEW!)
|
|
342
|
+
terradev hf-space my-llama --model-id meta-llama/Llama-2-7b-hf --template llm
|
|
343
|
+
terradev hf-space my-embeddings --model-id sentence-transformers/all-MiniLM-L6-v2 --template embedding
|
|
344
|
+
terradev hf-space my-image --model-id runwayml/stable-diffusion-v1-5 --template image
|
|
345
|
+
|
|
346
|
+
# 4. Optimize vLLM automatically (NEW!)
|
|
347
|
+
terradev vllm auto-optimize -s workload.json -m meta-llama/Llama-2-7b-hf -g 4
|
|
348
|
+
terradev vllm analyze -e http://localhost:8000 # Analyze running server
|
|
349
|
+
terradev vllm benchmark -e http://localhost:8000 -c 10 # Performance test
|
|
350
|
+
|
|
351
|
+
# 5. Get enhanced quotes with conversion prompts
|
|
352
|
+
terradev quote -g A100
|
|
353
|
+
terradev quote -g A100 --quick # Quick provision best quote
|
|
354
|
+
|
|
355
|
+
# 6. Provision the cheapest instance (real API call)
|
|
356
|
+
terradev provision -g A100
|
|
357
|
+
|
|
358
|
+
# 7. Configure ML services
|
|
359
|
+
terradev configure --provider wandb --dashboard-enabled true
|
|
360
|
+
terradev configure --provider langchain --tracing-enabled true
|
|
361
|
+
|
|
362
|
+
# 8. Use ML services
|
|
363
|
+
terradev ml wandb --test
|
|
364
|
+
terradev ml langchain --create-workflow my-workflow
|
|
365
|
+
|
|
366
|
+
# 9. View analytics
|
|
367
|
+
python user_analytics.py
|
|
368
|
+
|
|
369
|
+
# 10. Provision 4x H100s in parallel across multiple clouds
|
|
370
|
+
terradev provision -g H100 -n 4 --parallel 6
|
|
371
|
+
|
|
372
|
+
# 10b. Dry-run to see the allocation plan without launching
|
|
373
|
+
terradev provision -g A100 -n 2 --dry-run
|
|
374
|
+
|
|
375
|
+
# 11. Manage running instances
|
|
376
|
+
terradev status --live
|
|
377
|
+
terradev manage -i <instance-id> -a stop
|
|
378
|
+
terradev manage -i <instance-id> -a start
|
|
379
|
+
terradev manage -i <instance-id> -a terminate
|
|
380
|
+
|
|
381
|
+
# 12. Execute commands on provisioned instances
|
|
382
|
+
terradev execute -i <instance-id> -c "python train.py"
|
|
383
|
+
|
|
384
|
+
# 13. Stage datasets near compute (compress + chunk + upload)
|
|
385
|
+
terradev stage -d ./my-dataset --target-regions us-east-1,eu-west-1
|
|
386
|
+
|
|
387
|
+
# 14. View cost analytics from the tracking database
|
|
388
|
+
terradev analytics --days 30
|
|
389
|
+
|
|
390
|
+
# 15. Find cheaper alternatives for running instances
|
|
391
|
+
terradev optimize
|
|
392
|
+
|
|
393
|
+
# 16. One-command Docker workload (provision + deploy + run)
|
|
394
|
+
terradev run --gpu A100 --image pytorch/pytorch:latest -c "python train.py"
|
|
395
|
+
|
|
396
|
+
# 17. Keep an inference server alive
|
|
397
|
+
terradev run --gpu H100 --image vllm/vllm-openai:latest --keep-alive --port 8000
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
## BYOAuth — Bring Your Own Authentication
|
|
401
|
+
|
|
402
|
+
Terradev never stores or proxies your cloud credentials through a third party. Your API keys stay on your machine in `~/.terradev/credentials.json` — encrypted at rest and never transmitted to Terradev or any other third party; they are sent only to the cloud provider they belong to.
|
|
403
|
+
|
|
404
|
+
**How it works:**
|
|
405
|
+
|
|
406
|
+
1. You run `terradev configure --provider <name>` and enter your API key
|
|
407
|
+
2. Credentials are stored locally in your home directory — never sent to Terradev servers
|
|
408
|
+
3. Every API call goes directly from your machine to the cloud provider
|
|
409
|
+
4. No middleman account, no shared credentials, no markup on provider pricing
|
|
410
|
+
|
|
411
|
+
**Why this matters:**
|
|
412
|
+
|
|
413
|
+
- **Zero trust exposure** — No third party holds your AWS/GCP/Azure keys
|
|
414
|
+
- **No vendor lock-in** — If you stop using Terradev, your cloud accounts are untouched
|
|
415
|
+
- **Enterprise-ready** — Compliant with SOC2, HIPAA, and internal security policies that prohibit sharing credentials with SaaS vendors
|
|
416
|
+
- **Full audit trail** — Every provision is logged locally with provider, cost, and timestamp
|
|
417
|
+
|
|
418
|
+
## CLI Commands
|
|
419
|
+
|
|
420
|
+
### Provisioning & Management
|
|
421
|
+
| Command | Description |
|
|
422
|
+
|---------|-------------|
|
|
423
|
+
| `terradev configure` | Set up API credentials for any provider |
|
|
424
|
+
| `terradev quote` | Get real-time GPU pricing across all clouds |
|
|
425
|
+
| `terradev provision` | Provision instances with parallel multi-cloud arbitrage |
|
|
426
|
+
| `terradev manage` | Stop, start, terminate, or check instance status |
|
|
427
|
+
| `terradev status` | View all instances and cost summary |
|
|
428
|
+
| `terradev execute` | Run commands on provisioned instances |
|
|
429
|
+
| `terradev stage` | Compress, chunk, and stage datasets near compute |
|
|
430
|
+
| `terradev analytics` | Cost analytics with daily spend trends |
|
|
431
|
+
| `terradev optimize` | Find cheaper alternatives for running instances |
|
|
432
|
+
| `terradev run` | Provision + deploy Docker container + execute in one command |
|
|
433
|
+
|
|
434
|
+
### Training Pipeline (v3.4.0)
|
|
435
|
+
| Command | Description |
|
|
436
|
+
|---------|-------------|
|
|
437
|
+
| `terradev train` | Launch distributed training (torchrun/deepspeed/accelerate/megatron) |
|
|
438
|
+
| `terradev train --from-provision` | Auto-resolve nodes from last provision command |
|
|
439
|
+
| `terradev train-status` | List all training jobs and their state |
|
|
440
|
+
| `terradev monitor` | Real-time GPU metrics, utilization, cost tracking |
|
|
441
|
+
| `terradev checkpoint list` | List checkpoints for a training job |
|
|
442
|
+
| `terradev checkpoint save` | Manually trigger a checkpoint save |
|
|
443
|
+
| `terradev preflight` | Validate GPU, NCCL, RDMA, drivers before training |
|
|
444
|
+
|
|
445
|
+
### Inference & Deployment
|
|
446
|
+
| Command | Description |
|
|
447
|
+
|---------|-------------|
|
|
448
|
+
| `terradev hf-space` | One-click HuggingFace Spaces deployment |
|
|
449
|
+
| `terradev inferx` | InferX serverless inference platform — cold starts under 2s |
|
|
450
|
+
| `terradev infer-status` | Inference endpoint health and latency |
|
|
451
|
+
| `terradev infer-failover` | Auto-failover between inference endpoints |
|
|
452
|
+
| `terradev lora add` | Hot-load a LoRA adapter onto a running vLLM endpoint |
|
|
453
|
+
| `terradev lora list` | List loaded LoRA adapters |
|
|
454
|
+
| `terradev lora remove` | Hot-unload a LoRA adapter |
|
|
455
|
+
|
|
456
|
+
### GitOps & Infrastructure
|
|
457
|
+
| Command | Description |
|
|
458
|
+
|---------|-------------|
|
|
459
|
+
| `terradev up` | Provision with manifest cache + drift detection |
|
|
460
|
+
| `terradev rollback` | Versioned rollback to any deployment |
|
|
461
|
+
| `terradev manifests` | List cached deployment manifests |
|
|
462
|
+
| `terradev gitops` | ArgoCD/Flux CD GitOps repository management |
|
|
463
|
+
| `terradev integrations` | Show status of W&B, Prometheus, and infra hooks |
|
|
464
|
+
| `terradev price-discovery` | Enhanced price analytics with confidence scoring |
|
|
465
|
+
|
|
466
|
+
### HF Spaces Commands (NEW!)
|
|
467
|
+
```bash
|
|
468
|
+
# Deploy Llama 2 to HF Spaces
|
|
469
|
+
terradev hf-space my-llama --model-id meta-llama/Llama-2-7b-hf --template llm
|
|
470
|
+
|
|
471
|
+
# Deploy with custom hardware
|
|
472
|
+
terradev hf-space my-model --model-id microsoft/DialoGPT-medium \
|
|
473
|
+
--hardware a10g-large --sdk gradio --private
|
|
474
|
+
|
|
475
|
+
# Deploy embedding model
|
|
476
|
+
terradev hf-space my-embeddings --model-id sentence-transformers/all-MiniLM-L6-v2 \
|
|
477
|
+
--template embedding --env BATCH_SIZE=64
|
|
478
|
+
```
|
|
479
|
+
|
|
480
|
+
### Manifest Cache Commands (NEW!)
|
|
481
|
+
```bash
|
|
482
|
+
# Provision with manifest cache
|
|
483
|
+
terradev up --job my-training --gpu-type A100 --gpu-count 4
|
|
484
|
+
|
|
485
|
+
# Fix drift automatically
|
|
486
|
+
terradev up --job my-training --fix-drift
|
|
487
|
+
|
|
488
|
+
# Rollback to previous version
|
|
489
|
+
terradev rollback my-training@v2
|
|
490
|
+
|
|
491
|
+
# List all cached manifests
|
|
492
|
+
terradev manifests --job my-training
|
|
493
|
+
```
|
|
494
|
+
|
|
495
|
+
### InferX Commands (NEW!)
|
|
496
|
+
```bash
|
|
497
|
+
# Start InferX serverless inference platform
|
|
498
|
+
terradev inferx start --model-id meta-llama/Llama-2-7b-hf --hardware a10g
|
|
499
|
+
|
|
500
|
+
# Deploy inference endpoint with auto-scaling
|
|
501
|
+
terradev inferx deploy --endpoint my-llama-api --model-id microsoft/DialoGPT-medium \
|
|
502
|
+
--hardware t4 --max-concurrency 100
|
|
503
|
+
|
|
504
|
+
# Get inference endpoint status and health
|
|
505
|
+
terradev inferx status --endpoint my-llama-api
|
|
506
|
+
|
|
507
|
+
# Route inference requests to optimal endpoint
|
|
508
|
+
terradev inferx route --query "What is machine learning?" --model-type llm
|
|
509
|
+
|
|
510
|
+
# Run failover tests for high availability
|
|
511
|
+
terradev inferx failover --endpoint my-llama-api --test-load 1000
|
|
512
|
+
|
|
513
|
+
# Get cost analysis for inference workloads
|
|
514
|
+
terradev inferx cost-analysis --days 30 --endpoint my-llama-api
|
|
515
|
+
```
|
|
516
|
+
|
|
517
|
+
## Observability & ML Integrations
|
|
518
|
+
|
|
519
|
+
Terradev facilitates connections to your existing tools via BYOAuth — your keys stay local, and all data flows directly from your instances to your services.
|
|
520
|
+
|
|
521
|
+
| Integration | What Terradev Does | Setup |
|
|
522
|
+
|-------------|-------------------|-------|
|
|
523
|
+
| **Weights & Biases** | Auto-injects WANDB_* env vars into provisioned containers | `terradev configure --provider wandb --api-key YOUR_KEY` |
|
|
524
|
+
| **Prometheus** | Pushes provision/terminate metrics to your Pushgateway | `terradev configure --provider prometheus --api-key PUSHGATEWAY_URL` |
|
|
525
|
+
| **Grafana** | Exports a ready-to-import dashboard JSON | `terradev integrations --export-grafana` |
|
|
526
|
+
|
|
527
|
+
> Prices are queried in real time from all 19 providers. Actual savings vary by availability.
|
|
528
|
+
|
|
529
|
+
## Pricing Tiers
|
|
530
|
+
|
|
531
|
+
| Feature | Research (Free) | Research+ ($49.99/mo) | Enterprise ($299.99/mo) | Enterprise+ ($0.09/GPU-hr) |
|
|
532
|
+
|----------|------------------|------------------------|------------------------|---------------------------|
|
|
533
|
+
| Max concurrent instances | 1 | 8 | 32 | Unlimited |
|
|
534
|
+
| Provisions/month | 10 | 100 | Unlimited | Unlimited |
|
|
535
|
+
| User seats | 1 | 1 | 5 | Unlimited |
|
|
536
|
+
| Providers | All 19 | All 19 | All 19 + priority | All 19 + dedicated support |
|
|
537
|
+
| Cost tracking | Yes | Yes | Yes | Yes + fleet dashboard |
|
|
538
|
+
| Dataset staging | Yes | Yes | Yes | Yes |
|
|
539
|
+
| Egress optimization | Basic | Full | Full + custom routes | Full + custom routes |
|
|
540
|
+
| GPU-hour metering | - | - | - | $0.09/GPU-hr (32 GPU min) |
|
|
541
|
+
| Fleet management | - | - | - | Yes |
|
|
542
|
+
| SLA guarantee | - | - | Yes | Yes |
|
|
543
|
+
|
|
544
|
+
> **Enterprise+**: Metered billing at **$0.09 per GPU-hour** with a **minimum commitment of 32 GPUs**. You always pay for at least 32 GPU-hours per hour ($2.88/hr floor) whether you use them or not — same model as AWS Reserved Instances. Billed monthly to your card via Stripe. Run `terradev upgrade -t enterprise_plus` to get started.
|
|
545
|
+
|
|
546
|
+
## Integrations
|
|
547
|
+
|
|
548
|
+
### Jupyter / Colab / VS Code Notebooks
|
|
549
|
+
```bash
|
|
550
|
+
pip install terradev-jupyter
|
|
551
|
+
%load_ext terradev_jupyter
|
|
552
|
+
|
|
553
|
+
%terradev quote -g A100
|
|
554
|
+
%terradev provision -g H100 --dry-run
|
|
555
|
+
%terradev run --gpu A100 --image pytorch/pytorch:latest --dry-run
|
|
556
|
+
```
|
|
557
|
+
|
|
558
|
+
### GitHub Actions
|
|
559
|
+
```yaml
|
|
560
|
+
- uses: theodden/terradev-action@v1
|
|
561
|
+
  with:
|
|
562
|
+
    gpu-type: A100
|
|
563
|
+
    max-price: "1.50"
|
|
564
|
+
  env:
|
|
565
|
+
    TERRADEV_RUNPOD_KEY: ${{ secrets.RUNPOD_API_KEY }}
|
|
566
|
+
```
|
|
567
|
+
|
|
568
|
+
### Docker (One-Command Workloads)
|
|
569
|
+
```bash
|
|
570
|
+
terradev run --gpu A100 --image pytorch/pytorch:latest -c "python train.py"
|
|
571
|
+
terradev run --gpu H100 --image vllm/vllm-openai:latest --keep-alive --port 8000
|
|
572
|
+
```
|
|
573
|
+
|
|
574
|
+
## GPU Topology Optimization (v3.2 / v3.3)
|
|
575
|
+
|
|
576
|
+
Terradev (v3.2+) automatically optimizes GPU infrastructure topology — NUMA alignment, PCIe switch pairing, SR-IOV, RDMA, and kubelet Topology Manager configuration. **You never configure any of this.** It's applied automatically when you create clusters or provision GPU nodes.
|
|
577
|
+
|
|
578
|
+
### What happens behind the scenes
|
|
579
|
+
|
|
580
|
+
When you run `terradev k8s create my-cluster --gpu H100 --count 4`:
|
|
581
|
+
|
|
582
|
+
| Layer | What Terradev auto-configures |
|
|
583
|
+
|-------|------------------------------|
|
|
584
|
+
| **NUMA Alignment** | Kubelet Topology Manager set to `restricted` with `prefer-closest-numa-nodes=true` |
|
|
585
|
+
| **CPU Pinning** | `cpuManagerPolicy: static` for deterministic core assignment |
|
|
586
|
+
| **GPUDirect RDMA** | `nvidia_peermem` kernel module loaded on all GPU nodes |
|
|
587
|
+
| **SR-IOV** | VF-per-GPU pairing enabled for multi-node clusters |
|
|
588
|
+
| **NCCL Tuning** | `NCCL_NET_GDR_LEVEL=PIX`, `NCCL_NET_GDR_READ=1`, IB enabled |
|
|
589
|
+
| **PCIe Locality** | GPU-NIC pairs forced to same NUMA node (eliminates cross-socket penalty) |
|
|
590
|
+
| **Karpenter** | Topology-aware NodePool with correct instance families per GPU type |
|
|
591
|
+
|
|
592
|
+
### Why this matters
|
|
593
|
+
|
|
594
|
+
Without topology optimization, Kubernetes randomly assigns GPUs and NICs across NUMA nodes and PCIe switches. A cross-socket GPU-NIC pairing can cut RDMA bandwidth by 30-50%. Terradev eliminates this class of performance bug entirely.
|
|
595
|
+
|
|
596
|
+
```bash
|
|
597
|
+
# All of this is automatic — just provision normally
|
|
598
|
+
terradev k8s create training-cluster --gpu H100 --count 8 --prefer-spot
|
|
599
|
+
|
|
600
|
+
# Output includes topology confirmation:
|
|
601
|
+
# 🧬 Topology optimization (auto-applied):
|
|
602
|
+
# Kubelet Topology Manager: restricted (NUMA-aligned)
|
|
603
|
+
# CPU Manager: static (pinned cores)
|
|
604
|
+
# GPUDirect RDMA: enabled (nvidia_peermem)
|
|
605
|
+
# SR-IOV: enabled (8 nodes, VF-per-GPU pairing)
|
|
606
|
+
# NCCL: IB enabled, GDR_LEVEL=PIX, GDR_READ=1
|
|
607
|
+
# PCIe locality: GPU-NIC pairs forced to same NUMA node
|
|
608
|
+
```
|
|
609
|
+
|
|
610
|
+
### DRA / DRANET Ready
|
|
611
|
+
|
|
612
|
+
Terradev's topology module includes DRA (Dynamic Resource Allocation) and DRANET resource claim generation for K8s 1.31+. When KEP-4381 lands, Terradev will automatically use `resource.kubernetes.io/pcieRoot` constraints to enforce PCIe-switch-level GPU-NIC pairing — the finest granularity possible.
|
|
613
|
+
|
|
614
|
+
## Claude Code Integration (NEW!)
|
|
615
|
+
|
|
616
|
+
Access Terradev directly from Claude Code with the MCP server:
|
|
617
|
+
|
|
618
|
+
```bash
|
|
619
|
+
# Install the MCP server
|
|
620
|
+
npm install -g terradev-mcp
|
|
621
|
+
|
|
622
|
+
# Add to your Claude Code MCP configuration:
|
|
623
|
+
{
|
|
624
|
+
"mcpServers": {
|
|
625
|
+
"terradev": {
|
|
626
|
+
"command": "terradev-mcp"
|
|
627
|
+
}
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
# Check MCP connection
|
|
632
|
+
/mcp
|
|
633
|
+
|
|
634
|
+
# Use Terradev commands naturally in Claude Code:
|
|
635
|
+
terradev quote -g H100
|
|
636
|
+
terradev provision -g A100 -n 4 --parallel 6
|
|
637
|
+
terradev k8s create my-cluster --gpu H100 --count 4 --multi-cloud
|
|
638
|
+
```
|
|
639
|
+
|
|
640
|
+
**Features available through Claude Code:**
|
|
641
|
+
- GPU price quotes across 19 providers
|
|
642
|
+
- Instance provisioning with cost optimization
|
|
643
|
+
- Kubernetes cluster creation and management
|
|
644
|
+
- Inference endpoint deployment (InferX)
|
|
645
|
+
- HuggingFace Spaces deployment
|
|
646
|
+
- Cost analytics and optimization
|
|
647
|
+
- Multi-cloud provider management
|
|
648
|
+
|
|
649
|
+
**Security:** BYOAuth — all credentials stay on your machine. Terradev never proxies API keys.
|
|
650
|
+
|
|
651
|
+
## Requirements
|
|
652
|
+
|
|
653
|
+
- Python >= 3.9
|
|
654
|
+
- Cloud provider API keys (configured via `terradev configure`)
|
|
655
|
+
|
|
656
|
+
## License
|
|
657
|
+
|
|
658
|
+
Business Source License 1.1 (BUSL-1.1) - see LICENSE file for details
|