vec-inf 0.7.2__py3-none-any.whl → 0.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/README.md +2 -1
- vec_inf/cli/_cli.py +43 -12
- vec_inf/cli/_helper.py +79 -12
- vec_inf/cli/_vars.py +37 -22
- vec_inf/client/_client_vars.py +31 -1
- vec_inf/client/_helper.py +154 -49
- vec_inf/client/_slurm_script_generator.py +109 -43
- vec_inf/client/_slurm_templates.py +110 -48
- vec_inf/client/_slurm_vars.py +13 -4
- vec_inf/client/_utils.py +13 -7
- vec_inf/client/api.py +47 -0
- vec_inf/client/config.py +17 -7
- vec_inf/client/models.py +25 -19
- vec_inf/config/README.md +1 -1
- vec_inf/config/environment.yaml +9 -2
- vec_inf/config/models.yaml +184 -368
- vec_inf/find_port.sh +10 -1
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/METADATA +17 -16
- vec_inf-0.8.0.dist-info/RECORD +27 -0
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/WHEEL +1 -1
- vec_inf-0.7.2.dist-info/RECORD +0 -27
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.2.dist-info → vec_inf-0.8.0.dist-info}/licenses/LICENSE +0 -0
vec_inf/config/models.yaml
CHANGED
|
@@ -6,12 +6,14 @@ models:
|
|
|
6
6
|
gpus_per_node: 4
|
|
7
7
|
num_nodes: 2
|
|
8
8
|
vocab_size: 256000
|
|
9
|
-
time: 08:00:00
|
|
10
|
-
resource_type: l40s
|
|
11
9
|
vllm_args:
|
|
12
10
|
--pipeline-parallel-size: 2
|
|
13
11
|
--tensor-parallel-size: 4
|
|
14
12
|
--max-model-len: 65536
|
|
13
|
+
sglang_args:
|
|
14
|
+
--pipeline-parallel-size: 2
|
|
15
|
+
--tensor-parallel-size: 4
|
|
16
|
+
--context-length: 65536
|
|
15
17
|
c4ai-command-r-08-2024:
|
|
16
18
|
model_family: c4ai-command-r
|
|
17
19
|
model_variant: 08-2024
|
|
@@ -19,11 +21,12 @@ models:
|
|
|
19
21
|
gpus_per_node: 2
|
|
20
22
|
num_nodes: 1
|
|
21
23
|
vocab_size: 256000
|
|
22
|
-
time: 08:00:00
|
|
23
|
-
resource_type: l40s
|
|
24
24
|
vllm_args:
|
|
25
25
|
--tensor-parallel-size: 2
|
|
26
26
|
--max-model-len: 32768
|
|
27
|
+
sglang_args:
|
|
28
|
+
--tensor-parallel-size: 2
|
|
29
|
+
--context-length: 32768
|
|
27
30
|
CodeLlama-7b-hf:
|
|
28
31
|
model_family: CodeLlama
|
|
29
32
|
model_variant: 7b-hf
|
|
@@ -31,10 +34,6 @@ models:
|
|
|
31
34
|
gpus_per_node: 1
|
|
32
35
|
num_nodes: 1
|
|
33
36
|
vocab_size: 32000
|
|
34
|
-
time: 08:00:00
|
|
35
|
-
resource_type: l40s
|
|
36
|
-
vllm_args:
|
|
37
|
-
--max-model-len: 16384
|
|
38
37
|
CodeLlama-7b-Instruct-hf:
|
|
39
38
|
model_family: CodeLlama
|
|
40
39
|
model_variant: 7b-Instruct-hf
|
|
@@ -42,10 +41,6 @@ models:
|
|
|
42
41
|
gpus_per_node: 1
|
|
43
42
|
num_nodes: 1
|
|
44
43
|
vocab_size: 32000
|
|
45
|
-
time: 08:00:00
|
|
46
|
-
resource_type: l40s
|
|
47
|
-
vllm_args:
|
|
48
|
-
--max-model-len: 16384
|
|
49
44
|
CodeLlama-13b-hf:
|
|
50
45
|
model_family: CodeLlama
|
|
51
46
|
model_variant: 13b-hf
|
|
@@ -53,10 +48,6 @@ models:
|
|
|
53
48
|
gpus_per_node: 1
|
|
54
49
|
num_nodes: 1
|
|
55
50
|
vocab_size: 32000
|
|
56
|
-
time: 08:00:00
|
|
57
|
-
resource_type: l40s
|
|
58
|
-
vllm_args:
|
|
59
|
-
--max-model-len: 16384
|
|
60
51
|
CodeLlama-13b-Instruct-hf:
|
|
61
52
|
model_family: CodeLlama
|
|
62
53
|
model_variant: 13b-Instruct-hf
|
|
@@ -64,10 +55,6 @@ models:
|
|
|
64
55
|
gpus_per_node: 1
|
|
65
56
|
num_nodes: 1
|
|
66
57
|
vocab_size: 32000
|
|
67
|
-
time: 08:00:00
|
|
68
|
-
resource_type: l40s
|
|
69
|
-
vllm_args:
|
|
70
|
-
--max-model-len: 16384
|
|
71
58
|
CodeLlama-34b-hf:
|
|
72
59
|
model_family: CodeLlama
|
|
73
60
|
model_variant: 34b-hf
|
|
@@ -75,11 +62,10 @@ models:
|
|
|
75
62
|
gpus_per_node: 2
|
|
76
63
|
num_nodes: 1
|
|
77
64
|
vocab_size: 32000
|
|
78
|
-
time: 08:00:00
|
|
79
|
-
resource_type: l40s
|
|
80
65
|
vllm_args:
|
|
81
66
|
--tensor-parallel-size: 2
|
|
82
|
-
|
|
67
|
+
sglang_args:
|
|
68
|
+
--tensor-parallel-size: 2
|
|
83
69
|
CodeLlama-34b-Instruct-hf:
|
|
84
70
|
model_family: CodeLlama
|
|
85
71
|
model_variant: 34b-Instruct-hf
|
|
@@ -87,11 +73,10 @@ models:
|
|
|
87
73
|
gpus_per_node: 2
|
|
88
74
|
num_nodes: 1
|
|
89
75
|
vocab_size: 32000
|
|
90
|
-
time: 08:00:00
|
|
91
|
-
resource_type: l40s
|
|
92
76
|
vllm_args:
|
|
93
77
|
--tensor-parallel-size: 2
|
|
94
|
-
|
|
78
|
+
sglang_args:
|
|
79
|
+
--tensor-parallel-size: 2
|
|
95
80
|
CodeLlama-70b-hf:
|
|
96
81
|
model_family: CodeLlama
|
|
97
82
|
model_variant: 70b-hf
|
|
@@ -99,11 +84,10 @@ models:
|
|
|
99
84
|
gpus_per_node: 4
|
|
100
85
|
num_nodes: 1
|
|
101
86
|
vocab_size: 32016
|
|
102
|
-
time: 08:00:00
|
|
103
|
-
resource_type: l40s
|
|
104
87
|
vllm_args:
|
|
105
88
|
--tensor-parallel-size: 4
|
|
106
|
-
|
|
89
|
+
sglang_args:
|
|
90
|
+
--tensor-parallel-size: 4
|
|
107
91
|
CodeLlama-70b-Instruct-hf:
|
|
108
92
|
model_family: CodeLlama
|
|
109
93
|
model_variant: 70b-Instruct-hf
|
|
@@ -111,11 +95,10 @@ models:
|
|
|
111
95
|
gpus_per_node: 4
|
|
112
96
|
num_nodes: 1
|
|
113
97
|
vocab_size: 32016
|
|
114
|
-
time: 08:00:00
|
|
115
|
-
resource_type: l40s
|
|
116
98
|
vllm_args:
|
|
117
99
|
--tensor-parallel-size: 4
|
|
118
|
-
|
|
100
|
+
sglang_args:
|
|
101
|
+
--tensor-parallel-size: 4
|
|
119
102
|
gemma-2-2b-it:
|
|
120
103
|
model_family: gemma-2
|
|
121
104
|
model_variant: 2b-it
|
|
@@ -123,10 +106,6 @@ models:
|
|
|
123
106
|
gpus_per_node: 1
|
|
124
107
|
num_nodes: 1
|
|
125
108
|
vocab_size: 256000
|
|
126
|
-
time: 08:00:00
|
|
127
|
-
resource_type: l40s
|
|
128
|
-
vllm_args:
|
|
129
|
-
--max-model-len: 4096
|
|
130
109
|
gemma-2-9b:
|
|
131
110
|
model_family: gemma-2
|
|
132
111
|
model_variant: 9b
|
|
@@ -134,10 +113,6 @@ models:
|
|
|
134
113
|
gpus_per_node: 1
|
|
135
114
|
num_nodes: 1
|
|
136
115
|
vocab_size: 256000
|
|
137
|
-
time: 08:00:00
|
|
138
|
-
resource_type: l40s
|
|
139
|
-
vllm_args:
|
|
140
|
-
--max-model-len: 4096
|
|
141
116
|
gemma-2-9b-it:
|
|
142
117
|
model_family: gemma-2
|
|
143
118
|
model_variant: 9b-it
|
|
@@ -145,10 +120,6 @@ models:
|
|
|
145
120
|
gpus_per_node: 1
|
|
146
121
|
num_nodes: 1
|
|
147
122
|
vocab_size: 256000
|
|
148
|
-
time: 08:00:00
|
|
149
|
-
resource_type: l40s
|
|
150
|
-
vllm_args:
|
|
151
|
-
--max-model-len: 4096
|
|
152
123
|
gemma-2-27b:
|
|
153
124
|
model_family: gemma-2
|
|
154
125
|
model_variant: 27b
|
|
@@ -156,11 +127,10 @@ models:
|
|
|
156
127
|
gpus_per_node: 2
|
|
157
128
|
num_nodes: 1
|
|
158
129
|
vocab_size: 256000
|
|
159
|
-
time: 08:00:00
|
|
160
|
-
resource_type: l40s
|
|
161
130
|
vllm_args:
|
|
162
131
|
--tensor-parallel-size: 2
|
|
163
|
-
|
|
132
|
+
sglang_args:
|
|
133
|
+
--tensor-parallel-size: 2
|
|
164
134
|
gemma-2-27b-it:
|
|
165
135
|
model_family: gemma-2
|
|
166
136
|
model_variant: 27b-it
|
|
@@ -168,11 +138,10 @@ models:
|
|
|
168
138
|
gpus_per_node: 2
|
|
169
139
|
num_nodes: 1
|
|
170
140
|
vocab_size: 256000
|
|
171
|
-
time: 08:00:00
|
|
172
|
-
resource_type: l40s
|
|
173
141
|
vllm_args:
|
|
174
142
|
--tensor-parallel-size: 2
|
|
175
|
-
|
|
143
|
+
sglang_args:
|
|
144
|
+
--tensor-parallel-size: 2
|
|
176
145
|
Llama-2-7b-hf:
|
|
177
146
|
model_family: Llama-2
|
|
178
147
|
model_variant: 7b-hf
|
|
@@ -180,10 +149,6 @@ models:
|
|
|
180
149
|
gpus_per_node: 1
|
|
181
150
|
num_nodes: 1
|
|
182
151
|
vocab_size: 32000
|
|
183
|
-
time: 08:00:00
|
|
184
|
-
resource_type: l40s
|
|
185
|
-
vllm_args:
|
|
186
|
-
--max-model-len: 4096
|
|
187
152
|
Llama-2-7b-chat-hf:
|
|
188
153
|
model_family: Llama-2
|
|
189
154
|
model_variant: 7b-chat-hf
|
|
@@ -191,10 +156,6 @@ models:
|
|
|
191
156
|
gpus_per_node: 1
|
|
192
157
|
num_nodes: 1
|
|
193
158
|
vocab_size: 32000
|
|
194
|
-
time: 08:00:00
|
|
195
|
-
resource_type: l40s
|
|
196
|
-
vllm_args:
|
|
197
|
-
--max-model-len: 4096
|
|
198
159
|
Llama-2-13b-hf:
|
|
199
160
|
model_family: Llama-2
|
|
200
161
|
model_variant: 13b-hf
|
|
@@ -202,10 +163,6 @@ models:
|
|
|
202
163
|
gpus_per_node: 1
|
|
203
164
|
num_nodes: 1
|
|
204
165
|
vocab_size: 32000
|
|
205
|
-
time: 08:00:00
|
|
206
|
-
resource_type: l40s
|
|
207
|
-
vllm_args:
|
|
208
|
-
--max-model-len: 4096
|
|
209
166
|
Llama-2-13b-chat-hf:
|
|
210
167
|
model_family: Llama-2
|
|
211
168
|
model_variant: 13b-chat-hf
|
|
@@ -213,22 +170,6 @@ models:
|
|
|
213
170
|
gpus_per_node: 1
|
|
214
171
|
num_nodes: 1
|
|
215
172
|
vocab_size: 32000
|
|
216
|
-
time: 08:00:00
|
|
217
|
-
resource_type: l40s
|
|
218
|
-
vllm_args:
|
|
219
|
-
--max-model-len: 4096
|
|
220
|
-
Llama-2-70b-hf:
|
|
221
|
-
model_family: Llama-2
|
|
222
|
-
model_variant: 70b-hf
|
|
223
|
-
model_type: LLM
|
|
224
|
-
gpus_per_node: 4
|
|
225
|
-
num_nodes: 1
|
|
226
|
-
vocab_size: 32000
|
|
227
|
-
time: 08:00:00
|
|
228
|
-
resource_type: l40s
|
|
229
|
-
vllm_args:
|
|
230
|
-
--tensor-parallel-size: 4
|
|
231
|
-
--max-model-len: 4096
|
|
232
173
|
Llama-2-70b-chat-hf:
|
|
233
174
|
model_family: Llama-2
|
|
234
175
|
model_variant: 70b-chat-hf
|
|
@@ -236,11 +177,10 @@ models:
|
|
|
236
177
|
gpus_per_node: 4
|
|
237
178
|
num_nodes: 1
|
|
238
179
|
vocab_size: 32000
|
|
239
|
-
time: 08:00:00
|
|
240
|
-
resource_type: l40s
|
|
241
180
|
vllm_args:
|
|
242
181
|
--tensor-parallel-size: 4
|
|
243
|
-
|
|
182
|
+
sglang_args:
|
|
183
|
+
--tensor-parallel-size: 4
|
|
244
184
|
llava-1.5-7b-hf:
|
|
245
185
|
model_family: llava-1.5
|
|
246
186
|
model_variant: 7b-hf
|
|
@@ -248,10 +188,6 @@ models:
|
|
|
248
188
|
gpus_per_node: 1
|
|
249
189
|
num_nodes: 1
|
|
250
190
|
vocab_size: 32000
|
|
251
|
-
time: 08:00:00
|
|
252
|
-
resource_type: l40s
|
|
253
|
-
vllm_args:
|
|
254
|
-
--max-model-len: 4096
|
|
255
191
|
llava-1.5-13b-hf:
|
|
256
192
|
model_family: llava-1.5
|
|
257
193
|
model_variant: 13b-hf
|
|
@@ -259,10 +195,6 @@ models:
|
|
|
259
195
|
gpus_per_node: 1
|
|
260
196
|
num_nodes: 1
|
|
261
197
|
vocab_size: 32000
|
|
262
|
-
time: 08:00:00
|
|
263
|
-
resource_type: l40s
|
|
264
|
-
vllm_args:
|
|
265
|
-
--max-model-len: 4096
|
|
266
198
|
llava-v1.6-mistral-7b-hf:
|
|
267
199
|
model_family: llava-v1.6
|
|
268
200
|
model_variant: mistral-7b-hf
|
|
@@ -270,10 +202,6 @@ models:
|
|
|
270
202
|
gpus_per_node: 1
|
|
271
203
|
num_nodes: 1
|
|
272
204
|
vocab_size: 32064
|
|
273
|
-
time: 08:00:00
|
|
274
|
-
resource_type: l40s
|
|
275
|
-
vllm_args:
|
|
276
|
-
--max-model-len: 32768
|
|
277
205
|
llava-v1.6-34b-hf:
|
|
278
206
|
model_family: llava-v1.6
|
|
279
207
|
model_variant: 34b-hf
|
|
@@ -281,11 +209,10 @@ models:
|
|
|
281
209
|
gpus_per_node: 2
|
|
282
210
|
num_nodes: 1
|
|
283
211
|
vocab_size: 64064
|
|
284
|
-
time: 08:00:00
|
|
285
|
-
resource_type: l40s
|
|
286
212
|
vllm_args:
|
|
287
213
|
--tensor-parallel-size: 2
|
|
288
|
-
|
|
214
|
+
sglang_args:
|
|
215
|
+
--tensor-parallel-size: 2
|
|
289
216
|
Meta-Llama-3-8B:
|
|
290
217
|
model_family: Meta-Llama-3
|
|
291
218
|
model_variant: 8B
|
|
@@ -293,10 +220,6 @@ models:
|
|
|
293
220
|
gpus_per_node: 1
|
|
294
221
|
num_nodes: 1
|
|
295
222
|
vocab_size: 128256
|
|
296
|
-
time: 08:00:00
|
|
297
|
-
resource_type: l40s
|
|
298
|
-
vllm_args:
|
|
299
|
-
--max-model-len: 8192
|
|
300
223
|
Meta-Llama-3-8B-Instruct:
|
|
301
224
|
model_family: Meta-Llama-3
|
|
302
225
|
model_variant: 8B-Instruct
|
|
@@ -304,10 +227,6 @@ models:
|
|
|
304
227
|
gpus_per_node: 1
|
|
305
228
|
num_nodes: 1
|
|
306
229
|
vocab_size: 128256
|
|
307
|
-
time: 08:00:00
|
|
308
|
-
resource_type: l40s
|
|
309
|
-
vllm_args:
|
|
310
|
-
--max-model-len: 8192
|
|
311
230
|
Meta-Llama-3-70B:
|
|
312
231
|
model_family: Meta-Llama-3
|
|
313
232
|
model_variant: 70B
|
|
@@ -315,11 +234,10 @@ models:
|
|
|
315
234
|
gpus_per_node: 4
|
|
316
235
|
num_nodes: 1
|
|
317
236
|
vocab_size: 128256
|
|
318
|
-
time: 08:00:00
|
|
319
|
-
resource_type: l40s
|
|
320
237
|
vllm_args:
|
|
321
238
|
--tensor-parallel-size: 4
|
|
322
|
-
|
|
239
|
+
sglang_args:
|
|
240
|
+
--tensor-parallel-size: 4
|
|
323
241
|
Meta-Llama-3-70B-Instruct:
|
|
324
242
|
model_family: Meta-Llama-3
|
|
325
243
|
model_variant: 70B-Instruct
|
|
@@ -327,11 +245,10 @@ models:
|
|
|
327
245
|
gpus_per_node: 4
|
|
328
246
|
num_nodes: 1
|
|
329
247
|
vocab_size: 128256
|
|
330
|
-
time: 08:00:00
|
|
331
|
-
resource_type: l40s
|
|
332
248
|
vllm_args:
|
|
333
249
|
--tensor-parallel-size: 4
|
|
334
|
-
|
|
250
|
+
sglang_args:
|
|
251
|
+
--tensor-parallel-size: 4
|
|
335
252
|
Meta-Llama-3.1-8B:
|
|
336
253
|
model_family: Meta-Llama-3.1
|
|
337
254
|
model_variant: 8B
|
|
@@ -339,10 +256,6 @@ models:
|
|
|
339
256
|
gpus_per_node: 1
|
|
340
257
|
num_nodes: 1
|
|
341
258
|
vocab_size: 128256
|
|
342
|
-
time: 08:00:00
|
|
343
|
-
resource_type: l40s
|
|
344
|
-
vllm_args:
|
|
345
|
-
--max-model-len: 131072
|
|
346
259
|
Meta-Llama-3.1-8B-Instruct:
|
|
347
260
|
model_family: Meta-Llama-3.1
|
|
348
261
|
model_variant: 8B-Instruct
|
|
@@ -350,10 +263,6 @@ models:
|
|
|
350
263
|
gpus_per_node: 1
|
|
351
264
|
num_nodes: 1
|
|
352
265
|
vocab_size: 128256
|
|
353
|
-
time: 08:00:00
|
|
354
|
-
resource_type: l40s
|
|
355
|
-
vllm_args:
|
|
356
|
-
--max-model-len: 131072
|
|
357
266
|
Meta-Llama-3.1-70B:
|
|
358
267
|
model_family: Meta-Llama-3.1
|
|
359
268
|
model_variant: 70B
|
|
@@ -361,11 +270,12 @@ models:
|
|
|
361
270
|
gpus_per_node: 4
|
|
362
271
|
num_nodes: 1
|
|
363
272
|
vocab_size: 128256
|
|
364
|
-
time: 08:00:00
|
|
365
|
-
resource_type: l40s
|
|
366
273
|
vllm_args:
|
|
367
274
|
--tensor-parallel-size: 4
|
|
368
275
|
--max-model-len: 65536
|
|
276
|
+
sglang_args:
|
|
277
|
+
--tensor-parallel-size: 4
|
|
278
|
+
--context-length: 65536
|
|
369
279
|
Meta-Llama-3.1-70B-Instruct:
|
|
370
280
|
model_family: Meta-Llama-3.1
|
|
371
281
|
model_variant: 70B-Instruct
|
|
@@ -373,11 +283,12 @@ models:
|
|
|
373
283
|
gpus_per_node: 4
|
|
374
284
|
num_nodes: 1
|
|
375
285
|
vocab_size: 128256
|
|
376
|
-
time: 08:00:00
|
|
377
|
-
resource_type: l40s
|
|
378
286
|
vllm_args:
|
|
379
287
|
--tensor-parallel-size: 4
|
|
380
288
|
--max-model-len: 65536
|
|
289
|
+
sglang_args:
|
|
290
|
+
--tensor-parallel-size: 4
|
|
291
|
+
--context-length: 65536
|
|
381
292
|
Meta-Llama-3.1-405B-Instruct:
|
|
382
293
|
model_family: Meta-Llama-3.1
|
|
383
294
|
model_variant: 405B-Instruct
|
|
@@ -385,12 +296,14 @@ models:
|
|
|
385
296
|
gpus_per_node: 4
|
|
386
297
|
num_nodes: 8
|
|
387
298
|
vocab_size: 128256
|
|
388
|
-
time: 08:00:00
|
|
389
|
-
resource_type: l40s
|
|
390
299
|
vllm_args:
|
|
391
300
|
--pipeline-parallel-size: 8
|
|
392
301
|
--tensor-parallel-size: 4
|
|
393
302
|
--max-model-len: 16384
|
|
303
|
+
sglang_args:
|
|
304
|
+
--pipeline-parallel-size: 8
|
|
305
|
+
--tensor-parallel-size: 4
|
|
306
|
+
--context-length: 16384
|
|
394
307
|
Mistral-7B-Instruct-v0.1:
|
|
395
308
|
model_family: Mistral
|
|
396
309
|
model_variant: 7B-Instruct-v0.1
|
|
@@ -398,10 +311,6 @@ models:
|
|
|
398
311
|
gpus_per_node: 1
|
|
399
312
|
num_nodes: 1
|
|
400
313
|
vocab_size: 32000
|
|
401
|
-
time: 08:00:00
|
|
402
|
-
resource_type: l40s
|
|
403
|
-
vllm_args:
|
|
404
|
-
--max-model-len: 32768
|
|
405
314
|
Mistral-7B-Instruct-v0.2:
|
|
406
315
|
model_family: Mistral
|
|
407
316
|
model_variant: 7B-Instruct-v0.2
|
|
@@ -409,10 +318,6 @@ models:
|
|
|
409
318
|
gpus_per_node: 1
|
|
410
319
|
num_nodes: 1
|
|
411
320
|
vocab_size: 32000
|
|
412
|
-
time: 08:00:00
|
|
413
|
-
resource_type: l40s
|
|
414
|
-
vllm_args:
|
|
415
|
-
--max-model-len: 32768
|
|
416
321
|
Mistral-7B-v0.3:
|
|
417
322
|
model_family: Mistral
|
|
418
323
|
model_variant: 7B-v0.3
|
|
@@ -420,10 +325,6 @@ models:
|
|
|
420
325
|
gpus_per_node: 1
|
|
421
326
|
num_nodes: 1
|
|
422
327
|
vocab_size: 32768
|
|
423
|
-
time: 08:00:00
|
|
424
|
-
resource_type: l40s
|
|
425
|
-
vllm_args:
|
|
426
|
-
--max-model-len: 32768
|
|
427
328
|
Mistral-7B-Instruct-v0.3:
|
|
428
329
|
model_family: Mistral
|
|
429
330
|
model_variant: 7B-Instruct-v0.3
|
|
@@ -431,10 +332,6 @@ models:
|
|
|
431
332
|
gpus_per_node: 1
|
|
432
333
|
num_nodes: 1
|
|
433
334
|
vocab_size: 32768
|
|
434
|
-
time: 08:00:00
|
|
435
|
-
resource_type: l40s
|
|
436
|
-
vllm_args:
|
|
437
|
-
--max-model-len: 32768
|
|
438
335
|
Mistral-Large-Instruct-2407:
|
|
439
336
|
model_family: Mistral
|
|
440
337
|
model_variant: Large-Instruct-2407
|
|
@@ -442,12 +339,14 @@ models:
|
|
|
442
339
|
gpus_per_node: 4
|
|
443
340
|
num_nodes: 2
|
|
444
341
|
vocab_size: 32768
|
|
445
|
-
time: 08:00:00
|
|
446
|
-
resource_type: l40s
|
|
447
342
|
vllm_args:
|
|
448
343
|
--pipeline-parallel-size: 2
|
|
449
344
|
--tensor-parallel-size: 4
|
|
450
345
|
--max-model-len: 32768
|
|
346
|
+
sglang_args:
|
|
347
|
+
--pipeline-parallel-size: 2
|
|
348
|
+
--tensor-parallel-size: 4
|
|
349
|
+
--context-length: 32768
|
|
451
350
|
Mistral-Large-Instruct-2411:
|
|
452
351
|
model_family: Mistral
|
|
453
352
|
model_variant: Large-Instruct-2411
|
|
@@ -455,12 +354,14 @@ models:
|
|
|
455
354
|
gpus_per_node: 4
|
|
456
355
|
num_nodes: 2
|
|
457
356
|
vocab_size: 32768
|
|
458
|
-
time: 08:00:00
|
|
459
|
-
resource_type: l40s
|
|
460
357
|
vllm_args:
|
|
461
358
|
--pipeline-parallel-size: 2
|
|
462
359
|
--tensor-parallel-size: 4
|
|
463
360
|
--max-model-len: 32768
|
|
361
|
+
sglang_args:
|
|
362
|
+
--pipeline-parallel-size: 2
|
|
363
|
+
--tensor-parallel-size: 4
|
|
364
|
+
--context-length: 32768
|
|
464
365
|
Mixtral-8x7B-Instruct-v0.1:
|
|
465
366
|
model_family: Mixtral
|
|
466
367
|
model_variant: 8x7B-Instruct-v0.1
|
|
@@ -468,11 +369,10 @@ models:
|
|
|
468
369
|
gpus_per_node: 4
|
|
469
370
|
num_nodes: 1
|
|
470
371
|
vocab_size: 32000
|
|
471
|
-
time: 08:00:00
|
|
472
|
-
resource_type: l40s
|
|
473
372
|
vllm_args:
|
|
474
373
|
--tensor-parallel-size: 4
|
|
475
|
-
|
|
374
|
+
sglang_args:
|
|
375
|
+
--tensor-parallel-size: 4
|
|
476
376
|
Mixtral-8x22B-v0.1:
|
|
477
377
|
model_family: Mixtral
|
|
478
378
|
model_variant: 8x22B-v0.1
|
|
@@ -480,12 +380,12 @@ models:
|
|
|
480
380
|
gpus_per_node: 4
|
|
481
381
|
num_nodes: 2
|
|
482
382
|
vocab_size: 32768
|
|
483
|
-
time: 08:00:00
|
|
484
|
-
resource_type: l40s
|
|
485
383
|
vllm_args:
|
|
486
384
|
--pipeline-parallel-size: 2
|
|
487
385
|
--tensor-parallel-size: 4
|
|
488
|
-
|
|
386
|
+
sglang_args:
|
|
387
|
+
--pipeline-parallel-size: 2
|
|
388
|
+
--tensor-parallel-size: 4
|
|
489
389
|
Mixtral-8x22B-Instruct-v0.1:
|
|
490
390
|
model_family: Mixtral
|
|
491
391
|
model_variant: 8x22B-Instruct-v0.1
|
|
@@ -493,12 +393,12 @@ models:
|
|
|
493
393
|
gpus_per_node: 4
|
|
494
394
|
num_nodes: 2
|
|
495
395
|
vocab_size: 32768
|
|
496
|
-
time: 08:00:00
|
|
497
|
-
resource_type: l40s
|
|
498
396
|
vllm_args:
|
|
499
397
|
--pipeline-parallel-size: 2
|
|
500
398
|
--tensor-parallel-size: 4
|
|
501
|
-
|
|
399
|
+
sglang_args:
|
|
400
|
+
--pipeline-parallel-size: 2
|
|
401
|
+
--tensor-parallel-size: 4
|
|
502
402
|
Phi-3-medium-128k-instruct:
|
|
503
403
|
model_family: Phi-3
|
|
504
404
|
model_variant: medium-128k-instruct
|
|
@@ -506,11 +406,10 @@ models:
|
|
|
506
406
|
gpus_per_node: 2
|
|
507
407
|
num_nodes: 1
|
|
508
408
|
vocab_size: 32064
|
|
509
|
-
time: 08:00:00
|
|
510
|
-
resource_type: l40s
|
|
511
409
|
vllm_args:
|
|
512
410
|
--tensor-parallel-size: 2
|
|
513
|
-
|
|
411
|
+
sglang_args:
|
|
412
|
+
--tensor-parallel-size: 2
|
|
514
413
|
Phi-3-vision-128k-instruct:
|
|
515
414
|
model_family: Phi-3-vision
|
|
516
415
|
model_variant: 128k-instruct
|
|
@@ -518,11 +417,12 @@ models:
|
|
|
518
417
|
gpus_per_node: 2
|
|
519
418
|
num_nodes: 1
|
|
520
419
|
vocab_size: 32064
|
|
521
|
-
time: 08:00:00
|
|
522
|
-
resource_type: l40s
|
|
523
420
|
vllm_args:
|
|
524
421
|
--tensor-parallel-size: 2
|
|
525
422
|
--max-model-len: 65536
|
|
423
|
+
sglang_args:
|
|
424
|
+
--tensor-parallel-size: 2
|
|
425
|
+
--context-length: 65536
|
|
526
426
|
Llama-3.1-Nemotron-70B-Instruct-HF:
|
|
527
427
|
model_family: Llama-3.1-Nemotron
|
|
528
428
|
model_variant: 70B-Instruct-HF
|
|
@@ -530,11 +430,12 @@ models:
|
|
|
530
430
|
gpus_per_node: 4
|
|
531
431
|
num_nodes: 1
|
|
532
432
|
vocab_size: 128256
|
|
533
|
-
time: 08:00:00
|
|
534
|
-
resource_type: l40s
|
|
535
433
|
vllm_args:
|
|
536
434
|
--tensor-parallel-size: 4
|
|
537
435
|
--max-model-len: 65536
|
|
436
|
+
sglang_args:
|
|
437
|
+
--tensor-parallel-size: 4
|
|
438
|
+
--context-length: 65536
|
|
538
439
|
Llama-3.2-1B:
|
|
539
440
|
model_family: Llama-3.2
|
|
540
441
|
model_variant: 1B
|
|
@@ -542,10 +443,6 @@ models:
|
|
|
542
443
|
gpus_per_node: 1
|
|
543
444
|
num_nodes: 1
|
|
544
445
|
vocab_size: 128256
|
|
545
|
-
time: 08:00:00
|
|
546
|
-
resource_type: l40s
|
|
547
|
-
vllm_args:
|
|
548
|
-
--max-model-len: 131072
|
|
549
446
|
Llama-3.2-1B-Instruct:
|
|
550
447
|
model_family: Llama-3.2
|
|
551
448
|
model_variant: 1B-Instruct
|
|
@@ -553,10 +450,6 @@ models:
|
|
|
553
450
|
gpus_per_node: 1
|
|
554
451
|
num_nodes: 1
|
|
555
452
|
vocab_size: 128256
|
|
556
|
-
time: 08:00:00
|
|
557
|
-
resource_type: l40s
|
|
558
|
-
vllm_args:
|
|
559
|
-
--max-model-len: 131072
|
|
560
453
|
Llama-3.2-3B:
|
|
561
454
|
model_family: Llama-3.2
|
|
562
455
|
model_variant: 3B
|
|
@@ -564,10 +457,6 @@ models:
|
|
|
564
457
|
gpus_per_node: 1
|
|
565
458
|
num_nodes: 1
|
|
566
459
|
vocab_size: 128256
|
|
567
|
-
time: 08:00:00
|
|
568
|
-
resource_type: l40s
|
|
569
|
-
vllm_args:
|
|
570
|
-
--max-model-len: 131072
|
|
571
460
|
Llama-3.2-3B-Instruct:
|
|
572
461
|
model_family: Llama-3.2
|
|
573
462
|
model_variant: 3B-Instruct
|
|
@@ -575,24 +464,6 @@ models:
|
|
|
575
464
|
gpus_per_node: 1
|
|
576
465
|
num_nodes: 1
|
|
577
466
|
vocab_size: 128256
|
|
578
|
-
time: 08:00:00
|
|
579
|
-
resource_type: l40s
|
|
580
|
-
vllm_args:
|
|
581
|
-
--max-model-len: 131072
|
|
582
|
-
Llama-3.2-11B-Vision:
|
|
583
|
-
model_family: Llama-3.2
|
|
584
|
-
model_variant: 11B-Vision
|
|
585
|
-
model_type: VLM
|
|
586
|
-
gpus_per_node: 2
|
|
587
|
-
num_nodes: 1
|
|
588
|
-
vocab_size: 128256
|
|
589
|
-
time: 08:00:00
|
|
590
|
-
resource_type: l40s
|
|
591
|
-
vllm_args:
|
|
592
|
-
--tensor-parallel-size: 2
|
|
593
|
-
--max-model-len: 4096
|
|
594
|
-
--max-num-seqs: 64
|
|
595
|
-
--enforce-eager: true
|
|
596
467
|
Llama-3.2-11B-Vision-Instruct:
|
|
597
468
|
model_family: Llama-3.2
|
|
598
469
|
model_variant: 11B-Vision-Instruct
|
|
@@ -600,27 +471,9 @@ models:
|
|
|
600
471
|
gpus_per_node: 2
|
|
601
472
|
num_nodes: 1
|
|
602
473
|
vocab_size: 128256
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
vllm_args:
|
|
474
|
+
engine: sglang
|
|
475
|
+
sglang_args:
|
|
606
476
|
--tensor-parallel-size: 2
|
|
607
|
-
--max-model-len: 4096
|
|
608
|
-
--max-num-seqs: 64
|
|
609
|
-
--enforce-eager: true
|
|
610
|
-
Llama-3.2-90B-Vision:
|
|
611
|
-
model_family: Llama-3.2
|
|
612
|
-
model_variant: 90B-Vision
|
|
613
|
-
model_type: VLM
|
|
614
|
-
gpus_per_node: 4
|
|
615
|
-
num_nodes: 2
|
|
616
|
-
vocab_size: 128256
|
|
617
|
-
time: 08:00:00
|
|
618
|
-
resource_type: l40s
|
|
619
|
-
vllm_args:
|
|
620
|
-
--tensor-parallel-size: 8
|
|
621
|
-
--max-model-len: 4096
|
|
622
|
-
--max-num-seqs: 32
|
|
623
|
-
--enforce-eager: true
|
|
624
477
|
Llama-3.2-90B-Vision-Instruct:
|
|
625
478
|
model_family: Llama-3.2
|
|
626
479
|
model_variant: 90B-Vision-Instruct
|
|
@@ -628,13 +481,9 @@ models:
|
|
|
628
481
|
gpus_per_node: 4
|
|
629
482
|
num_nodes: 2
|
|
630
483
|
vocab_size: 128256
|
|
631
|
-
|
|
632
|
-
|
|
633
|
-
vllm_args:
|
|
484
|
+
engine: sglang
|
|
485
|
+
sglang_args:
|
|
634
486
|
--tensor-parallel-size: 8
|
|
635
|
-
--max-model-len: 4096
|
|
636
|
-
--max-num-seqs: 32
|
|
637
|
-
--enforce-eager: true
|
|
638
487
|
Qwen2.5-0.5B-Instruct:
|
|
639
488
|
model_family: Qwen2.5
|
|
640
489
|
model_variant: 0.5B-Instruct
|
|
@@ -642,10 +491,6 @@ models:
|
|
|
642
491
|
gpus_per_node: 1
|
|
643
492
|
num_nodes: 1
|
|
644
493
|
vocab_size: 152064
|
|
645
|
-
time: 08:00:00
|
|
646
|
-
resource_type: l40s
|
|
647
|
-
vllm_args:
|
|
648
|
-
--max-model-len: 32768
|
|
649
494
|
Qwen2.5-1.5B-Instruct:
|
|
650
495
|
model_family: Qwen2.5
|
|
651
496
|
model_variant: 1.5B-Instruct
|
|
@@ -653,10 +498,6 @@ models:
|
|
|
653
498
|
gpus_per_node: 1
|
|
654
499
|
num_nodes: 1
|
|
655
500
|
vocab_size: 152064
|
|
656
|
-
time: 08:00:00
|
|
657
|
-
resource_type: l40s
|
|
658
|
-
vllm_args:
|
|
659
|
-
--max-model-len: 32768
|
|
660
501
|
Qwen2.5-3B-Instruct:
|
|
661
502
|
model_family: Qwen2.5
|
|
662
503
|
model_variant: 3B-Instruct
|
|
@@ -664,10 +505,6 @@ models:
|
|
|
664
505
|
gpus_per_node: 1
|
|
665
506
|
num_nodes: 1
|
|
666
507
|
vocab_size: 152064
|
|
667
|
-
time: 08:00:00
|
|
668
|
-
resource_type: l40s
|
|
669
|
-
vllm_args:
|
|
670
|
-
--max-model-len: 32768
|
|
671
508
|
Qwen2.5-7B-Instruct:
|
|
672
509
|
model_family: Qwen2.5
|
|
673
510
|
model_variant: 7B-Instruct
|
|
@@ -675,10 +512,6 @@ models:
|
|
|
675
512
|
gpus_per_node: 1
|
|
676
513
|
num_nodes: 1
|
|
677
514
|
vocab_size: 152064
|
|
678
|
-
time: 08:00:00
|
|
679
|
-
resource_type: l40s
|
|
680
|
-
vllm_args:
|
|
681
|
-
--max-model-len: 32768
|
|
682
515
|
Qwen2.5-14B-Instruct:
|
|
683
516
|
model_family: Qwen2.5
|
|
684
517
|
model_variant: 14B-Instruct
|
|
@@ -686,10 +519,6 @@ models:
|
|
|
686
519
|
gpus_per_node: 1
|
|
687
520
|
num_nodes: 1
|
|
688
521
|
vocab_size: 152064
|
|
689
|
-
time: 08:00:00
|
|
690
|
-
resource_type: l40s
|
|
691
|
-
vllm_args:
|
|
692
|
-
--max-model-len: 32768
|
|
693
522
|
Qwen2.5-32B-Instruct:
|
|
694
523
|
model_family: Qwen2.5
|
|
695
524
|
model_variant: 32B-Instruct
|
|
@@ -697,11 +526,10 @@ models:
|
|
|
697
526
|
gpus_per_node: 2
|
|
698
527
|
num_nodes: 1
|
|
699
528
|
vocab_size: 152064
|
|
700
|
-
time: 08:00:00
|
|
701
|
-
resource_type: l40s
|
|
702
529
|
vllm_args:
|
|
703
530
|
--tensor-parallel-size: 2
|
|
704
|
-
|
|
531
|
+
sglang_args:
|
|
532
|
+
--tensor-parallel-size: 2
|
|
705
533
|
Qwen2.5-72B-Instruct:
|
|
706
534
|
model_family: Qwen2.5
|
|
707
535
|
model_variant: 72B-Instruct
|
|
@@ -709,79 +537,78 @@ models:
|
|
|
709
537
|
gpus_per_node: 4
|
|
710
538
|
num_nodes: 1
|
|
711
539
|
vocab_size: 152064
|
|
712
|
-
time: 08:00:00
|
|
713
|
-
resource_type: l40s
|
|
714
540
|
vllm_args:
|
|
715
541
|
--tensor-parallel-size: 4
|
|
716
|
-
|
|
542
|
+
sglang_args:
|
|
543
|
+
--tensor-parallel-size: 4
|
|
717
544
|
Qwen2.5-Math-1.5B-Instruct:
|
|
718
|
-
model_family: Qwen2.5
|
|
719
|
-
model_variant:
|
|
545
|
+
model_family: Qwen2.5-Math
|
|
546
|
+
model_variant: 1.5B-Instruct
|
|
720
547
|
model_type: LLM
|
|
721
548
|
gpus_per_node: 1
|
|
722
549
|
num_nodes: 1
|
|
723
550
|
vocab_size: 152064
|
|
724
|
-
time: 08:00:00
|
|
725
|
-
resource_type: l40s
|
|
726
|
-
vllm_args:
|
|
727
|
-
--max-model-len: 4096
|
|
728
551
|
Qwen2.5-Math-7B-Instruct:
|
|
729
|
-
model_family: Qwen2.5
|
|
730
|
-
model_variant:
|
|
552
|
+
model_family: Qwen2.5-Math
|
|
553
|
+
model_variant: 7B-Instruct
|
|
731
554
|
model_type: LLM
|
|
732
555
|
gpus_per_node: 1
|
|
733
556
|
num_nodes: 1
|
|
734
557
|
vocab_size: 152064
|
|
735
|
-
time: 08:00:00
|
|
736
|
-
resource_type: l40s
|
|
737
|
-
vllm_args:
|
|
738
|
-
--max-model-len: 4096
|
|
739
558
|
Qwen2.5-Math-72B-Instruct:
|
|
740
|
-
model_family: Qwen2.5
|
|
741
|
-
model_variant:
|
|
559
|
+
model_family: Qwen2.5-Math
|
|
560
|
+
model_variant: 72B-Instruct
|
|
742
561
|
model_type: LLM
|
|
743
562
|
gpus_per_node: 4
|
|
744
563
|
num_nodes: 1
|
|
745
564
|
vocab_size: 152064
|
|
746
|
-
time: 08:00:00
|
|
747
|
-
resource_type: l40s
|
|
748
565
|
vllm_args:
|
|
749
566
|
--tensor-parallel-size: 4
|
|
750
|
-
|
|
567
|
+
sglang_args:
|
|
568
|
+
--tensor-parallel-size: 4
|
|
569
|
+
Qwen2.5-Coder-3B-Instruct:
|
|
570
|
+
model_family: Qwen2.5-Coder
|
|
571
|
+
model_variant: 3B-Instruct
|
|
572
|
+
model_type: LLM
|
|
573
|
+
gpus_per_node: 1
|
|
574
|
+
num_nodes: 1
|
|
575
|
+
vocab_size: 152064
|
|
751
576
|
Qwen2.5-Coder-7B-Instruct:
|
|
752
|
-
model_family: Qwen2.5
|
|
753
|
-
model_variant:
|
|
577
|
+
model_family: Qwen2.5-Coder
|
|
578
|
+
model_variant: 7B-Instruct
|
|
754
579
|
model_type: LLM
|
|
755
580
|
gpus_per_node: 1
|
|
756
581
|
num_nodes: 1
|
|
757
582
|
vocab_size: 152064
|
|
758
|
-
time: 08:00:00
|
|
759
|
-
resource_type: l40s
|
|
760
|
-
vllm_args:
|
|
761
|
-
--max-model-len: 32768
|
|
762
583
|
Qwen2.5-Math-RM-72B:
|
|
763
|
-
model_family: Qwen2.5
|
|
764
|
-
model_variant:
|
|
584
|
+
model_family: Qwen2.5-Math-RM
|
|
585
|
+
model_variant: 72B
|
|
765
586
|
model_type: Reward_Modeling
|
|
766
587
|
gpus_per_node: 4
|
|
767
588
|
num_nodes: 1
|
|
768
589
|
vocab_size: 152064
|
|
769
|
-
time: 08:00:00
|
|
770
|
-
resource_type: l40s
|
|
771
590
|
vllm_args:
|
|
772
591
|
--tensor-parallel-size: 4
|
|
773
|
-
|
|
592
|
+
sglang_args:
|
|
593
|
+
--tensor-parallel-size: 4
|
|
774
594
|
Qwen2.5-Math-PRM-7B:
|
|
775
|
-
model_family: Qwen2.5
|
|
776
|
-
model_variant:
|
|
595
|
+
model_family: Qwen2.5-Math-PRM
|
|
596
|
+
model_variant: 7B
|
|
777
597
|
model_type: Reward_Modeling
|
|
778
598
|
gpus_per_node: 1
|
|
779
599
|
num_nodes: 1
|
|
780
600
|
vocab_size: 152064
|
|
781
|
-
|
|
782
|
-
|
|
601
|
+
Qwen2.5-VL-7B-Instruct:
|
|
602
|
+
model_family: Qwen2.5-VL
|
|
603
|
+
model_variant: 7B-Instruct
|
|
604
|
+
model_type: VLM
|
|
605
|
+
gpus_per_node: 1
|
|
606
|
+
num_nodes: 1
|
|
607
|
+
vocab_size: 152064
|
|
783
608
|
vllm_args:
|
|
784
|
-
--max-model-len:
|
|
609
|
+
--max-model-len: 32768
|
|
610
|
+
sglang_args:
|
|
611
|
+
--context-length: 32768
|
|
785
612
|
QwQ-32B:
|
|
786
613
|
model_family: QwQ
|
|
787
614
|
model_variant: 32B
|
|
@@ -789,11 +616,12 @@ models:
|
|
|
789
616
|
gpus_per_node: 2
|
|
790
617
|
num_nodes: 1
|
|
791
618
|
vocab_size: 152064
|
|
792
|
-
time: 08:00:00
|
|
793
|
-
resource_type: l40s
|
|
794
619
|
vllm_args:
|
|
795
620
|
--tensor-parallel-size: 2
|
|
796
621
|
--max-model-len: 32768
|
|
622
|
+
sglang_args:
|
|
623
|
+
--tensor-parallel-size: 2
|
|
624
|
+
--context-length: 32768
|
|
797
625
|
Pixtral-12B-2409:
|
|
798
626
|
model_family: Pixtral
|
|
799
627
|
model_variant: 12B-2409
|
|
@@ -801,10 +629,10 @@ models:
|
|
|
801
629
|
gpus_per_node: 1
|
|
802
630
|
num_nodes: 1
|
|
803
631
|
vocab_size: 131072
|
|
804
|
-
time: 08:00:00
|
|
805
|
-
resource_type: l40s
|
|
806
632
|
vllm_args:
|
|
807
633
|
--max-model-len: 8192
|
|
634
|
+
sglang_args:
|
|
635
|
+
--context-length: 8192
|
|
808
636
|
e5-mistral-7b-instruct:
|
|
809
637
|
model_family: e5
|
|
810
638
|
model_variant: mistral-7b-instruct
|
|
@@ -812,10 +640,6 @@ models:
|
|
|
812
640
|
gpus_per_node: 1
|
|
813
641
|
num_nodes: 1
|
|
814
642
|
vocab_size: 32000
|
|
815
|
-
time: 08:00:00
|
|
816
|
-
resource_type: l40s
|
|
817
|
-
vllm_args:
|
|
818
|
-
--max-model-len: 4096
|
|
819
643
|
bge-base-en-v1.5:
|
|
820
644
|
model_family: bge
|
|
821
645
|
model_variant: base-en-v1.5
|
|
@@ -823,10 +647,6 @@ models:
|
|
|
823
647
|
gpus_per_node: 1
|
|
824
648
|
num_nodes: 1
|
|
825
649
|
vocab_size: 30522
|
|
826
|
-
time: 08:00:00
|
|
827
|
-
resource_type: l40s
|
|
828
|
-
vllm_args:
|
|
829
|
-
--max-model-len: 512
|
|
830
650
|
all-MiniLM-L6-v2:
|
|
831
651
|
model_family: all-MiniLM
|
|
832
652
|
model_variant: L6-v2
|
|
@@ -834,10 +654,6 @@ models:
|
|
|
834
654
|
gpus_per_node: 1
|
|
835
655
|
num_nodes: 1
|
|
836
656
|
vocab_size: 30522
|
|
837
|
-
time: 08:00:00
|
|
838
|
-
resource_type: l40s
|
|
839
|
-
vllm_args:
|
|
840
|
-
--max-model-len: 512
|
|
841
657
|
Llama-3.3-70B-Instruct:
|
|
842
658
|
model_family: Llama-3.3
|
|
843
659
|
model_variant: 70B-Instruct
|
|
@@ -845,11 +661,21 @@ models:
|
|
|
845
661
|
gpus_per_node: 4
|
|
846
662
|
num_nodes: 1
|
|
847
663
|
vocab_size: 128256
|
|
848
|
-
time: 08:00:00
|
|
849
|
-
resource_type: l40s
|
|
850
664
|
vllm_args:
|
|
851
665
|
--tensor-parallel-size: 4
|
|
852
666
|
--max-model-len: 65536
|
|
667
|
+
sglang_args:
|
|
668
|
+
--tensor-parallel-size: 4
|
|
669
|
+
--context-length: 65536
|
|
670
|
+
InternVL2_5-8B:
|
|
671
|
+
model_family: InternVL2_5
|
|
672
|
+
model_variant: 8B
|
|
673
|
+
model_type: VLM
|
|
674
|
+
gpus_per_node: 1
|
|
675
|
+
num_nodes: 1
|
|
676
|
+
vocab_size: 92553
|
|
677
|
+
vllm_args:
|
|
678
|
+
--trust-remote-code: true
|
|
853
679
|
InternVL2_5-26B:
|
|
854
680
|
model_family: InternVL2_5
|
|
855
681
|
model_variant: 26B
|
|
@@ -857,11 +683,11 @@ models:
|
|
|
857
683
|
gpus_per_node: 2
|
|
858
684
|
num_nodes: 1
|
|
859
685
|
vocab_size: 92553
|
|
860
|
-
time: 08:00:00
|
|
861
|
-
resource_type: l40s
|
|
862
686
|
vllm_args:
|
|
863
687
|
--tensor-parallel-size: 2
|
|
864
|
-
--
|
|
688
|
+
--trust-remote-code: true
|
|
689
|
+
sglang_args:
|
|
690
|
+
--tensor-parallel-size: 2
|
|
865
691
|
InternVL2_5-38B:
|
|
866
692
|
model_family: InternVL2_5
|
|
867
693
|
model_variant: 38B
|
|
@@ -869,23 +695,22 @@ models:
|
|
|
869
695
|
gpus_per_node: 4
|
|
870
696
|
num_nodes: 1
|
|
871
697
|
vocab_size: 92553
|
|
872
|
-
time: 08:00:00
|
|
873
|
-
resource_type: l40s
|
|
874
698
|
vllm_args:
|
|
875
699
|
--tensor-parallel-size: 4
|
|
876
|
-
--
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
700
|
+
--trust-remote-code: true
|
|
701
|
+
sglang_args:
|
|
702
|
+
--tensor-parallel-size: 4
|
|
703
|
+
aya-expanse-32b:
|
|
704
|
+
model_family: aya-expanse
|
|
705
|
+
model_variant: 32b
|
|
880
706
|
model_type: LLM
|
|
881
707
|
gpus_per_node: 2
|
|
882
708
|
num_nodes: 1
|
|
883
709
|
vocab_size: 256000
|
|
884
|
-
time: 08:00:00
|
|
885
|
-
resource_type: l40s
|
|
886
710
|
vllm_args:
|
|
887
711
|
--tensor-parallel-size: 2
|
|
888
|
-
|
|
712
|
+
sglang_args:
|
|
713
|
+
--tensor-parallel-size: 2
|
|
889
714
|
DeepSeek-R1-Distill-Llama-70B:
|
|
890
715
|
model_family: DeepSeek-R1
|
|
891
716
|
model_variant: Distill-Llama-70B
|
|
@@ -893,11 +718,12 @@ models:
|
|
|
893
718
|
gpus_per_node: 4
|
|
894
719
|
num_nodes: 1
|
|
895
720
|
vocab_size: 128256
|
|
896
|
-
time: 08:00:00
|
|
897
|
-
resource_type: l40s
|
|
898
721
|
vllm_args:
|
|
899
722
|
--tensor-parallel-size: 4
|
|
900
723
|
--max-model-len: 65536
|
|
724
|
+
sglang_args:
|
|
725
|
+
--tensor-parallel-size: 4
|
|
726
|
+
--context-length: 65536
|
|
901
727
|
DeepSeek-R1-Distill-Llama-8B:
|
|
902
728
|
model_family: DeepSeek-R1
|
|
903
729
|
model_variant: Distill-Llama-8B
|
|
@@ -905,10 +731,6 @@ models:
|
|
|
905
731
|
gpus_per_node: 1
|
|
906
732
|
num_nodes: 1
|
|
907
733
|
vocab_size: 128256
|
|
908
|
-
time: 08:00:00
|
|
909
|
-
resource_type: l40s
|
|
910
|
-
vllm_args:
|
|
911
|
-
--max-model-len: 131072
|
|
912
734
|
DeepSeek-R1-Distill-Qwen-32B:
|
|
913
735
|
model_family: DeepSeek-R1
|
|
914
736
|
model_variant: Distill-Qwen-32B
|
|
@@ -916,11 +738,12 @@ models:
|
|
|
916
738
|
gpus_per_node: 2
|
|
917
739
|
num_nodes: 1
|
|
918
740
|
vocab_size: 152064
|
|
919
|
-
time: 08:00:00
|
|
920
|
-
resource_type: l40s
|
|
921
741
|
vllm_args:
|
|
922
742
|
--tensor-parallel-size: 2
|
|
923
743
|
--max-model-len: 65536
|
|
744
|
+
sglang_args:
|
|
745
|
+
--tensor-parallel-size: 2
|
|
746
|
+
--context-length: 65536
|
|
924
747
|
DeepSeek-R1-Distill-Qwen-14B:
|
|
925
748
|
model_family: DeepSeek-R1
|
|
926
749
|
model_variant: Distill-Qwen-14B
|
|
@@ -928,10 +751,10 @@ models:
|
|
|
928
751
|
gpus_per_node: 1
|
|
929
752
|
num_nodes: 1
|
|
930
753
|
vocab_size: 152064
|
|
931
|
-
time: 08:00:00
|
|
932
|
-
resource_type: l40s
|
|
933
754
|
vllm_args:
|
|
934
755
|
--max-model-len: 65536
|
|
756
|
+
sglang_args:
|
|
757
|
+
--context-length: 65536
|
|
935
758
|
DeepSeek-R1-Distill-Qwen-7B:
|
|
936
759
|
model_family: DeepSeek-R1
|
|
937
760
|
model_variant: Distill-Qwen-7B
|
|
@@ -939,10 +762,6 @@ models:
|
|
|
939
762
|
gpus_per_node: 1
|
|
940
763
|
num_nodes: 1
|
|
941
764
|
vocab_size: 152064
|
|
942
|
-
time: 08:00:00
|
|
943
|
-
resource_type: l40s
|
|
944
|
-
vllm_args:
|
|
945
|
-
--max-model-len: 131072
|
|
946
765
|
DeepSeek-R1-Distill-Qwen-1.5B:
|
|
947
766
|
model_family: DeepSeek-R1
|
|
948
767
|
model_variant: Distill-Qwen-1.5B
|
|
@@ -950,10 +769,6 @@ models:
|
|
|
950
769
|
gpus_per_node: 1
|
|
951
770
|
num_nodes: 1
|
|
952
771
|
vocab_size: 152064
|
|
953
|
-
time: 08:00:00
|
|
954
|
-
resource_type: l40s
|
|
955
|
-
vllm_args:
|
|
956
|
-
--max-model-len: 131072
|
|
957
772
|
Phi-3.5-vision-instruct:
|
|
958
773
|
model_family: Phi-3.5-vision
|
|
959
774
|
model_variant: instruct
|
|
@@ -961,22 +776,12 @@ models:
|
|
|
961
776
|
gpus_per_node: 2
|
|
962
777
|
num_nodes: 1
|
|
963
778
|
vocab_size: 32064
|
|
964
|
-
time: 08:00:00
|
|
965
|
-
resource_type: l40s
|
|
966
779
|
vllm_args:
|
|
967
780
|
--tensor-parallel-size: 2
|
|
968
781
|
--max-model-len: 65536
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
model_type: VLM
|
|
973
|
-
gpus_per_node: 1
|
|
974
|
-
num_nodes: 1
|
|
975
|
-
vocab_size: 92553
|
|
976
|
-
time: 08:00:00
|
|
977
|
-
resource_type: l40s
|
|
978
|
-
vllm_args:
|
|
979
|
-
--max-model-len: 32768
|
|
782
|
+
sglang_args:
|
|
783
|
+
--tensor-parallel-size: 2
|
|
784
|
+
--context-length: 65536
|
|
980
785
|
glm-4v-9b:
|
|
981
786
|
model_family: glm-4v
|
|
982
787
|
model_variant: 9b
|
|
@@ -984,10 +789,6 @@ models:
|
|
|
984
789
|
gpus_per_node: 1
|
|
985
790
|
num_nodes: 1
|
|
986
791
|
vocab_size: 151552
|
|
987
|
-
time: 08:00:00
|
|
988
|
-
resource_type: l40s
|
|
989
|
-
vllm_args:
|
|
990
|
-
--max-model-len: 8192
|
|
991
792
|
Molmo-7B-D-0924:
|
|
992
793
|
model_family: Molmo
|
|
993
794
|
model_variant: 7B-D-0924
|
|
@@ -995,21 +796,16 @@ models:
|
|
|
995
796
|
gpus_per_node: 1
|
|
996
797
|
num_nodes: 1
|
|
997
798
|
vocab_size: 152064
|
|
998
|
-
time: 08:00:00
|
|
999
|
-
resource_type: l40s
|
|
1000
|
-
vllm_args:
|
|
1001
|
-
--max-model-len: 4096
|
|
1002
799
|
deepseek-vl2:
|
|
1003
800
|
model_family: deepseek-vl2
|
|
1004
801
|
model_type: VLM
|
|
1005
802
|
gpus_per_node: 2
|
|
1006
803
|
num_nodes: 1
|
|
1007
804
|
vocab_size: 129280
|
|
1008
|
-
time: 08:00:00
|
|
1009
|
-
resource_type: l40s
|
|
1010
805
|
vllm_args:
|
|
1011
806
|
--tensor-parallel-size: 2
|
|
1012
|
-
|
|
807
|
+
sglang_args:
|
|
808
|
+
--tensor-parallel-size: 2
|
|
1013
809
|
deepseek-vl2-small:
|
|
1014
810
|
model_family: deepseek-vl2
|
|
1015
811
|
model_variant: small
|
|
@@ -1017,10 +813,6 @@ models:
|
|
|
1017
813
|
gpus_per_node: 1
|
|
1018
814
|
num_nodes: 1
|
|
1019
815
|
vocab_size: 129280
|
|
1020
|
-
time: 08:00:00
|
|
1021
|
-
resource_type: l40s
|
|
1022
|
-
vllm_args:
|
|
1023
|
-
--max-model-len: 4096
|
|
1024
816
|
Qwen3-8B:
|
|
1025
817
|
model_family: Qwen3
|
|
1026
818
|
model_variant: 8B
|
|
@@ -1028,10 +820,6 @@ models:
|
|
|
1028
820
|
gpus_per_node: 1
|
|
1029
821
|
num_nodes: 1
|
|
1030
822
|
vocab_size: 151936
|
|
1031
|
-
time: 08:00:00
|
|
1032
|
-
resource_type: l40s
|
|
1033
|
-
vllm_args:
|
|
1034
|
-
--max-model-len: 40960
|
|
1035
823
|
Qwen3-14B:
|
|
1036
824
|
model_family: Qwen3
|
|
1037
825
|
model_variant: 14B
|
|
@@ -1039,10 +827,6 @@ models:
|
|
|
1039
827
|
gpus_per_node: 1
|
|
1040
828
|
num_nodes: 1
|
|
1041
829
|
vocab_size: 151936
|
|
1042
|
-
time: 08:00:00
|
|
1043
|
-
resource_type: l40s
|
|
1044
|
-
vllm_args:
|
|
1045
|
-
--max-model-len: 40960
|
|
1046
830
|
Qwen3-32B:
|
|
1047
831
|
model_family: Qwen3
|
|
1048
832
|
model_variant: 32B
|
|
@@ -1050,21 +834,53 @@ models:
|
|
|
1050
834
|
gpus_per_node: 2
|
|
1051
835
|
num_nodes: 1
|
|
1052
836
|
vocab_size: 151936
|
|
1053
|
-
time: 08:00:00
|
|
1054
|
-
resource_type: l40s
|
|
1055
837
|
vllm_args:
|
|
1056
838
|
--tensor-parallel-size: 2
|
|
1057
|
-
|
|
839
|
+
sglang_args:
|
|
840
|
+
--tensor-parallel-size: 2
|
|
1058
841
|
gpt-oss-120b:
|
|
1059
842
|
model_family: gpt-oss
|
|
1060
843
|
model_variant: 120b
|
|
1061
844
|
model_type: LLM
|
|
1062
|
-
gpus_per_node:
|
|
1063
|
-
num_nodes:
|
|
845
|
+
gpus_per_node: 2
|
|
846
|
+
num_nodes: 1
|
|
1064
847
|
vocab_size: 201088
|
|
1065
|
-
|
|
1066
|
-
|
|
848
|
+
vllm_args:
|
|
849
|
+
--tensor-parallel-size: 2
|
|
850
|
+
sglang_args:
|
|
851
|
+
--tensor-parallel-size: 2
|
|
852
|
+
Llama-4-Maverick-17B-128E-Instruct:
|
|
853
|
+
model_family: Llama-4
|
|
854
|
+
model_variant: Maverick-17B-128E-Instruct
|
|
855
|
+
model_type: VLM
|
|
856
|
+
gpus_per_node: 4
|
|
857
|
+
num_nodes: 4
|
|
858
|
+
resource_type: h100
|
|
859
|
+
cpus_per_task: 6
|
|
860
|
+
mem-per-node: 60G
|
|
861
|
+
vocab_size: 202048
|
|
862
|
+
time: 03:00:00
|
|
1067
863
|
vllm_args:
|
|
1068
864
|
--tensor-parallel-size: 4
|
|
1069
|
-
--pipeline-parallel-size:
|
|
1070
|
-
|
|
865
|
+
--pipeline-parallel-size: 4
|
|
866
|
+
sglang_args:
|
|
867
|
+
--tensor-parallel-size: 4
|
|
868
|
+
--pipeline-parallel-size: 4
|
|
869
|
+
medgemma-4b-it:
|
|
870
|
+
model_family: medgemma
|
|
871
|
+
model_variant: 4b-it
|
|
872
|
+
model_type: VLM
|
|
873
|
+
gpus_per_node: 1
|
|
874
|
+
num_nodes: 1
|
|
875
|
+
vocab_size: 262208
|
|
876
|
+
medgemma-27b-it:
|
|
877
|
+
model_family: medgemma
|
|
878
|
+
model_variant: 27b-it
|
|
879
|
+
model_type: VLM
|
|
880
|
+
gpus_per_node: 2
|
|
881
|
+
num_nodes: 1
|
|
882
|
+
vocab_size: 262208
|
|
883
|
+
vllm_args:
|
|
884
|
+
--tensor-parallel-size: 2
|
|
885
|
+
sglang_args:
|
|
886
|
+
--tensor-parallel-size: 2
|