vec-inf 0.7.0__py3-none-any.whl → 0.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vec_inf/cli/_cli.py +15 -1
- vec_inf/cli/_helper.py +44 -19
- vec_inf/client/_client_vars.py +0 -7
- vec_inf/client/_helper.py +66 -26
- vec_inf/client/_slurm_script_generator.py +36 -19
- vec_inf/client/_slurm_templates.py +20 -3
- vec_inf/client/_slurm_vars.py +4 -0
- vec_inf/client/_utils.py +56 -7
- vec_inf/client/api.py +8 -2
- vec_inf/client/models.py +6 -0
- vec_inf/config/environment.yaml +4 -0
- vec_inf/config/models.yaml +48 -99
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/METADATA +25 -6
- vec_inf-0.7.2.dist-info/RECORD +27 -0
- vec_inf-0.7.0.dist-info/RECORD +0 -27
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/WHEEL +0 -0
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/entry_points.txt +0 -0
- {vec_inf-0.7.0.dist-info → vec_inf-0.7.2.dist-info}/licenses/LICENSE +0 -0
vec_inf/config/models.yaml
CHANGED
|
@@ -12,7 +12,6 @@ models:
|
|
|
12
12
|
--pipeline-parallel-size: 2
|
|
13
13
|
--tensor-parallel-size: 4
|
|
14
14
|
--max-model-len: 65536
|
|
15
|
-
--max-num-seqs: 256
|
|
16
15
|
c4ai-command-r-08-2024:
|
|
17
16
|
model_family: c4ai-command-r
|
|
18
17
|
model_variant: 08-2024
|
|
@@ -25,7 +24,6 @@ models:
|
|
|
25
24
|
vllm_args:
|
|
26
25
|
--tensor-parallel-size: 2
|
|
27
26
|
--max-model-len: 32768
|
|
28
|
-
--max-num-seqs: 256
|
|
29
27
|
CodeLlama-7b-hf:
|
|
30
28
|
model_family: CodeLlama
|
|
31
29
|
model_variant: 7b-hf
|
|
@@ -37,7 +35,6 @@ models:
|
|
|
37
35
|
resource_type: l40s
|
|
38
36
|
vllm_args:
|
|
39
37
|
--max-model-len: 16384
|
|
40
|
-
--max-num-seqs: 256
|
|
41
38
|
CodeLlama-7b-Instruct-hf:
|
|
42
39
|
model_family: CodeLlama
|
|
43
40
|
model_variant: 7b-Instruct-hf
|
|
@@ -49,7 +46,6 @@ models:
|
|
|
49
46
|
resource_type: l40s
|
|
50
47
|
vllm_args:
|
|
51
48
|
--max-model-len: 16384
|
|
52
|
-
--max-num-seqs: 256
|
|
53
49
|
CodeLlama-13b-hf:
|
|
54
50
|
model_family: CodeLlama
|
|
55
51
|
model_variant: 13b-hf
|
|
@@ -61,7 +57,6 @@ models:
|
|
|
61
57
|
resource_type: l40s
|
|
62
58
|
vllm_args:
|
|
63
59
|
--max-model-len: 16384
|
|
64
|
-
--max-num-seqs: 256
|
|
65
60
|
CodeLlama-13b-Instruct-hf:
|
|
66
61
|
model_family: CodeLlama
|
|
67
62
|
model_variant: 13b-Instruct-hf
|
|
@@ -73,7 +68,6 @@ models:
|
|
|
73
68
|
resource_type: l40s
|
|
74
69
|
vllm_args:
|
|
75
70
|
--max-model-len: 16384
|
|
76
|
-
--max-num-seqs: 256
|
|
77
71
|
CodeLlama-34b-hf:
|
|
78
72
|
model_family: CodeLlama
|
|
79
73
|
model_variant: 34b-hf
|
|
@@ -86,7 +80,6 @@ models:
|
|
|
86
80
|
vllm_args:
|
|
87
81
|
--tensor-parallel-size: 2
|
|
88
82
|
--max-model-len: 16384
|
|
89
|
-
--max-num-seqs: 256
|
|
90
83
|
CodeLlama-34b-Instruct-hf:
|
|
91
84
|
model_family: CodeLlama
|
|
92
85
|
model_variant: 34b-Instruct-hf
|
|
@@ -99,7 +92,6 @@ models:
|
|
|
99
92
|
vllm_args:
|
|
100
93
|
--tensor-parallel-size: 2
|
|
101
94
|
--max-model-len: 16384
|
|
102
|
-
--max-num-seqs: 256
|
|
103
95
|
CodeLlama-70b-hf:
|
|
104
96
|
model_family: CodeLlama
|
|
105
97
|
model_variant: 70b-hf
|
|
@@ -112,7 +104,6 @@ models:
|
|
|
112
104
|
vllm_args:
|
|
113
105
|
--tensor-parallel-size: 4
|
|
114
106
|
--max-model-len: 4096
|
|
115
|
-
--max-num-seqs: 256
|
|
116
107
|
CodeLlama-70b-Instruct-hf:
|
|
117
108
|
model_family: CodeLlama
|
|
118
109
|
model_variant: 70b-Instruct-hf
|
|
@@ -125,7 +116,17 @@ models:
|
|
|
125
116
|
vllm_args:
|
|
126
117
|
--tensor-parallel-size: 4
|
|
127
118
|
--max-model-len: 4096
|
|
128
|
-
|
|
119
|
+
gemma-2-2b-it:
|
|
120
|
+
model_family: gemma-2
|
|
121
|
+
model_variant: 2b-it
|
|
122
|
+
model_type: LLM
|
|
123
|
+
gpus_per_node: 1
|
|
124
|
+
num_nodes: 1
|
|
125
|
+
vocab_size: 256000
|
|
126
|
+
time: 08:00:00
|
|
127
|
+
resource_type: l40s
|
|
128
|
+
vllm_args:
|
|
129
|
+
--max-model-len: 4096
|
|
129
130
|
gemma-2-9b:
|
|
130
131
|
model_family: gemma-2
|
|
131
132
|
model_variant: 9b
|
|
@@ -137,7 +138,6 @@ models:
|
|
|
137
138
|
resource_type: l40s
|
|
138
139
|
vllm_args:
|
|
139
140
|
--max-model-len: 4096
|
|
140
|
-
--max-num-seqs: 256
|
|
141
141
|
gemma-2-9b-it:
|
|
142
142
|
model_family: gemma-2
|
|
143
143
|
model_variant: 9b-it
|
|
@@ -149,7 +149,6 @@ models:
|
|
|
149
149
|
resource_type: l40s
|
|
150
150
|
vllm_args:
|
|
151
151
|
--max-model-len: 4096
|
|
152
|
-
--max-num-seqs: 256
|
|
153
152
|
gemma-2-27b:
|
|
154
153
|
model_family: gemma-2
|
|
155
154
|
model_variant: 27b
|
|
@@ -162,7 +161,6 @@ models:
|
|
|
162
161
|
vllm_args:
|
|
163
162
|
--tensor-parallel-size: 2
|
|
164
163
|
--max-model-len: 4096
|
|
165
|
-
--max-num-seqs: 256
|
|
166
164
|
gemma-2-27b-it:
|
|
167
165
|
model_family: gemma-2
|
|
168
166
|
model_variant: 27b-it
|
|
@@ -175,7 +173,6 @@ models:
|
|
|
175
173
|
vllm_args:
|
|
176
174
|
--tensor-parallel-size: 2
|
|
177
175
|
--max-model-len: 4096
|
|
178
|
-
--max-num-seqs: 256
|
|
179
176
|
Llama-2-7b-hf:
|
|
180
177
|
model_family: Llama-2
|
|
181
178
|
model_variant: 7b-hf
|
|
@@ -187,7 +184,6 @@ models:
|
|
|
187
184
|
resource_type: l40s
|
|
188
185
|
vllm_args:
|
|
189
186
|
--max-model-len: 4096
|
|
190
|
-
--max-num-seqs: 256
|
|
191
187
|
Llama-2-7b-chat-hf:
|
|
192
188
|
model_family: Llama-2
|
|
193
189
|
model_variant: 7b-chat-hf
|
|
@@ -199,7 +195,6 @@ models:
|
|
|
199
195
|
resource_type: l40s
|
|
200
196
|
vllm_args:
|
|
201
197
|
--max-model-len: 4096
|
|
202
|
-
--max-num-seqs: 256
|
|
203
198
|
Llama-2-13b-hf:
|
|
204
199
|
model_family: Llama-2
|
|
205
200
|
model_variant: 13b-hf
|
|
@@ -211,7 +206,6 @@ models:
|
|
|
211
206
|
resource_type: l40s
|
|
212
207
|
vllm_args:
|
|
213
208
|
--max-model-len: 4096
|
|
214
|
-
--max-num-seqs: 256
|
|
215
209
|
Llama-2-13b-chat-hf:
|
|
216
210
|
model_family: Llama-2
|
|
217
211
|
model_variant: 13b-chat-hf
|
|
@@ -223,7 +217,6 @@ models:
|
|
|
223
217
|
resource_type: l40s
|
|
224
218
|
vllm_args:
|
|
225
219
|
--max-model-len: 4096
|
|
226
|
-
--max-num-seqs: 256
|
|
227
220
|
Llama-2-70b-hf:
|
|
228
221
|
model_family: Llama-2
|
|
229
222
|
model_variant: 70b-hf
|
|
@@ -236,7 +229,6 @@ models:
|
|
|
236
229
|
vllm_args:
|
|
237
230
|
--tensor-parallel-size: 4
|
|
238
231
|
--max-model-len: 4096
|
|
239
|
-
--max-num-seqs: 256
|
|
240
232
|
Llama-2-70b-chat-hf:
|
|
241
233
|
model_family: Llama-2
|
|
242
234
|
model_variant: 70b-chat-hf
|
|
@@ -249,7 +241,6 @@ models:
|
|
|
249
241
|
vllm_args:
|
|
250
242
|
--tensor-parallel-size: 4
|
|
251
243
|
--max-model-len: 4096
|
|
252
|
-
--max-num-seqs: 256
|
|
253
244
|
llava-1.5-7b-hf:
|
|
254
245
|
model_family: llava-1.5
|
|
255
246
|
model_variant: 7b-hf
|
|
@@ -261,7 +252,6 @@ models:
|
|
|
261
252
|
resource_type: l40s
|
|
262
253
|
vllm_args:
|
|
263
254
|
--max-model-len: 4096
|
|
264
|
-
--max-num-seqs: 256
|
|
265
255
|
llava-1.5-13b-hf:
|
|
266
256
|
model_family: llava-1.5
|
|
267
257
|
model_variant: 13b-hf
|
|
@@ -273,7 +263,6 @@ models:
|
|
|
273
263
|
resource_type: l40s
|
|
274
264
|
vllm_args:
|
|
275
265
|
--max-model-len: 4096
|
|
276
|
-
--max-num-seqs: 256
|
|
277
266
|
llava-v1.6-mistral-7b-hf:
|
|
278
267
|
model_family: llava-v1.6
|
|
279
268
|
model_variant: mistral-7b-hf
|
|
@@ -285,7 +274,6 @@ models:
|
|
|
285
274
|
resource_type: l40s
|
|
286
275
|
vllm_args:
|
|
287
276
|
--max-model-len: 32768
|
|
288
|
-
--max-num-seqs: 256
|
|
289
277
|
llava-v1.6-34b-hf:
|
|
290
278
|
model_family: llava-v1.6
|
|
291
279
|
model_variant: 34b-hf
|
|
@@ -298,7 +286,6 @@ models:
|
|
|
298
286
|
vllm_args:
|
|
299
287
|
--tensor-parallel-size: 2
|
|
300
288
|
--max-model-len: 4096
|
|
301
|
-
--max-num-seqs: 256
|
|
302
289
|
Meta-Llama-3-8B:
|
|
303
290
|
model_family: Meta-Llama-3
|
|
304
291
|
model_variant: 8B
|
|
@@ -310,7 +297,6 @@ models:
|
|
|
310
297
|
resource_type: l40s
|
|
311
298
|
vllm_args:
|
|
312
299
|
--max-model-len: 8192
|
|
313
|
-
--max-num-seqs: 256
|
|
314
300
|
Meta-Llama-3-8B-Instruct:
|
|
315
301
|
model_family: Meta-Llama-3
|
|
316
302
|
model_variant: 8B-Instruct
|
|
@@ -322,7 +308,6 @@ models:
|
|
|
322
308
|
resource_type: l40s
|
|
323
309
|
vllm_args:
|
|
324
310
|
--max-model-len: 8192
|
|
325
|
-
--max-num-seqs: 256
|
|
326
311
|
Meta-Llama-3-70B:
|
|
327
312
|
model_family: Meta-Llama-3
|
|
328
313
|
model_variant: 70B
|
|
@@ -335,7 +320,6 @@ models:
|
|
|
335
320
|
vllm_args:
|
|
336
321
|
--tensor-parallel-size: 4
|
|
337
322
|
--max-model-len: 8192
|
|
338
|
-
--max-num-seqs: 256
|
|
339
323
|
Meta-Llama-3-70B-Instruct:
|
|
340
324
|
model_family: Meta-Llama-3
|
|
341
325
|
model_variant: 70B-Instruct
|
|
@@ -348,7 +332,6 @@ models:
|
|
|
348
332
|
vllm_args:
|
|
349
333
|
--tensor-parallel-size: 4
|
|
350
334
|
--max-model-len: 8192
|
|
351
|
-
--max-num-seqs: 256
|
|
352
335
|
Meta-Llama-3.1-8B:
|
|
353
336
|
model_family: Meta-Llama-3.1
|
|
354
337
|
model_variant: 8B
|
|
@@ -360,7 +343,6 @@ models:
|
|
|
360
343
|
resource_type: l40s
|
|
361
344
|
vllm_args:
|
|
362
345
|
--max-model-len: 131072
|
|
363
|
-
--max-num-seqs: 256
|
|
364
346
|
Meta-Llama-3.1-8B-Instruct:
|
|
365
347
|
model_family: Meta-Llama-3.1
|
|
366
348
|
model_variant: 8B-Instruct
|
|
@@ -372,7 +354,6 @@ models:
|
|
|
372
354
|
resource_type: l40s
|
|
373
355
|
vllm_args:
|
|
374
356
|
--max-model-len: 131072
|
|
375
|
-
--max-num-seqs: 256
|
|
376
357
|
Meta-Llama-3.1-70B:
|
|
377
358
|
model_family: Meta-Llama-3.1
|
|
378
359
|
model_variant: 70B
|
|
@@ -385,7 +366,6 @@ models:
|
|
|
385
366
|
vllm_args:
|
|
386
367
|
--tensor-parallel-size: 4
|
|
387
368
|
--max-model-len: 65536
|
|
388
|
-
--max-num-seqs: 256
|
|
389
369
|
Meta-Llama-3.1-70B-Instruct:
|
|
390
370
|
model_family: Meta-Llama-3.1
|
|
391
371
|
model_variant: 70B-Instruct
|
|
@@ -398,7 +378,6 @@ models:
|
|
|
398
378
|
vllm_args:
|
|
399
379
|
--tensor-parallel-size: 4
|
|
400
380
|
--max-model-len: 65536
|
|
401
|
-
--max-num-seqs: 256
|
|
402
381
|
Meta-Llama-3.1-405B-Instruct:
|
|
403
382
|
model_family: Meta-Llama-3.1
|
|
404
383
|
model_variant: 405B-Instruct
|
|
@@ -406,14 +385,12 @@ models:
|
|
|
406
385
|
gpus_per_node: 4
|
|
407
386
|
num_nodes: 8
|
|
408
387
|
vocab_size: 128256
|
|
409
|
-
|
|
410
|
-
time: 02:00:00
|
|
388
|
+
time: 08:00:00
|
|
411
389
|
resource_type: l40s
|
|
412
390
|
vllm_args:
|
|
413
391
|
--pipeline-parallel-size: 8
|
|
414
392
|
--tensor-parallel-size: 4
|
|
415
393
|
--max-model-len: 16384
|
|
416
|
-
--max-num-seqs: 256
|
|
417
394
|
Mistral-7B-Instruct-v0.1:
|
|
418
395
|
model_family: Mistral
|
|
419
396
|
model_variant: 7B-Instruct-v0.1
|
|
@@ -425,7 +402,6 @@ models:
|
|
|
425
402
|
resource_type: l40s
|
|
426
403
|
vllm_args:
|
|
427
404
|
--max-model-len: 32768
|
|
428
|
-
--max-num-seqs: 256
|
|
429
405
|
Mistral-7B-Instruct-v0.2:
|
|
430
406
|
model_family: Mistral
|
|
431
407
|
model_variant: 7B-Instruct-v0.2
|
|
@@ -437,7 +413,6 @@ models:
|
|
|
437
413
|
resource_type: l40s
|
|
438
414
|
vllm_args:
|
|
439
415
|
--max-model-len: 32768
|
|
440
|
-
--max-num-seqs: 256
|
|
441
416
|
Mistral-7B-v0.3:
|
|
442
417
|
model_family: Mistral
|
|
443
418
|
model_variant: 7B-v0.3
|
|
@@ -449,7 +424,6 @@ models:
|
|
|
449
424
|
resource_type: l40s
|
|
450
425
|
vllm_args:
|
|
451
426
|
--max-model-len: 32768
|
|
452
|
-
--max-num-seqs: 256
|
|
453
427
|
Mistral-7B-Instruct-v0.3:
|
|
454
428
|
model_family: Mistral
|
|
455
429
|
model_variant: 7B-Instruct-v0.3
|
|
@@ -461,7 +435,6 @@ models:
|
|
|
461
435
|
resource_type: l40s
|
|
462
436
|
vllm_args:
|
|
463
437
|
--max-model-len: 32768
|
|
464
|
-
--max-num-seqs: 256
|
|
465
438
|
Mistral-Large-Instruct-2407:
|
|
466
439
|
model_family: Mistral
|
|
467
440
|
model_variant: Large-Instruct-2407
|
|
@@ -475,7 +448,6 @@ models:
|
|
|
475
448
|
--pipeline-parallel-size: 2
|
|
476
449
|
--tensor-parallel-size: 4
|
|
477
450
|
--max-model-len: 32768
|
|
478
|
-
--max-num-seqs: 256
|
|
479
451
|
Mistral-Large-Instruct-2411:
|
|
480
452
|
model_family: Mistral
|
|
481
453
|
model_variant: Large-Instruct-2411
|
|
@@ -489,7 +461,6 @@ models:
|
|
|
489
461
|
--pipeline-parallel-size: 2
|
|
490
462
|
--tensor-parallel-size: 4
|
|
491
463
|
--max-model-len: 32768
|
|
492
|
-
--max-num-seqs: 256
|
|
493
464
|
Mixtral-8x7B-Instruct-v0.1:
|
|
494
465
|
model_family: Mixtral
|
|
495
466
|
model_variant: 8x7B-Instruct-v0.1
|
|
@@ -502,7 +473,6 @@ models:
|
|
|
502
473
|
vllm_args:
|
|
503
474
|
--tensor-parallel-size: 4
|
|
504
475
|
--max-model-len: 32768
|
|
505
|
-
--max-num-seqs: 256
|
|
506
476
|
Mixtral-8x22B-v0.1:
|
|
507
477
|
model_family: Mixtral
|
|
508
478
|
model_variant: 8x22B-v0.1
|
|
@@ -516,7 +486,6 @@ models:
|
|
|
516
486
|
--pipeline-parallel-size: 2
|
|
517
487
|
--tensor-parallel-size: 4
|
|
518
488
|
--max-model-len: 65536
|
|
519
|
-
--max-num-seqs: 256
|
|
520
489
|
Mixtral-8x22B-Instruct-v0.1:
|
|
521
490
|
model_family: Mixtral
|
|
522
491
|
model_variant: 8x22B-Instruct-v0.1
|
|
@@ -530,7 +499,6 @@ models:
|
|
|
530
499
|
--pipeline-parallel-size: 2
|
|
531
500
|
--tensor-parallel-size: 4
|
|
532
501
|
--max-model-len: 65536
|
|
533
|
-
--max-num-seqs: 256
|
|
534
502
|
Phi-3-medium-128k-instruct:
|
|
535
503
|
model_family: Phi-3
|
|
536
504
|
model_variant: medium-128k-instruct
|
|
@@ -543,7 +511,6 @@ models:
|
|
|
543
511
|
vllm_args:
|
|
544
512
|
--tensor-parallel-size: 2
|
|
545
513
|
--max-model-len: 131072
|
|
546
|
-
--max-num-seqs: 256
|
|
547
514
|
Phi-3-vision-128k-instruct:
|
|
548
515
|
model_family: Phi-3-vision
|
|
549
516
|
model_variant: 128k-instruct
|
|
@@ -556,20 +523,6 @@ models:
|
|
|
556
523
|
vllm_args:
|
|
557
524
|
--tensor-parallel-size: 2
|
|
558
525
|
--max-model-len: 65536
|
|
559
|
-
--max-num-seqs: 256
|
|
560
|
-
Llama3-OpenBioLLM-70B:
|
|
561
|
-
model_family: Llama3-OpenBioLLM
|
|
562
|
-
model_variant: 70B
|
|
563
|
-
model_type: LLM
|
|
564
|
-
gpus_per_node: 4
|
|
565
|
-
num_nodes: 1
|
|
566
|
-
vocab_size: 128256
|
|
567
|
-
time: 08:00:00
|
|
568
|
-
resource_type: l40s
|
|
569
|
-
vllm_args:
|
|
570
|
-
--tensor-parallel-size: 4
|
|
571
|
-
--max-model-len: 8192
|
|
572
|
-
--max-num-seqs: 256
|
|
573
526
|
Llama-3.1-Nemotron-70B-Instruct-HF:
|
|
574
527
|
model_family: Llama-3.1-Nemotron
|
|
575
528
|
model_variant: 70B-Instruct-HF
|
|
@@ -582,7 +535,6 @@ models:
|
|
|
582
535
|
vllm_args:
|
|
583
536
|
--tensor-parallel-size: 4
|
|
584
537
|
--max-model-len: 65536
|
|
585
|
-
--max-num-seqs: 256
|
|
586
538
|
Llama-3.2-1B:
|
|
587
539
|
model_family: Llama-3.2
|
|
588
540
|
model_variant: 1B
|
|
@@ -594,7 +546,6 @@ models:
|
|
|
594
546
|
resource_type: l40s
|
|
595
547
|
vllm_args:
|
|
596
548
|
--max-model-len: 131072
|
|
597
|
-
--max-num-seqs: 256
|
|
598
549
|
Llama-3.2-1B-Instruct:
|
|
599
550
|
model_family: Llama-3.2
|
|
600
551
|
model_variant: 1B-Instruct
|
|
@@ -606,7 +557,6 @@ models:
|
|
|
606
557
|
resource_type: l40s
|
|
607
558
|
vllm_args:
|
|
608
559
|
--max-model-len: 131072
|
|
609
|
-
--max-num-seqs: 256
|
|
610
560
|
Llama-3.2-3B:
|
|
611
561
|
model_family: Llama-3.2
|
|
612
562
|
model_variant: 3B
|
|
@@ -618,7 +568,6 @@ models:
|
|
|
618
568
|
resource_type: l40s
|
|
619
569
|
vllm_args:
|
|
620
570
|
--max-model-len: 131072
|
|
621
|
-
--max-num-seqs: 256
|
|
622
571
|
Llama-3.2-3B-Instruct:
|
|
623
572
|
model_family: Llama-3.2
|
|
624
573
|
model_variant: 3B-Instruct
|
|
@@ -630,7 +579,6 @@ models:
|
|
|
630
579
|
resource_type: l40s
|
|
631
580
|
vllm_args:
|
|
632
581
|
--max-model-len: 131072
|
|
633
|
-
--max-num-seqs: 256
|
|
634
582
|
Llama-3.2-11B-Vision:
|
|
635
583
|
model_family: Llama-3.2
|
|
636
584
|
model_variant: 11B-Vision
|
|
@@ -698,7 +646,6 @@ models:
|
|
|
698
646
|
resource_type: l40s
|
|
699
647
|
vllm_args:
|
|
700
648
|
--max-model-len: 32768
|
|
701
|
-
--max-num-seqs: 256
|
|
702
649
|
Qwen2.5-1.5B-Instruct:
|
|
703
650
|
model_family: Qwen2.5
|
|
704
651
|
model_variant: 1.5B-Instruct
|
|
@@ -710,7 +657,6 @@ models:
|
|
|
710
657
|
resource_type: l40s
|
|
711
658
|
vllm_args:
|
|
712
659
|
--max-model-len: 32768
|
|
713
|
-
--max-num-seqs: 256
|
|
714
660
|
Qwen2.5-3B-Instruct:
|
|
715
661
|
model_family: Qwen2.5
|
|
716
662
|
model_variant: 3B-Instruct
|
|
@@ -722,7 +668,6 @@ models:
|
|
|
722
668
|
resource_type: l40s
|
|
723
669
|
vllm_args:
|
|
724
670
|
--max-model-len: 32768
|
|
725
|
-
--max-num-seqs: 256
|
|
726
671
|
Qwen2.5-7B-Instruct:
|
|
727
672
|
model_family: Qwen2.5
|
|
728
673
|
model_variant: 7B-Instruct
|
|
@@ -734,7 +679,6 @@ models:
|
|
|
734
679
|
resource_type: l40s
|
|
735
680
|
vllm_args:
|
|
736
681
|
--max-model-len: 32768
|
|
737
|
-
--max-num-seqs: 256
|
|
738
682
|
Qwen2.5-14B-Instruct:
|
|
739
683
|
model_family: Qwen2.5
|
|
740
684
|
model_variant: 14B-Instruct
|
|
@@ -746,7 +690,6 @@ models:
|
|
|
746
690
|
resource_type: l40s
|
|
747
691
|
vllm_args:
|
|
748
692
|
--max-model-len: 32768
|
|
749
|
-
--max-num-seqs: 256
|
|
750
693
|
Qwen2.5-32B-Instruct:
|
|
751
694
|
model_family: Qwen2.5
|
|
752
695
|
model_variant: 32B-Instruct
|
|
@@ -759,7 +702,6 @@ models:
|
|
|
759
702
|
vllm_args:
|
|
760
703
|
--tensor-parallel-size: 2
|
|
761
704
|
--max-model-len: 32768
|
|
762
|
-
--max-num-seqs: 256
|
|
763
705
|
Qwen2.5-72B-Instruct:
|
|
764
706
|
model_family: Qwen2.5
|
|
765
707
|
model_variant: 72B-Instruct
|
|
@@ -772,7 +714,6 @@ models:
|
|
|
772
714
|
vllm_args:
|
|
773
715
|
--tensor-parallel-size: 4
|
|
774
716
|
--max-model-len: 16384
|
|
775
|
-
--max-num-seqs: 256
|
|
776
717
|
Qwen2.5-Math-1.5B-Instruct:
|
|
777
718
|
model_family: Qwen2.5
|
|
778
719
|
model_variant: Math-1.5B-Instruct
|
|
@@ -784,7 +725,6 @@ models:
|
|
|
784
725
|
resource_type: l40s
|
|
785
726
|
vllm_args:
|
|
786
727
|
--max-model-len: 4096
|
|
787
|
-
--max-num-seqs: 256
|
|
788
728
|
Qwen2.5-Math-7B-Instruct:
|
|
789
729
|
model_family: Qwen2.5
|
|
790
730
|
model_variant: Math-7B-Instruct
|
|
@@ -796,7 +736,6 @@ models:
|
|
|
796
736
|
resource_type: l40s
|
|
797
737
|
vllm_args:
|
|
798
738
|
--max-model-len: 4096
|
|
799
|
-
--max-num-seqs: 256
|
|
800
739
|
Qwen2.5-Math-72B-Instruct:
|
|
801
740
|
model_family: Qwen2.5
|
|
802
741
|
model_variant: Math-72B-Instruct
|
|
@@ -809,7 +748,6 @@ models:
|
|
|
809
748
|
vllm_args:
|
|
810
749
|
--tensor-parallel-size: 4
|
|
811
750
|
--max-model-len: 4096
|
|
812
|
-
--max-num-seqs: 256
|
|
813
751
|
Qwen2.5-Coder-7B-Instruct:
|
|
814
752
|
model_family: Qwen2.5
|
|
815
753
|
model_variant: Coder-7B-Instruct
|
|
@@ -821,7 +759,6 @@ models:
|
|
|
821
759
|
resource_type: l40s
|
|
822
760
|
vllm_args:
|
|
823
761
|
--max-model-len: 32768
|
|
824
|
-
--max-num-seqs: 256
|
|
825
762
|
Qwen2.5-Math-RM-72B:
|
|
826
763
|
model_family: Qwen2.5
|
|
827
764
|
model_variant: Math-RM-72B
|
|
@@ -834,7 +771,6 @@ models:
|
|
|
834
771
|
vllm_args:
|
|
835
772
|
--tensor-parallel-size: 4
|
|
836
773
|
--max-model-len: 4096
|
|
837
|
-
--max-num-seqs: 256
|
|
838
774
|
Qwen2.5-Math-PRM-7B:
|
|
839
775
|
model_family: Qwen2.5
|
|
840
776
|
model_variant: Math-PRM-7B
|
|
@@ -846,7 +782,6 @@ models:
|
|
|
846
782
|
resource_type: l40s
|
|
847
783
|
vllm_args:
|
|
848
784
|
--max-model-len: 4096
|
|
849
|
-
--max-num-seqs: 256
|
|
850
785
|
QwQ-32B:
|
|
851
786
|
model_family: QwQ
|
|
852
787
|
model_variant: 32B
|
|
@@ -859,7 +794,6 @@ models:
|
|
|
859
794
|
vllm_args:
|
|
860
795
|
--tensor-parallel-size: 2
|
|
861
796
|
--max-model-len: 32768
|
|
862
|
-
--max-num-seqs: 256
|
|
863
797
|
Pixtral-12B-2409:
|
|
864
798
|
model_family: Pixtral
|
|
865
799
|
model_variant: 12B-2409
|
|
@@ -871,7 +805,6 @@ models:
|
|
|
871
805
|
resource_type: l40s
|
|
872
806
|
vllm_args:
|
|
873
807
|
--max-model-len: 8192
|
|
874
|
-
--max-num-seqs: 256
|
|
875
808
|
e5-mistral-7b-instruct:
|
|
876
809
|
model_family: e5
|
|
877
810
|
model_variant: mistral-7b-instruct
|
|
@@ -883,7 +816,6 @@ models:
|
|
|
883
816
|
resource_type: l40s
|
|
884
817
|
vllm_args:
|
|
885
818
|
--max-model-len: 4096
|
|
886
|
-
--max-num-seqs: 256
|
|
887
819
|
bge-base-en-v1.5:
|
|
888
820
|
model_family: bge
|
|
889
821
|
model_variant: base-en-v1.5
|
|
@@ -895,7 +827,6 @@ models:
|
|
|
895
827
|
resource_type: l40s
|
|
896
828
|
vllm_args:
|
|
897
829
|
--max-model-len: 512
|
|
898
|
-
--max-num-seqs: 256
|
|
899
830
|
all-MiniLM-L6-v2:
|
|
900
831
|
model_family: all-MiniLM
|
|
901
832
|
model_variant: L6-v2
|
|
@@ -907,7 +838,6 @@ models:
|
|
|
907
838
|
resource_type: l40s
|
|
908
839
|
vllm_args:
|
|
909
840
|
--max-model-len: 512
|
|
910
|
-
--max-num-seqs: 256
|
|
911
841
|
Llama-3.3-70B-Instruct:
|
|
912
842
|
model_family: Llama-3.3
|
|
913
843
|
model_variant: 70B-Instruct
|
|
@@ -920,7 +850,6 @@ models:
|
|
|
920
850
|
vllm_args:
|
|
921
851
|
--tensor-parallel-size: 4
|
|
922
852
|
--max-model-len: 65536
|
|
923
|
-
--max-num-seqs: 256
|
|
924
853
|
InternVL2_5-26B:
|
|
925
854
|
model_family: InternVL2_5
|
|
926
855
|
model_variant: 26B
|
|
@@ -933,7 +862,6 @@ models:
|
|
|
933
862
|
vllm_args:
|
|
934
863
|
--tensor-parallel-size: 2
|
|
935
864
|
--max-model-len: 32768
|
|
936
|
-
--max-num-seqs: 256
|
|
937
865
|
InternVL2_5-38B:
|
|
938
866
|
model_family: InternVL2_5
|
|
939
867
|
model_variant: 38B
|
|
@@ -946,7 +874,6 @@ models:
|
|
|
946
874
|
vllm_args:
|
|
947
875
|
--tensor-parallel-size: 4
|
|
948
876
|
--max-model-len: 32768
|
|
949
|
-
--max-num-seqs: 256
|
|
950
877
|
Aya-Expanse-32B:
|
|
951
878
|
model_family: Aya-Expanse
|
|
952
879
|
model_variant: 32B
|
|
@@ -959,7 +886,6 @@ models:
|
|
|
959
886
|
vllm_args:
|
|
960
887
|
--tensor-parallel-size: 2
|
|
961
888
|
--max-model-len: 8192
|
|
962
|
-
--max-num-seqs: 256
|
|
963
889
|
DeepSeek-R1-Distill-Llama-70B:
|
|
964
890
|
model_family: DeepSeek-R1
|
|
965
891
|
model_variant: Distill-Llama-70B
|
|
@@ -972,7 +898,6 @@ models:
|
|
|
972
898
|
vllm_args:
|
|
973
899
|
--tensor-parallel-size: 4
|
|
974
900
|
--max-model-len: 65536
|
|
975
|
-
--max-num-seqs: 256
|
|
976
901
|
DeepSeek-R1-Distill-Llama-8B:
|
|
977
902
|
model_family: DeepSeek-R1
|
|
978
903
|
model_variant: Distill-Llama-8B
|
|
@@ -984,7 +909,6 @@ models:
|
|
|
984
909
|
resource_type: l40s
|
|
985
910
|
vllm_args:
|
|
986
911
|
--max-model-len: 131072
|
|
987
|
-
--max-num-seqs: 256
|
|
988
912
|
DeepSeek-R1-Distill-Qwen-32B:
|
|
989
913
|
model_family: DeepSeek-R1
|
|
990
914
|
model_variant: Distill-Qwen-32B
|
|
@@ -997,7 +921,6 @@ models:
|
|
|
997
921
|
vllm_args:
|
|
998
922
|
--tensor-parallel-size: 2
|
|
999
923
|
--max-model-len: 65536
|
|
1000
|
-
--max-num-seqs: 256
|
|
1001
924
|
DeepSeek-R1-Distill-Qwen-14B:
|
|
1002
925
|
model_family: DeepSeek-R1
|
|
1003
926
|
model_variant: Distill-Qwen-14B
|
|
@@ -1009,7 +932,6 @@ models:
|
|
|
1009
932
|
resource_type: l40s
|
|
1010
933
|
vllm_args:
|
|
1011
934
|
--max-model-len: 65536
|
|
1012
|
-
--max-num-seqs: 256
|
|
1013
935
|
DeepSeek-R1-Distill-Qwen-7B:
|
|
1014
936
|
model_family: DeepSeek-R1
|
|
1015
937
|
model_variant: Distill-Qwen-7B
|
|
@@ -1021,7 +943,6 @@ models:
|
|
|
1021
943
|
resource_type: l40s
|
|
1022
944
|
vllm_args:
|
|
1023
945
|
--max-model-len: 131072
|
|
1024
|
-
--max-num-seqs: 256
|
|
1025
946
|
DeepSeek-R1-Distill-Qwen-1.5B:
|
|
1026
947
|
model_family: DeepSeek-R1
|
|
1027
948
|
model_variant: Distill-Qwen-1.5B
|
|
@@ -1033,7 +954,6 @@ models:
|
|
|
1033
954
|
resource_type: l40s
|
|
1034
955
|
vllm_args:
|
|
1035
956
|
--max-model-len: 131072
|
|
1036
|
-
--max-num-seqs: 256
|
|
1037
957
|
Phi-3.5-vision-instruct:
|
|
1038
958
|
model_family: Phi-3.5-vision
|
|
1039
959
|
model_variant: instruct
|
|
@@ -1046,7 +966,6 @@ models:
|
|
|
1046
966
|
vllm_args:
|
|
1047
967
|
--tensor-parallel-size: 2
|
|
1048
968
|
--max-model-len: 65536
|
|
1049
|
-
--max-num-seqs: 256
|
|
1050
969
|
InternVL2_5-8B:
|
|
1051
970
|
model_family: InternVL2_5
|
|
1052
971
|
model_variant: 8B
|
|
@@ -1058,7 +977,6 @@ models:
|
|
|
1058
977
|
resource_type: l40s
|
|
1059
978
|
vllm_args:
|
|
1060
979
|
--max-model-len: 32768
|
|
1061
|
-
--max-num-seqs: 256
|
|
1062
980
|
glm-4v-9b:
|
|
1063
981
|
model_family: glm-4v
|
|
1064
982
|
model_variant: 9b
|
|
@@ -1070,7 +988,6 @@ models:
|
|
|
1070
988
|
resource_type: l40s
|
|
1071
989
|
vllm_args:
|
|
1072
990
|
--max-model-len: 8192
|
|
1073
|
-
--max-num-seqs: 256
|
|
1074
991
|
Molmo-7B-D-0924:
|
|
1075
992
|
model_family: Molmo
|
|
1076
993
|
model_variant: 7B-D-0924
|
|
@@ -1082,7 +999,6 @@ models:
|
|
|
1082
999
|
resource_type: l40s
|
|
1083
1000
|
vllm_args:
|
|
1084
1001
|
--max-model-len: 4096
|
|
1085
|
-
--max-num-seqs: 256
|
|
1086
1002
|
deepseek-vl2:
|
|
1087
1003
|
model_family: deepseek-vl2
|
|
1088
1004
|
model_type: VLM
|
|
@@ -1094,7 +1010,6 @@ models:
|
|
|
1094
1010
|
vllm_args:
|
|
1095
1011
|
--tensor-parallel-size: 2
|
|
1096
1012
|
--max-model-len: 4096
|
|
1097
|
-
--max-num-seqs: 256
|
|
1098
1013
|
deepseek-vl2-small:
|
|
1099
1014
|
model_family: deepseek-vl2
|
|
1100
1015
|
model_variant: small
|
|
@@ -1106,7 +1021,17 @@ models:
|
|
|
1106
1021
|
resource_type: l40s
|
|
1107
1022
|
vllm_args:
|
|
1108
1023
|
--max-model-len: 4096
|
|
1109
|
-
|
|
1024
|
+
Qwen3-8B:
|
|
1025
|
+
model_family: Qwen3
|
|
1026
|
+
model_variant: 8B
|
|
1027
|
+
model_type: LLM
|
|
1028
|
+
gpus_per_node: 1
|
|
1029
|
+
num_nodes: 1
|
|
1030
|
+
vocab_size: 151936
|
|
1031
|
+
time: 08:00:00
|
|
1032
|
+
resource_type: l40s
|
|
1033
|
+
vllm_args:
|
|
1034
|
+
--max-model-len: 40960
|
|
1110
1035
|
Qwen3-14B:
|
|
1111
1036
|
model_family: Qwen3
|
|
1112
1037
|
model_variant: 14B
|
|
@@ -1118,4 +1043,28 @@ models:
|
|
|
1118
1043
|
resource_type: l40s
|
|
1119
1044
|
vllm_args:
|
|
1120
1045
|
--max-model-len: 40960
|
|
1121
|
-
|
|
1046
|
+
Qwen3-32B:
|
|
1047
|
+
model_family: Qwen3
|
|
1048
|
+
model_variant: 32B
|
|
1049
|
+
model_type: LLM
|
|
1050
|
+
gpus_per_node: 2
|
|
1051
|
+
num_nodes: 1
|
|
1052
|
+
vocab_size: 151936
|
|
1053
|
+
time: 08:00:00
|
|
1054
|
+
resource_type: l40s
|
|
1055
|
+
vllm_args:
|
|
1056
|
+
--tensor-parallel-size: 2
|
|
1057
|
+
--max-model-len: 40960
|
|
1058
|
+
gpt-oss-120b:
|
|
1059
|
+
model_family: gpt-oss
|
|
1060
|
+
model_variant: 120b
|
|
1061
|
+
model_type: LLM
|
|
1062
|
+
gpus_per_node: 4
|
|
1063
|
+
num_nodes: 2
|
|
1064
|
+
vocab_size: 201088
|
|
1065
|
+
time: 08:00:00
|
|
1066
|
+
resource_type: l40s
|
|
1067
|
+
vllm_args:
|
|
1068
|
+
--tensor-parallel-size: 4
|
|
1069
|
+
--pipeline-parallel-size: 2
|
|
1070
|
+
--max-model-len: 40960
|