vec-inf 0.6.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
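
In summary, this diff migrates every entry in the bundled model registry from Slurm-specific scheduling fields to a cluster-agnostic one: the qos: m2 and partition: a40 fields are dropped and replaced by a single resource_type: l40s field, and the --compilation-config: 3 vLLM argument is removed throughout. Beyond the schema change, the c4ai-command-r-plus entry is deleted, QwQ-32B-Preview is replaced by QwQ-32B, and a new Qwen3-14B entry is added. A minimal before/after sketch of one representative entry follows (values copied from the diff below; the comments are explanatory annotations, not part of the file):

    # 0.6.0 (old schema)
    models:
      CodeLlama-7b-hf:
        model_family: CodeLlama
        model_variant: 7b-hf
        model_type: LLM
        gpus_per_node: 1
        num_nodes: 1
        vocab_size: 32000
        qos: m2                    # Slurm QoS field, removed in 0.7.0
        time: 08:00:00
        partition: a40             # Slurm partition, removed in 0.7.0
        vllm_args:
          --max-model-len: 16384
          --max-num-seqs: 256
          --compilation-config: 3  # removed in 0.7.0

    # 0.7.0 (new schema)
    models:
      CodeLlama-7b-hf:
        model_family: CodeLlama
        model_variant: 7b-hf
        model_type: LLM
        gpus_per_node: 1
        num_nodes: 1
        vocab_size: 32000
        time: 08:00:00
        resource_type: l40s        # replaces qos + partition
        vllm_args:
          --max-model-len: 16384
          --max-num-seqs: 256

One exception to the pattern: Meta-Llama-3.1-405B-Instruct keeps its qos: m4 and time: 02:00:00 settings while still swapping partition: a40 for resource_type: l40s.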
@@ -1,20 +1,4 @@
 models:
-  c4ai-command-r-plus:
-    model_family: c4ai-command-r
-    model_variant: plus
-    model_type: LLM
-    gpus_per_node: 4
-    num_nodes: 2
-    vocab_size: 256000
-    qos: m2
-    time: 08:00:00
-    partition: a40
-    vllm_args:
-      --pipeline-parallel-size: 2
-      --tensor-parallel-size: 4
-      --max-model-len: 8192
-      --max-num-seqs: 256
-      --compilation-config: 3
   c4ai-command-r-plus-08-2024:
     model_family: c4ai-command-r
     model_variant: plus-08-2024
@@ -22,15 +6,13 @@ models:
     gpus_per_node: 4
     num_nodes: 2
     vocab_size: 256000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --pipeline-parallel-size: 2
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   c4ai-command-r-08-2024:
     model_family: c4ai-command-r
     model_variant: 08-2024
@@ -38,14 +20,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 256000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-7b-hf:
     model_family: CodeLlama
     model_variant: 7b-hf
@@ -53,13 +33,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-7b-Instruct-hf:
     model_family: CodeLlama
     model_variant: 7b-Instruct-hf
@@ -67,13 +45,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-13b-hf:
     model_family: CodeLlama
     model_variant: 13b-hf
@@ -81,13 +57,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-13b-Instruct-hf:
     model_family: CodeLlama
     model_variant: 13b-Instruct-hf
@@ -95,13 +69,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-34b-hf:
     model_family: CodeLlama
     model_variant: 34b-hf
@@ -109,14 +81,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-34b-Instruct-hf:
     model_family: CodeLlama
     model_variant: 34b-Instruct-hf
@@ -124,14 +94,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-70b-hf:
     model_family: CodeLlama
     model_variant: 70b-hf
@@ -139,14 +107,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 32016
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   CodeLlama-70b-Instruct-hf:
     model_family: CodeLlama
     model_variant: 70b-Instruct-hf
@@ -154,14 +120,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 32016
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   gemma-2-9b:
     model_family: gemma-2
     model_variant: 9b
@@ -169,13 +133,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 256000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   gemma-2-9b-it:
     model_family: gemma-2
     model_variant: 9b-it
@@ -183,13 +145,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 256000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   gemma-2-27b:
     model_family: gemma-2
     model_variant: 27b
@@ -197,14 +157,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 256000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   gemma-2-27b-it:
     model_family: gemma-2
     model_variant: 27b-it
@@ -212,14 +170,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 256000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-2-7b-hf:
     model_family: Llama-2
     model_variant: 7b-hf
@@ -227,13 +183,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-2-7b-chat-hf:
     model_family: Llama-2
     model_variant: 7b-chat-hf
@@ -241,13 +195,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-2-13b-hf:
     model_family: Llama-2
     model_variant: 13b-hf
@@ -255,13 +207,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-2-13b-chat-hf:
     model_family: Llama-2
     model_variant: 13b-chat-hf
@@ -269,13 +219,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-2-70b-hf:
     model_family: Llama-2
     model_variant: 70b-hf
@@ -283,14 +231,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-2-70b-chat-hf:
     model_family: Llama-2
     model_variant: 70b-chat-hf
@@ -298,14 +244,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   llava-1.5-7b-hf:
     model_family: llava-1.5
     model_variant: 7b-hf
@@ -313,13 +257,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   llava-1.5-13b-hf:
     model_family: llava-1.5
     model_variant: 13b-hf
@@ -327,13 +269,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   llava-v1.6-mistral-7b-hf:
     model_family: llava-v1.6
     model_variant: mistral-7b-hf
@@ -341,13 +281,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   llava-v1.6-34b-hf:
     model_family: llava-v1.6
     model_variant: 34b-hf
@@ -355,14 +293,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 64064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3-8B:
     model_family: Meta-Llama-3
     model_variant: 8B
@@ -370,13 +306,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3-8B-Instruct:
     model_family: Meta-Llama-3
     model_variant: 8B-Instruct
@@ -384,13 +318,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3-70B:
     model_family: Meta-Llama-3
     model_variant: 70B
@@ -398,14 +330,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3-70B-Instruct:
     model_family: Meta-Llama-3
     model_variant: 70B-Instruct
@@ -413,14 +343,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3.1-8B:
     model_family: Meta-Llama-3.1
     model_variant: 8B
@@ -428,13 +356,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3.1-8B-Instruct:
     model_family: Meta-Llama-3.1
     model_variant: 8B-Instruct
@@ -442,13 +368,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3.1-70B:
     model_family: Meta-Llama-3.1
     model_variant: 70B
@@ -456,14 +380,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3.1-70B-Instruct:
     model_family: Meta-Llama-3.1
     model_variant: 70B-Instruct
@@ -471,14 +393,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   Meta-Llama-3.1-405B-Instruct:
     model_family: Meta-Llama-3.1
     model_variant: 405B-Instruct
@@ -488,13 +408,12 @@ models:
     vocab_size: 128256
     qos: m4
     time: 02:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --pipeline-parallel-size: 8
       --tensor-parallel-size: 4
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   Mistral-7B-Instruct-v0.1:
     model_family: Mistral
     model_variant: 7B-Instruct-v0.1
@@ -502,13 +421,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Mistral-7B-Instruct-v0.2:
     model_family: Mistral
     model_variant: 7B-Instruct-v0.2
@@ -516,13 +433,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Mistral-7B-v0.3:
     model_family: Mistral
     model_variant: 7B-v0.3
@@ -530,13 +445,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32768
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Mistral-7B-Instruct-v0.3:
     model_family: Mistral
     model_variant: 7B-Instruct-v0.3
@@ -544,13 +457,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32768
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Mistral-Large-Instruct-2407:
     model_family: Mistral
     model_variant: Large-Instruct-2407
@@ -558,15 +469,13 @@ models:
     gpus_per_node: 4
     num_nodes: 2
     vocab_size: 32768
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --pipeline-parallel-size: 2
       --tensor-parallel-size: 4
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Mistral-Large-Instruct-2411:
     model_family: Mistral
     model_variant: Large-Instruct-2411
@@ -574,15 +483,13 @@ models:
     gpus_per_node: 4
     num_nodes: 2
     vocab_size: 32768
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --pipeline-parallel-size: 2
       --tensor-parallel-size: 4
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Mixtral-8x7B-Instruct-v0.1:
     model_family: Mixtral
     model_variant: 8x7B-Instruct-v0.1
@@ -590,14 +497,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Mixtral-8x22B-v0.1:
     model_family: Mixtral
     model_variant: 8x22B-v0.1
@@ -605,15 +510,13 @@ models:
     gpus_per_node: 4
     num_nodes: 2
     vocab_size: 32768
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --pipeline-parallel-size: 2
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   Mixtral-8x22B-Instruct-v0.1:
     model_family: Mixtral
     model_variant: 8x22B-Instruct-v0.1
@@ -621,15 +524,13 @@ models:
     gpus_per_node: 4
     num_nodes: 2
     vocab_size: 32768
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --pipeline-parallel-size: 2
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   Phi-3-medium-128k-instruct:
     model_family: Phi-3
     model_variant: medium-128k-instruct
@@ -637,14 +538,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 32064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Phi-3-vision-128k-instruct:
     model_family: Phi-3-vision
     model_variant: 128k-instruct
@@ -652,14 +551,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 32064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama3-OpenBioLLM-70B:
     model_family: Llama3-OpenBioLLM
     model_variant: 70B
@@ -667,14 +564,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-3.1-Nemotron-70B-Instruct-HF:
     model_family: Llama-3.1-Nemotron
     model_variant: 70B-Instruct-HF
@@ -682,14 +577,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-3.2-1B:
     model_family: Llama-3.2
     model_variant: 1B
@@ -697,13 +590,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-3.2-1B-Instruct:
     model_family: Llama-3.2
     model_variant: 1B-Instruct
@@ -711,13 +602,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-3.2-3B:
     model_family: Llama-3.2
     model_variant: 3B
@@ -725,13 +614,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-3.2-3B-Instruct:
     model_family: Llama-3.2
     model_variant: 3B-Instruct
@@ -739,13 +626,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-3.2-11B-Vision:
     model_family: Llama-3.2
     model_variant: 11B-Vision
@@ -753,14 +638,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 4096
       --max-num-seqs: 64
-      --compilation-config: 3
       --enforce-eager: true
   Llama-3.2-11B-Vision-Instruct:
     model_family: Llama-3.2
@@ -769,14 +652,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 4096
       --max-num-seqs: 64
-      --compilation-config: 3
       --enforce-eager: true
   Llama-3.2-90B-Vision:
     model_family: Llama-3.2
@@ -785,14 +666,12 @@ models:
     gpus_per_node: 4
     num_nodes: 2
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 8
       --max-model-len: 4096
       --max-num-seqs: 32
-      --compilation-config: 3
       --enforce-eager: true
   Llama-3.2-90B-Vision-Instruct:
     model_family: Llama-3.2
@@ -801,14 +680,12 @@ models:
     gpus_per_node: 4
     num_nodes: 2
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 8
       --max-model-len: 4096
       --max-num-seqs: 32
-      --compilation-config: 3
       --enforce-eager: true
   Qwen2.5-0.5B-Instruct:
     model_family: Qwen2.5
@@ -817,13 +694,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-1.5B-Instruct:
     model_family: Qwen2.5
     model_variant: 1.5B-Instruct
@@ -831,13 +706,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-3B-Instruct:
     model_family: Qwen2.5
     model_variant: 3B-Instruct
@@ -845,13 +718,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-7B-Instruct:
     model_family: Qwen2.5
     model_variant: 7B-Instruct
@@ -859,13 +730,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-14B-Instruct:
     model_family: Qwen2.5
     model_variant: 14B-Instruct
@@ -873,13 +742,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-32B-Instruct:
     model_family: Qwen2.5
     model_variant: 32B-Instruct
@@ -887,14 +754,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-72B-Instruct:
     model_family: Qwen2.5
     model_variant: 72B-Instruct
@@ -902,14 +767,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 16384
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-Math-1.5B-Instruct:
     model_family: Qwen2.5
     model_variant: Math-1.5B-Instruct
@@ -917,13 +780,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
    vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-Math-7B-Instruct:
     model_family: Qwen2.5
     model_variant: Math-7B-Instruct
@@ -931,13 +792,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-Math-72B-Instruct:
     model_family: Qwen2.5
     model_variant: Math-72B-Instruct
@@ -945,14 +804,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-Coder-7B-Instruct:
     model_family: Qwen2.5
     model_variant: Coder-7B-Instruct
@@ -960,13 +817,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-Math-RM-72B:
     model_family: Qwen2.5
     model_variant: Math-RM-72B
@@ -974,14 +829,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   Qwen2.5-Math-PRM-7B:
     model_family: Qwen2.5
     model_variant: Math-PRM-7B
@@ -989,28 +842,24 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
-  QwQ-32B-Preview:
+  QwQ-32B:
     model_family: QwQ
-    model_variant: 32B-Preview
+    model_variant: 32B
     model_type: LLM
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Pixtral-12B-2409:
     model_family: Pixtral
     model_variant: 12B-2409
@@ -1018,13 +867,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 131072
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   e5-mistral-7b-instruct:
     model_family: e5
     model_variant: mistral-7b-instruct
@@ -1032,13 +879,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 32000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   bge-base-en-v1.5:
     model_family: bge
     model_variant: base-en-v1.5
@@ -1046,13 +891,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 30522
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 512
       --max-num-seqs: 256
-      --compilation-config: 3
   all-MiniLM-L6-v2:
     model_family: all-MiniLM
     model_variant: L6-v2
@@ -1060,13 +903,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 30522
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 512
       --max-num-seqs: 256
-      --compilation-config: 3
   Llama-3.3-70B-Instruct:
     model_family: Llama-3.3
     model_variant: 70B-Instruct
@@ -1074,14 +915,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   InternVL2_5-26B:
     model_family: InternVL2_5
     model_variant: 26B
@@ -1089,14 +928,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 92553
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   InternVL2_5-38B:
     model_family: InternVL2_5
     model_variant: 38B
@@ -1104,14 +941,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 92553
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   Aya-Expanse-32B:
     model_family: Aya-Expanse
     model_variant: 32B
@@ -1119,14 +954,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 256000
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   DeepSeek-R1-Distill-Llama-70B:
     model_family: DeepSeek-R1
     model_variant: Distill-Llama-70B
@@ -1134,14 +967,12 @@ models:
     gpus_per_node: 4
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 4
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   DeepSeek-R1-Distill-Llama-8B:
     model_family: DeepSeek-R1
     model_variant: Distill-Llama-8B
@@ -1149,13 +980,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 128256
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   DeepSeek-R1-Distill-Qwen-32B:
     model_family: DeepSeek-R1
     model_variant: Distill-Qwen-32B
@@ -1163,14 +992,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   DeepSeek-R1-Distill-Qwen-14B:
     model_family: DeepSeek-R1
     model_variant: Distill-Qwen-14B
@@ -1178,13 +1005,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   DeepSeek-R1-Distill-Qwen-7B:
     model_family: DeepSeek-R1
     model_variant: Distill-Qwen-7B
@@ -1192,13 +1017,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   DeepSeek-R1-Distill-Qwen-1.5B:
     model_family: DeepSeek-R1
     model_variant: Distill-Qwen-1.5B
@@ -1206,13 +1029,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 131072
       --max-num-seqs: 256
-      --compilation-config: 3
   Phi-3.5-vision-instruct:
     model_family: Phi-3.5-vision
     model_variant: instruct
@@ -1220,14 +1041,12 @@ models:
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 32064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 65536
       --max-num-seqs: 256
-      --compilation-config: 3
   InternVL2_5-8B:
     model_family: InternVL2_5
     model_variant: 8B
@@ -1235,13 +1054,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 92553
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 32768
       --max-num-seqs: 256
-      --compilation-config: 3
   glm-4v-9b:
     model_family: glm-4v
     model_variant: 9b
@@ -1249,13 +1066,11 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 151552
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 8192
       --max-num-seqs: 256
-      --compilation-config: 3
   Molmo-7B-D-0924:
     model_family: Molmo
     model_variant: 7B-D-0924
@@ -1263,27 +1078,23 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 152064
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   deepseek-vl2:
     model_family: deepseek-vl2
     model_type: VLM
     gpus_per_node: 2
     num_nodes: 1
     vocab_size: 129280
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --tensor-parallel-size: 2
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
   deepseek-vl2-small:
     model_family: deepseek-vl2
     model_variant: small
@@ -1291,10 +1102,20 @@ models:
     gpus_per_node: 1
     num_nodes: 1
     vocab_size: 129280
-    qos: m2
     time: 08:00:00
-    partition: a40
+    resource_type: l40s
     vllm_args:
       --max-model-len: 4096
       --max-num-seqs: 256
-      --compilation-config: 3
+  Qwen3-14B:
+    model_family: Qwen3
+    model_variant: 14B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 151936
+    time: 08:00:00
+    resource_type: l40s
+    vllm_args:
+      --max-model-len: 40960
+      --max-num-seqs: 256