vec-inf 0.4.1__py3-none-any.whl → 0.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1300 @@
1
+ models:
2
+ c4ai-command-r-plus:
3
+ model_family: c4ai-command-r
4
+ model_variant: plus
5
+ model_type: LLM
6
+ gpus_per_node: 4
7
+ num_nodes: 2
8
+ vocab_size: 256000
9
+ qos: m2
10
+ time: 08:00:00
11
+ partition: a40
12
+ vllm_args:
13
+ --pipeline-parallel-size: 2
14
+ --tensor-parallel-size: 4
15
+ --max-model-len: 8192
16
+ --max-num-seqs: 256
17
+ --compilation-config: 3
18
+ c4ai-command-r-plus-08-2024:
19
+ model_family: c4ai-command-r
20
+ model_variant: plus-08-2024
21
+ model_type: LLM
22
+ gpus_per_node: 4
23
+ num_nodes: 2
24
+ vocab_size: 256000
25
+ qos: m2
26
+ time: 08:00:00
27
+ partition: a40
28
+ vllm_args:
29
+ --pipeline-parallel-size: 2
30
+ --tensor-parallel-size: 4
31
+ --max-model-len: 65536
32
+ --max-num-seqs: 256
33
+ --compilation-config: 3
34
+ c4ai-command-r-08-2024:
35
+ model_family: c4ai-command-r
36
+ model_variant: 08-2024
37
+ model_type: LLM
38
+ gpus_per_node: 2
39
+ num_nodes: 1
40
+ vocab_size: 256000
41
+ qos: m2
42
+ time: 08:00:00
43
+ partition: a40
44
+ vllm_args:
45
+ --tensor-parallel-size: 2
46
+ --max-model-len: 32768
47
+ --max-num-seqs: 256
48
+ --compilation-config: 3
49
+ CodeLlama-7b-hf:
50
+ model_family: CodeLlama
51
+ model_variant: 7b-hf
52
+ model_type: LLM
53
+ gpus_per_node: 1
54
+ num_nodes: 1
55
+ vocab_size: 32000
56
+ qos: m2
57
+ time: 08:00:00
58
+ partition: a40
59
+ vllm_args:
60
+ --max-model-len: 16384
61
+ --max-num-seqs: 256
62
+ --compilation-config: 3
63
+ CodeLlama-7b-Instruct-hf:
64
+ model_family: CodeLlama
65
+ model_variant: 7b-Instruct-hf
66
+ model_type: LLM
67
+ gpus_per_node: 1
68
+ num_nodes: 1
69
+ vocab_size: 32000
70
+ qos: m2
71
+ time: 08:00:00
72
+ partition: a40
73
+ vllm_args:
74
+ --max-model-len: 16384
75
+ --max-num-seqs: 256
76
+ --compilation-config: 3
77
+ CodeLlama-13b-hf:
78
+ model_family: CodeLlama
79
+ model_variant: 13b-hf
80
+ model_type: LLM
81
+ gpus_per_node: 1
82
+ num_nodes: 1
83
+ vocab_size: 32000
84
+ qos: m2
85
+ time: 08:00:00
86
+ partition: a40
87
+ vllm_args:
88
+ --max-model-len: 16384
89
+ --max-num-seqs: 256
90
+ --compilation-config: 3
91
+ CodeLlama-13b-Instruct-hf:
92
+ model_family: CodeLlama
93
+ model_variant: 13b-Instruct-hf
94
+ model_type: LLM
95
+ gpus_per_node: 1
96
+ num_nodes: 1
97
+ vocab_size: 32000
98
+ qos: m2
99
+ time: 08:00:00
100
+ partition: a40
101
+ vllm_args:
102
+ --max-model-len: 16384
103
+ --max-num-seqs: 256
104
+ --compilation-config: 3
105
+ CodeLlama-34b-hf:
106
+ model_family: CodeLlama
107
+ model_variant: 34b-hf
108
+ model_type: LLM
109
+ gpus_per_node: 2
110
+ num_nodes: 1
111
+ vocab_size: 32000
112
+ qos: m2
113
+ time: 08:00:00
114
+ partition: a40
115
+ vllm_args:
116
+ --tensor-parallel-size: 2
117
+ --max-model-len: 16384
118
+ --max-num-seqs: 256
119
+ --compilation-config: 3
120
+ CodeLlama-34b-Instruct-hf:
121
+ model_family: CodeLlama
122
+ model_variant: 34b-Instruct-hf
123
+ model_type: LLM
124
+ gpus_per_node: 2
125
+ num_nodes: 1
126
+ vocab_size: 32000
127
+ qos: m2
128
+ time: 08:00:00
129
+ partition: a40
130
+ vllm_args:
131
+ --tensor-parallel-size: 2
132
+ --max-model-len: 16384
133
+ --max-num-seqs: 256
134
+ --compilation-config: 3
135
+ CodeLlama-70b-hf:
136
+ model_family: CodeLlama
137
+ model_variant: 70b-hf
138
+ model_type: LLM
139
+ gpus_per_node: 4
140
+ num_nodes: 1
141
+ vocab_size: 32016
142
+ qos: m2
143
+ time: 08:00:00
144
+ partition: a40
145
+ vllm_args:
146
+ --tensor-parallel-size: 4
147
+ --max-model-len: 4096
148
+ --max-num-seqs: 256
149
+ --compilation-config: 3
150
+ CodeLlama-70b-Instruct-hf:
151
+ model_family: CodeLlama
152
+ model_variant: 70b-Instruct-hf
153
+ model_type: LLM
154
+ gpus_per_node: 4
155
+ num_nodes: 1
156
+ vocab_size: 32016
157
+ qos: m2
158
+ time: 08:00:00
159
+ partition: a40
160
+ vllm_args:
161
+ --tensor-parallel-size: 4
162
+ --max-model-len: 4096
163
+ --max-num-seqs: 256
164
+ --compilation-config: 3
165
+ gemma-2-9b:
166
+ model_family: gemma-2
167
+ model_variant: 9b
168
+ model_type: LLM
169
+ gpus_per_node: 1
170
+ num_nodes: 1
171
+ vocab_size: 256000
172
+ qos: m2
173
+ time: 08:00:00
174
+ partition: a40
175
+ vllm_args:
176
+ --max-model-len: 4096
177
+ --max-num-seqs: 256
178
+ --compilation-config: 3
179
+ gemma-2-9b-it:
180
+ model_family: gemma-2
181
+ model_variant: 9b-it
182
+ model_type: LLM
183
+ gpus_per_node: 1
184
+ num_nodes: 1
185
+ vocab_size: 256000
186
+ qos: m2
187
+ time: 08:00:00
188
+ partition: a40
189
+ vllm_args:
190
+ --max-model-len: 4096
191
+ --max-num-seqs: 256
192
+ --compilation-config: 3
193
+ gemma-2-27b:
194
+ model_family: gemma-2
195
+ model_variant: 27b
196
+ model_type: LLM
197
+ gpus_per_node: 2
198
+ num_nodes: 1
199
+ vocab_size: 256000
200
+ qos: m2
201
+ time: 08:00:00
202
+ partition: a40
203
+ vllm_args:
204
+ --tensor-parallel-size: 2
205
+ --max-model-len: 4096
206
+ --max-num-seqs: 256
207
+ --compilation-config: 3
208
+ gemma-2-27b-it:
209
+ model_family: gemma-2
210
+ model_variant: 27b-it
211
+ model_type: LLM
212
+ gpus_per_node: 2
213
+ num_nodes: 1
214
+ vocab_size: 256000
215
+ qos: m2
216
+ time: 08:00:00
217
+ partition: a40
218
+ vllm_args:
219
+ --tensor-parallel-size: 2
220
+ --max-model-len: 4096
221
+ --max-num-seqs: 256
222
+ --compilation-config: 3
223
+ Llama-2-7b-hf:
224
+ model_family: Llama-2
225
+ model_variant: 7b-hf
226
+ model_type: LLM
227
+ gpus_per_node: 1
228
+ num_nodes: 1
229
+ vocab_size: 32000
230
+ qos: m2
231
+ time: 08:00:00
232
+ partition: a40
233
+ vllm_args:
234
+ --max-model-len: 4096
235
+ --max-num-seqs: 256
236
+ --compilation-config: 3
237
+ Llama-2-7b-chat-hf:
238
+ model_family: Llama-2
239
+ model_variant: 7b-chat-hf
240
+ model_type: LLM
241
+ gpus_per_node: 1
242
+ num_nodes: 1
243
+ vocab_size: 32000
244
+ qos: m2
245
+ time: 08:00:00
246
+ partition: a40
247
+ vllm_args:
248
+ --max-model-len: 4096
249
+ --max-num-seqs: 256
250
+ --compilation-config: 3
251
+ Llama-2-13b-hf:
252
+ model_family: Llama-2
253
+ model_variant: 13b-hf
254
+ model_type: LLM
255
+ gpus_per_node: 1
256
+ num_nodes: 1
257
+ vocab_size: 32000
258
+ qos: m2
259
+ time: 08:00:00
260
+ partition: a40
261
+ vllm_args:
262
+ --max-model-len: 4096
263
+ --max-num-seqs: 256
264
+ --compilation-config: 3
265
+ Llama-2-13b-chat-hf:
266
+ model_family: Llama-2
267
+ model_variant: 13b-chat-hf
268
+ model_type: LLM
269
+ gpus_per_node: 1
270
+ num_nodes: 1
271
+ vocab_size: 32000
272
+ qos: m2
273
+ time: 08:00:00
274
+ partition: a40
275
+ vllm_args:
276
+ --max-model-len: 4096
277
+ --max-num-seqs: 256
278
+ --compilation-config: 3
279
+ Llama-2-70b-hf:
280
+ model_family: Llama-2
281
+ model_variant: 70b-hf
282
+ model_type: LLM
283
+ gpus_per_node: 4
284
+ num_nodes: 1
285
+ vocab_size: 32000
286
+ qos: m2
287
+ time: 08:00:00
288
+ partition: a40
289
+ vllm_args:
290
+ --tensor-parallel-size: 4
291
+ --max-model-len: 4096
292
+ --max-num-seqs: 256
293
+ --compilation-config: 3
294
+ Llama-2-70b-chat-hf:
295
+ model_family: Llama-2
296
+ model_variant: 70b-chat-hf
297
+ model_type: LLM
298
+ gpus_per_node: 4
299
+ num_nodes: 1
300
+ vocab_size: 32000
301
+ qos: m2
302
+ time: 08:00:00
303
+ partition: a40
304
+ vllm_args:
305
+ --tensor-parallel-size: 4
306
+ --max-model-len: 4096
307
+ --max-num-seqs: 256
308
+ --compilation-config: 3
309
+ llava-1.5-7b-hf:
310
+ model_family: llava-1.5
311
+ model_variant: 7b-hf
312
+ model_type: VLM
313
+ gpus_per_node: 1
314
+ num_nodes: 1
315
+ vocab_size: 32000
316
+ qos: m2
317
+ time: 08:00:00
318
+ partition: a40
319
+ vllm_args:
320
+ --max-model-len: 4096
321
+ --max-num-seqs: 256
322
+ --compilation-config: 3
323
+ llava-1.5-13b-hf:
324
+ model_family: llava-1.5
325
+ model_variant: 13b-hf
326
+ model_type: VLM
327
+ gpus_per_node: 1
328
+ num_nodes: 1
329
+ vocab_size: 32000
330
+ qos: m2
331
+ time: 08:00:00
332
+ partition: a40
333
+ vllm_args:
334
+ --max-model-len: 4096
335
+ --max-num-seqs: 256
336
+ --compilation-config: 3
337
+ llava-v1.6-mistral-7b-hf:
338
+ model_family: llava-v1.6
339
+ model_variant: mistral-7b-hf
340
+ model_type: VLM
341
+ gpus_per_node: 1
342
+ num_nodes: 1
343
+ vocab_size: 32064
344
+ qos: m2
345
+ time: 08:00:00
346
+ partition: a40
347
+ vllm_args:
348
+ --max-model-len: 32768
349
+ --max-num-seqs: 256
350
+ --compilation-config: 3
351
+ llava-v1.6-34b-hf:
352
+ model_family: llava-v1.6
353
+ model_variant: 34b-hf
354
+ model_type: VLM
355
+ gpus_per_node: 2
356
+ num_nodes: 1
357
+ vocab_size: 64064
358
+ qos: m2
359
+ time: 08:00:00
360
+ partition: a40
361
+ vllm_args:
362
+ --tensor-parallel-size: 2
363
+ --max-model-len: 4096
364
+ --max-num-seqs: 256
365
+ --compilation-config: 3
366
+ Meta-Llama-3-8B:
367
+ model_family: Meta-Llama-3
368
+ model_variant: 8B
369
+ model_type: LLM
370
+ gpus_per_node: 1
371
+ num_nodes: 1
372
+ vocab_size: 128256
373
+ qos: m2
374
+ time: 08:00:00
375
+ partition: a40
376
+ vllm_args:
377
+ --max-model-len: 8192
378
+ --max-num-seqs: 256
379
+ --compilation-config: 3
380
+ Meta-Llama-3-8B-Instruct:
381
+ model_family: Meta-Llama-3
382
+ model_variant: 8B-Instruct
383
+ model_type: LLM
384
+ gpus_per_node: 1
385
+ num_nodes: 1
386
+ vocab_size: 128256
387
+ qos: m2
388
+ time: 08:00:00
389
+ partition: a40
390
+ vllm_args:
391
+ --max-model-len: 8192
392
+ --max-num-seqs: 256
393
+ --compilation-config: 3
394
+ Meta-Llama-3-70B:
395
+ model_family: Meta-Llama-3
396
+ model_variant: 70B
397
+ model_type: LLM
398
+ gpus_per_node: 4
399
+ num_nodes: 1
400
+ vocab_size: 128256
401
+ qos: m2
402
+ time: 08:00:00
403
+ partition: a40
404
+ vllm_args:
405
+ --tensor-parallel-size: 4
406
+ --max-model-len: 8192
407
+ --max-num-seqs: 256
408
+ --compilation-config: 3
409
+ Meta-Llama-3-70B-Instruct:
410
+ model_family: Meta-Llama-3
411
+ model_variant: 70B-Instruct
412
+ model_type: LLM
413
+ gpus_per_node: 4
414
+ num_nodes: 1
415
+ vocab_size: 128256
416
+ qos: m2
417
+ time: 08:00:00
418
+ partition: a40
419
+ vllm_args:
420
+ --tensor-parallel-size: 4
421
+ --max-model-len: 8192
422
+ --max-num-seqs: 256
423
+ --compilation-config: 3
424
+ Meta-Llama-3.1-8B:
425
+ model_family: Meta-Llama-3.1
426
+ model_variant: 8B
427
+ model_type: LLM
428
+ gpus_per_node: 1
429
+ num_nodes: 1
430
+ vocab_size: 128256
431
+ qos: m2
432
+ time: 08:00:00
433
+ partition: a40
434
+ vllm_args:
435
+ --max-model-len: 131072
436
+ --max-num-seqs: 256
437
+ --compilation-config: 3
438
+ Meta-Llama-3.1-8B-Instruct:
439
+ model_family: Meta-Llama-3.1
440
+ model_variant: 8B-Instruct
441
+ model_type: LLM
442
+ gpus_per_node: 1
443
+ num_nodes: 1
444
+ vocab_size: 128256
445
+ qos: m2
446
+ time: 08:00:00
447
+ partition: a40
448
+ vllm_args:
449
+ --max-model-len: 131072
450
+ --max-num-seqs: 256
451
+ --compilation-config: 3
452
+ Meta-Llama-3.1-70B:
453
+ model_family: Meta-Llama-3.1
454
+ model_variant: 70B
455
+ model_type: LLM
456
+ gpus_per_node: 4
457
+ num_nodes: 1
458
+ vocab_size: 128256
459
+ qos: m2
460
+ time: 08:00:00
461
+ partition: a40
462
+ vllm_args:
463
+ --tensor-parallel-size: 4
464
+ --max-model-len: 65536
465
+ --max-num-seqs: 256
466
+ --compilation-config: 3
467
+ Meta-Llama-3.1-70B-Instruct:
468
+ model_family: Meta-Llama-3.1
469
+ model_variant: 70B-Instruct
470
+ model_type: LLM
471
+ gpus_per_node: 4
472
+ num_nodes: 1
473
+ vocab_size: 128256
474
+ qos: m2
475
+ time: 08:00:00
476
+ partition: a40
477
+ vllm_args:
478
+ --tensor-parallel-size: 4
479
+ --max-model-len: 65536
480
+ --max-num-seqs: 256
481
+ --compilation-config: 3
482
+ Meta-Llama-3.1-405B-Instruct:
483
+ model_family: Meta-Llama-3.1
484
+ model_variant: 405B-Instruct
485
+ model_type: LLM
486
+ gpus_per_node: 4
487
+ num_nodes: 8
488
+ vocab_size: 128256
489
+ qos: m4
490
+ time: 02:00:00
491
+ partition: a40
492
+ vllm_args:
493
+ --pipeline-parallel-size: 8
494
+ --tensor-parallel-size: 4
495
+ --max-model-len: 16384
496
+ --max-num-seqs: 256
497
+ --compilation-config: 3
498
+ Mistral-7B-Instruct-v0.1:
499
+ model_family: Mistral
500
+ model_variant: 7B-Instruct-v0.1
501
+ model_type: LLM
502
+ gpus_per_node: 1
503
+ num_nodes: 1
504
+ vocab_size: 32000
505
+ qos: m2
506
+ time: 08:00:00
507
+ partition: a40
508
+ vllm_args:
509
+ --max-model-len: 32768
510
+ --max-num-seqs: 256
511
+ --compilation-config: 3
512
+ Mistral-7B-Instruct-v0.2:
513
+ model_family: Mistral
514
+ model_variant: 7B-Instruct-v0.2
515
+ model_type: LLM
516
+ gpus_per_node: 1
517
+ num_nodes: 1
518
+ vocab_size: 32000
519
+ qos: m2
520
+ time: 08:00:00
521
+ partition: a40
522
+ vllm_args:
523
+ --max-model-len: 32768
524
+ --max-num-seqs: 256
525
+ --compilation-config: 3
526
+ Mistral-7B-v0.3:
527
+ model_family: Mistral
528
+ model_variant: 7B-v0.3
529
+ model_type: LLM
530
+ gpus_per_node: 1
531
+ num_nodes: 1
532
+ vocab_size: 32768
533
+ qos: m2
534
+ time: 08:00:00
535
+ partition: a40
536
+ vllm_args:
537
+ --max-model-len: 32768
538
+ --max-num-seqs: 256
539
+ --compilation-config: 3
540
+ Mistral-7B-Instruct-v0.3:
541
+ model_family: Mistral
542
+ model_variant: 7B-Instruct-v0.3
543
+ model_type: LLM
544
+ gpus_per_node: 1
545
+ num_nodes: 1
546
+ vocab_size: 32768
547
+ qos: m2
548
+ time: 08:00:00
549
+ partition: a40
550
+ vllm_args:
551
+ --max-model-len: 32768
552
+ --max-num-seqs: 256
553
+ --compilation-config: 3
554
+ Mistral-Large-Instruct-2407:
555
+ model_family: Mistral
556
+ model_variant: Large-Instruct-2407
557
+ model_type: LLM
558
+ gpus_per_node: 4
559
+ num_nodes: 2
560
+ vocab_size: 32768
561
+ qos: m2
562
+ time: 08:00:00
563
+ partition: a40
564
+ vllm_args:
565
+ --pipeline-parallel-size: 2
566
+ --tensor-parallel-size: 4
567
+ --max-model-len: 32768
568
+ --max-num-seqs: 256
569
+ --compilation-config: 3
570
+ Mistral-Large-Instruct-2411:
571
+ model_family: Mistral
572
+ model_variant: Large-Instruct-2411
573
+ model_type: LLM
574
+ gpus_per_node: 4
575
+ num_nodes: 2
576
+ vocab_size: 32768
577
+ qos: m2
578
+ time: 08:00:00
579
+ partition: a40
580
+ vllm_args:
581
+ --pipeline-parallel-size: 2
582
+ --tensor-parallel-size: 4
583
+ --max-model-len: 32768
584
+ --max-num-seqs: 256
585
+ --compilation-config: 3
586
+ Mixtral-8x7B-Instruct-v0.1:
587
+ model_family: Mixtral
588
+ model_variant: 8x7B-Instruct-v0.1
589
+ model_type: LLM
590
+ gpus_per_node: 4
591
+ num_nodes: 1
592
+ vocab_size: 32000
593
+ qos: m2
594
+ time: 08:00:00
595
+ partition: a40
596
+ vllm_args:
597
+ --tensor-parallel-size: 4
598
+ --max-model-len: 32768
599
+ --max-num-seqs: 256
600
+ --compilation-config: 3
601
+ Mixtral-8x22B-v0.1:
602
+ model_family: Mixtral
603
+ model_variant: 8x22B-v0.1
604
+ model_type: LLM
605
+ gpus_per_node: 4
606
+ num_nodes: 2
607
+ vocab_size: 32768
608
+ qos: m2
609
+ time: 08:00:00
610
+ partition: a40
611
+ vllm_args:
612
+ --pipeline-parallel-size: 2
613
+ --tensor-parallel-size: 4
614
+ --max-model-len: 65536
615
+ --max-num-seqs: 256
616
+ --compilation-config: 3
617
+ Mixtral-8x22B-Instruct-v0.1:
618
+ model_family: Mixtral
619
+ model_variant: 8x22B-Instruct-v0.1
620
+ model_type: LLM
621
+ gpus_per_node: 4
622
+ num_nodes: 2
623
+ vocab_size: 32768
624
+ qos: m2
625
+ time: 08:00:00
626
+ partition: a40
627
+ vllm_args:
628
+ --pipeline-parallel-size: 2
629
+ --tensor-parallel-size: 4
630
+ --max-model-len: 65536
631
+ --max-num-seqs: 256
632
+ --compilation-config: 3
633
+ Phi-3-medium-128k-instruct:
634
+ model_family: Phi-3
635
+ model_variant: medium-128k-instruct
636
+ model_type: LLM
637
+ gpus_per_node: 2
638
+ num_nodes: 1
639
+ vocab_size: 32064
640
+ qos: m2
641
+ time: 08:00:00
642
+ partition: a40
643
+ vllm_args:
644
+ --tensor-parallel-size: 2
645
+ --max-model-len: 131072
646
+ --max-num-seqs: 256
647
+ --compilation-config: 3
648
+ Phi-3-vision-128k-instruct:
649
+ model_family: Phi-3-vision
650
+ model_variant: 128k-instruct
651
+ model_type: VLM
652
+ gpus_per_node: 2
653
+ num_nodes: 1
654
+ vocab_size: 32064
655
+ qos: m2
656
+ time: 08:00:00
657
+ partition: a40
658
+ vllm_args:
659
+ --tensor-parallel-size: 2
660
+ --max-model-len: 65536
661
+ --max-num-seqs: 256
662
+ --compilation-config: 3
663
+ Llama3-OpenBioLLM-70B:
664
+ model_family: Llama3-OpenBioLLM
665
+ model_variant: 70B
666
+ model_type: LLM
667
+ gpus_per_node: 4
668
+ num_nodes: 1
669
+ vocab_size: 128256
670
+ qos: m2
671
+ time: 08:00:00
672
+ partition: a40
673
+ vllm_args:
674
+ --tensor-parallel-size: 4
675
+ --max-model-len: 8192
676
+ --max-num-seqs: 256
677
+ --compilation-config: 3
678
+ Llama-3.1-Nemotron-70B-Instruct-HF:
679
+ model_family: Llama-3.1-Nemotron
680
+ model_variant: 70B-Instruct-HF
681
+ model_type: LLM
682
+ gpus_per_node: 4
683
+ num_nodes: 1
684
+ vocab_size: 128256
685
+ qos: m2
686
+ time: 08:00:00
687
+ partition: a40
688
+ vllm_args:
689
+ --tensor-parallel-size: 4
690
+ --max-model-len: 65536
691
+ --max-num-seqs: 256
692
+ --compilation-config: 3
693
+ Llama-3.2-1B:
694
+ model_family: Llama-3.2
695
+ model_variant: 1B
696
+ model_type: LLM
697
+ gpus_per_node: 1
698
+ num_nodes: 1
699
+ vocab_size: 128256
700
+ qos: m2
701
+ time: 08:00:00
702
+ partition: a40
703
+ vllm_args:
704
+ --max-model-len: 131072
705
+ --max-num-seqs: 256
706
+ --compilation-config: 3
707
+ Llama-3.2-1B-Instruct:
708
+ model_family: Llama-3.2
709
+ model_variant: 1B-Instruct
710
+ model_type: LLM
711
+ gpus_per_node: 1
712
+ num_nodes: 1
713
+ vocab_size: 128256
714
+ qos: m2
715
+ time: 08:00:00
716
+ partition: a40
717
+ vllm_args:
718
+ --max-model-len: 131072
719
+ --max-num-seqs: 256
720
+ --compilation-config: 3
721
+ Llama-3.2-3B:
722
+ model_family: Llama-3.2
723
+ model_variant: 3B
724
+ model_type: LLM
725
+ gpus_per_node: 1
726
+ num_nodes: 1
727
+ vocab_size: 128256
728
+ qos: m2
729
+ time: 08:00:00
730
+ partition: a40
731
+ vllm_args:
732
+ --max-model-len: 131072
733
+ --max-num-seqs: 256
734
+ --compilation-config: 3
735
+ Llama-3.2-3B-Instruct:
736
+ model_family: Llama-3.2
737
+ model_variant: 3B-Instruct
738
+ model_type: LLM
739
+ gpus_per_node: 1
740
+ num_nodes: 1
741
+ vocab_size: 128256
742
+ qos: m2
743
+ time: 08:00:00
744
+ partition: a40
745
+ vllm_args:
746
+ --max-model-len: 131072
747
+ --max-num-seqs: 256
748
+ --compilation-config: 3
749
+ Llama-3.2-11B-Vision:
750
+ model_family: Llama-3.2
751
+ model_variant: 11B-Vision
752
+ model_type: VLM
753
+ gpus_per_node: 2
754
+ num_nodes: 1
755
+ vocab_size: 128256
756
+ qos: m2
757
+ time: 08:00:00
758
+ partition: a40
759
+ vllm_args:
760
+ --tensor-parallel-size: 2
761
+ --max-model-len: 4096
762
+ --max-num-seqs: 64
763
+ --compilation-config: 3
764
+ --enforce-eager: true
765
+ Llama-3.2-11B-Vision-Instruct:
766
+ model_family: Llama-3.2
767
+ model_variant: 11B-Vision-Instruct
768
+ model_type: VLM
769
+ gpus_per_node: 2
770
+ num_nodes: 1
771
+ vocab_size: 128256
772
+ qos: m2
773
+ time: 08:00:00
774
+ partition: a40
775
+ vllm_args:
776
+ --tensor-parallel-size: 2
777
+ --max-model-len: 4096
778
+ --max-num-seqs: 64
779
+ --compilation-config: 3
780
+ --enforce-eager: true
781
+ Llama-3.2-90B-Vision:
782
+ model_family: Llama-3.2
783
+ model_variant: 90B-Vision
784
+ model_type: VLM
785
+ gpus_per_node: 4
786
+ num_nodes: 2
787
+ vocab_size: 128256
788
+ qos: m2
789
+ time: 08:00:00
790
+ partition: a40
791
+ vllm_args:
792
+ --tensor-parallel-size: 8
793
+ --max-model-len: 4096
794
+ --max-num-seqs: 32
795
+ --compilation-config: 3
796
+ --enforce-eager: true
797
+ Llama-3.2-90B-Vision-Instruct:
798
+ model_family: Llama-3.2
799
+ model_variant: 90B-Vision-Instruct
800
+ model_type: VLM
801
+ gpus_per_node: 4
802
+ num_nodes: 2
803
+ vocab_size: 128256
804
+ qos: m2
805
+ time: 08:00:00
806
+ partition: a40
807
+ vllm_args:
808
+ --tensor-parallel-size: 8
809
+ --max-model-len: 4096
810
+ --max-num-seqs: 32
811
+ --compilation-config: 3
812
+ --enforce-eager: true
813
+ Qwen2.5-0.5B-Instruct:
814
+ model_family: Qwen2.5
815
+ model_variant: 0.5B-Instruct
816
+ model_type: LLM
817
+ gpus_per_node: 1
818
+ num_nodes: 1
819
+ vocab_size: 152064
820
+ qos: m2
821
+ time: 08:00:00
822
+ partition: a40
823
+ vllm_args:
824
+ --max-model-len: 32768
825
+ --max-num-seqs: 256
826
+ --compilation-config: 3
827
+ Qwen2.5-1.5B-Instruct:
828
+ model_family: Qwen2.5
829
+ model_variant: 1.5B-Instruct
830
+ model_type: LLM
831
+ gpus_per_node: 1
832
+ num_nodes: 1
833
+ vocab_size: 152064
834
+ qos: m2
835
+ time: 08:00:00
836
+ partition: a40
837
+ vllm_args:
838
+ --max-model-len: 32768
839
+ --max-num-seqs: 256
840
+ --compilation-config: 3
841
+ Qwen2.5-3B-Instruct:
842
+ model_family: Qwen2.5
843
+ model_variant: 3B-Instruct
844
+ model_type: LLM
845
+ gpus_per_node: 1
846
+ num_nodes: 1
847
+ vocab_size: 152064
848
+ qos: m2
849
+ time: 08:00:00
850
+ partition: a40
851
+ vllm_args:
852
+ --max-model-len: 32768
853
+ --max-num-seqs: 256
854
+ --compilation-config: 3
855
+ Qwen2.5-7B-Instruct:
856
+ model_family: Qwen2.5
857
+ model_variant: 7B-Instruct
858
+ model_type: LLM
859
+ gpus_per_node: 1
860
+ num_nodes: 1
861
+ vocab_size: 152064
862
+ qos: m2
863
+ time: 08:00:00
864
+ partition: a40
865
+ vllm_args:
866
+ --max-model-len: 32768
867
+ --max-num-seqs: 256
868
+ --compilation-config: 3
869
+ Qwen2.5-14B-Instruct:
870
+ model_family: Qwen2.5
871
+ model_variant: 14B-Instruct
872
+ model_type: LLM
873
+ gpus_per_node: 1
874
+ num_nodes: 1
875
+ vocab_size: 152064
876
+ qos: m2
877
+ time: 08:00:00
878
+ partition: a40
879
+ vllm_args:
880
+ --max-model-len: 32768
881
+ --max-num-seqs: 256
882
+ --compilation-config: 3
883
+ Qwen2.5-32B-Instruct:
884
+ model_family: Qwen2.5
885
+ model_variant: 32B-Instruct
886
+ model_type: LLM
887
+ gpus_per_node: 2
888
+ num_nodes: 1
889
+ vocab_size: 152064
890
+ qos: m2
891
+ time: 08:00:00
892
+ partition: a40
893
+ vllm_args:
894
+ --tensor-parallel-size: 2
895
+ --max-model-len: 32768
896
+ --max-num-seqs: 256
897
+ --compilation-config: 3
898
+ Qwen2.5-72B-Instruct:
899
+ model_family: Qwen2.5
900
+ model_variant: 72B-Instruct
901
+ model_type: LLM
902
+ gpus_per_node: 4
903
+ num_nodes: 1
904
+ vocab_size: 152064
905
+ qos: m2
906
+ time: 08:00:00
907
+ partition: a40
908
+ vllm_args:
909
+ --tensor-parallel-size: 4
910
+ --max-model-len: 16384
911
+ --max-num-seqs: 256
912
+ --compilation-config: 3
913
+ Qwen2.5-Math-1.5B-Instruct:
914
+ model_family: Qwen2.5
915
+ model_variant: Math-1.5B-Instruct
916
+ model_type: LLM
917
+ gpus_per_node: 1
918
+ num_nodes: 1
919
+ vocab_size: 152064
920
+ qos: m2
921
+ time: 08:00:00
922
+ partition: a40
923
+ vllm_args:
924
+ --max-model-len: 4096
925
+ --max-num-seqs: 256
926
+ --compilation-config: 3
927
+ Qwen2.5-Math-7B-Instruct:
928
+ model_family: Qwen2.5
929
+ model_variant: Math-7B-Instruct
930
+ model_type: LLM
931
+ gpus_per_node: 1
932
+ num_nodes: 1
933
+ vocab_size: 152064
934
+ qos: m2
935
+ time: 08:00:00
936
+ partition: a40
937
+ vllm_args:
938
+ --max-model-len: 4096
939
+ --max-num-seqs: 256
940
+ --compilation-config: 3
941
+ Qwen2.5-Math-72B-Instruct:
942
+ model_family: Qwen2.5
943
+ model_variant: Math-72B-Instruct
944
+ model_type: LLM
945
+ gpus_per_node: 4
946
+ num_nodes: 1
947
+ vocab_size: 152064
948
+ qos: m2
949
+ time: 08:00:00
950
+ partition: a40
951
+ vllm_args:
952
+ --tensor-parallel-size: 4
953
+ --max-model-len: 4096
954
+ --max-num-seqs: 256
955
+ --compilation-config: 3
956
+ Qwen2.5-Coder-7B-Instruct:
957
+ model_family: Qwen2.5
958
+ model_variant: Coder-7B-Instruct
959
+ model_type: LLM
960
+ gpus_per_node: 1
961
+ num_nodes: 1
962
+ vocab_size: 152064
963
+ qos: m2
964
+ time: 08:00:00
965
+ partition: a40
966
+ vllm_args:
967
+ --max-model-len: 32768
968
+ --max-num-seqs: 256
969
+ --compilation-config: 3
970
+ Qwen2.5-Math-RM-72B:
971
+ model_family: Qwen2.5
972
+ model_variant: Math-RM-72B
973
+ model_type: Reward_Modeling
974
+ gpus_per_node: 4
975
+ num_nodes: 1
976
+ vocab_size: 152064
977
+ qos: m2
978
+ time: 08:00:00
979
+ partition: a40
980
+ vllm_args:
981
+ --tensor-parallel-size: 4
982
+ --max-model-len: 4096
983
+ --max-num-seqs: 256
984
+ --compilation-config: 3
985
+ Qwen2.5-Math-PRM-7B:
986
+ model_family: Qwen2.5
987
+ model_variant: Math-PRM-7B
988
+ model_type: Reward_Modeling
989
+ gpus_per_node: 1
990
+ num_nodes: 1
991
+ vocab_size: 152064
992
+ qos: m2
993
+ time: 08:00:00
994
+ partition: a40
995
+ vllm_args:
996
+ --max-model-len: 4096
997
+ --max-num-seqs: 256
998
+ --compilation-config: 3
999
+ QwQ-32B-Preview:
1000
+ model_family: QwQ
1001
+ model_variant: 32B-Preview
1002
+ model_type: LLM
1003
+ gpus_per_node: 2
1004
+ num_nodes: 1
1005
+ vocab_size: 152064
1006
+ qos: m2
1007
+ time: 08:00:00
1008
+ partition: a40
1009
+ vllm_args:
1010
+ --tensor-parallel-size: 2
1011
+ --max-model-len: 32768
1012
+ --max-num-seqs: 256
1013
+ --compilation-config: 3
1014
+ Pixtral-12B-2409:
1015
+ model_family: Pixtral
1016
+ model_variant: 12B-2409
1017
+ model_type: VLM
1018
+ gpus_per_node: 1
1019
+ num_nodes: 1
1020
+ vocab_size: 131072
1021
+ qos: m2
1022
+ time: 08:00:00
1023
+ partition: a40
1024
+ vllm_args:
1025
+ --max-model-len: 8192
1026
+ --max-num-seqs: 256
1027
+ --compilation-config: 3
1028
+ e5-mistral-7b-instruct:
1029
+ model_family: e5
1030
+ model_variant: mistral-7b-instruct
1031
+ model_type: Text_Embedding
1032
+ gpus_per_node: 1
1033
+ num_nodes: 1
1034
+ vocab_size: 32000
1035
+ qos: m2
1036
+ time: 08:00:00
1037
+ partition: a40
1038
+ vllm_args:
1039
+ --max-model-len: 4096
1040
+ --max-num-seqs: 256
1041
+ --compilation-config: 3
1042
+ bge-base-en-v1.5:
1043
+ model_family: bge
1044
+ model_variant: base-en-v1.5
1045
+ model_type: Text_Embedding
1046
+ gpus_per_node: 1
1047
+ num_nodes: 1
1048
+ vocab_size: 30522
1049
+ qos: m2
1050
+ time: 08:00:00
1051
+ partition: a40
1052
+ vllm_args:
1053
+ --max-model-len: 512
1054
+ --max-num-seqs: 256
1055
+ --compilation-config: 3
1056
+ all-MiniLM-L6-v2:
1057
+ model_family: all-MiniLM
1058
+ model_variant: L6-v2
1059
+ model_type: Text_Embedding
1060
+ gpus_per_node: 1
1061
+ num_nodes: 1
1062
+ vocab_size: 30522
1063
+ qos: m2
1064
+ time: 08:00:00
1065
+ partition: a40
1066
+ vllm_args:
1067
+ --max-model-len: 512
1068
+ --max-num-seqs: 256
1069
+ --compilation-config: 3
1070
+ Llama-3.3-70B-Instruct:
1071
+ model_family: Llama-3.3
1072
+ model_variant: 70B-Instruct
1073
+ model_type: LLM
1074
+ gpus_per_node: 4
1075
+ num_nodes: 1
1076
+ vocab_size: 128256
1077
+ qos: m2
1078
+ time: 08:00:00
1079
+ partition: a40
1080
+ vllm_args:
1081
+ --tensor-parallel-size: 4
1082
+ --max-model-len: 65536
1083
+ --max-num-seqs: 256
1084
+ --compilation-config: 3
1085
+ InternVL2_5-26B:
1086
+ model_family: InternVL2_5
1087
+ model_variant: 26B
1088
+ model_type: VLM
1089
+ gpus_per_node: 2
1090
+ num_nodes: 1
1091
+ vocab_size: 92553
1092
+ qos: m2
1093
+ time: 08:00:00
1094
+ partition: a40
1095
+ vllm_args:
1096
+ --tensor-parallel-size: 2
1097
+ --max-model-len: 32768
1098
+ --max-num-seqs: 256
1099
+ --compilation-config: 3
1100
+ InternVL2_5-38B:
1101
+ model_family: InternVL2_5
1102
+ model_variant: 38B
1103
+ model_type: VLM
1104
+ gpus_per_node: 4
1105
+ num_nodes: 1
1106
+ vocab_size: 92553
1107
+ qos: m2
1108
+ time: 08:00:00
1109
+ partition: a40
1110
+ vllm_args:
1111
+ --tensor-parallel-size: 4
1112
+ --max-model-len: 32768
1113
+ --max-num-seqs: 256
1114
+ --compilation-config: 3
1115
+ Aya-Expanse-32B:
1116
+ model_family: Aya-Expanse
1117
+ model_variant: 32B
1118
+ model_type: LLM
1119
+ gpus_per_node: 2
1120
+ num_nodes: 1
1121
+ vocab_size: 256000
1122
+ qos: m2
1123
+ time: 08:00:00
1124
+ partition: a40
1125
+ vllm_args:
1126
+ --tensor-parallel-size: 2
1127
+ --max-model-len: 8192
1128
+ --max-num-seqs: 256
1129
+ --compilation-config: 3
1130
+ DeepSeek-R1-Distill-Llama-70B:
1131
+ model_family: DeepSeek-R1
1132
+ model_variant: Distill-Llama-70B
1133
+ model_type: LLM
1134
+ gpus_per_node: 4
1135
+ num_nodes: 1
1136
+ vocab_size: 128256
1137
+ qos: m2
1138
+ time: 08:00:00
1139
+ partition: a40
1140
+ vllm_args:
1141
+ --tensor-parallel-size: 4
1142
+ --max-model-len: 65536
1143
+ --max-num-seqs: 256
1144
+ --compilation-config: 3
1145
+ DeepSeek-R1-Distill-Llama-8B:
1146
+ model_family: DeepSeek-R1
1147
+ model_variant: Distill-Llama-8B
1148
+ model_type: LLM
1149
+ gpus_per_node: 1
1150
+ num_nodes: 1
1151
+ vocab_size: 128256
1152
+ qos: m2
1153
+ time: 08:00:00
1154
+ partition: a40
1155
+ vllm_args:
1156
+ --max-model-len: 131072
1157
+ --max-num-seqs: 256
1158
+ --compilation-config: 3
1159
+ DeepSeek-R1-Distill-Qwen-32B:
1160
+ model_family: DeepSeek-R1
1161
+ model_variant: Distill-Qwen-32B
1162
+ model_type: LLM
1163
+ gpus_per_node: 2
1164
+ num_nodes: 1
1165
+ vocab_size: 152064
1166
+ qos: m2
1167
+ time: 08:00:00
1168
+ partition: a40
1169
+ vllm_args:
1170
+ --tensor-parallel-size: 2
1171
+ --max-model-len: 65536
1172
+ --max-num-seqs: 256
1173
+ --compilation-config: 3
1174
+ DeepSeek-R1-Distill-Qwen-14B:
1175
+ model_family: DeepSeek-R1
1176
+ model_variant: Distill-Qwen-14B
1177
+ model_type: LLM
1178
+ gpus_per_node: 1
1179
+ num_nodes: 1
1180
+ vocab_size: 152064
1181
+ qos: m2
1182
+ time: 08:00:00
1183
+ partition: a40
1184
+ vllm_args:
1185
+ --max-model-len: 65536
1186
+ --max-num-seqs: 256
1187
+ --compilation-config: 3
1188
+ DeepSeek-R1-Distill-Qwen-7B:
1189
+ model_family: DeepSeek-R1
1190
+ model_variant: Distill-Qwen-7B
1191
+ model_type: LLM
1192
+ gpus_per_node: 1
1193
+ num_nodes: 1
1194
+ vocab_size: 152064
1195
+ qos: m2
1196
+ time: 08:00:00
1197
+ partition: a40
1198
+ vllm_args:
1199
+ --max-model-len: 131072
1200
+ --max-num-seqs: 256
1201
+ --compilation-config: 3
1202
+ DeepSeek-R1-Distill-Qwen-1.5B:
1203
+ model_family: DeepSeek-R1
1204
+ model_variant: Distill-Qwen-1.5B
1205
+ model_type: LLM
1206
+ gpus_per_node: 1
1207
+ num_nodes: 1
1208
+ vocab_size: 152064
1209
+ qos: m2
1210
+ time: 08:00:00
1211
+ partition: a40
1212
+ vllm_args:
1213
+ --max-model-len: 131072
1214
+ --max-num-seqs: 256
1215
+ --compilation-config: 3
1216
+ Phi-3.5-vision-instruct:
1217
+ model_family: Phi-3.5-vision
1218
+ model_variant: instruct
1219
+ model_type: VLM
1220
+ gpus_per_node: 2
1221
+ num_nodes: 1
1222
+ vocab_size: 32064
1223
+ qos: m2
1224
+ time: 08:00:00
1225
+ partition: a40
1226
+ vllm_args:
1227
+ --tensor-parallel-size: 2
1228
+ --max-model-len: 65536
1229
+ --max-num-seqs: 256
1230
+ --compilation-config: 3
1231
+ InternVL2_5-8B:
1232
+ model_family: InternVL2_5
1233
+ model_variant: 8B
1234
+ model_type: VLM
1235
+ gpus_per_node: 1
1236
+ num_nodes: 1
1237
+ vocab_size: 92553
1238
+ qos: m2
1239
+ time: 08:00:00
1240
+ partition: a40
1241
+ vllm_args:
1242
+ --max-model-len: 32768
1243
+ --max-num-seqs: 256
1244
+ --compilation-config: 3
1245
+ glm-4v-9b:
1246
+ model_family: glm-4v
1247
+ model_variant: 9b
1248
+ model_type: VLM
1249
+ gpus_per_node: 1
1250
+ num_nodes: 1
1251
+ vocab_size: 151552
1252
+ qos: m2
1253
+ time: 08:00:00
1254
+ partition: a40
1255
+ vllm_args:
1256
+ --max-model-len: 8192
1257
+ --max-num-seqs: 256
1258
+ --compilation-config: 3
1259
+ Molmo-7B-D-0924:
1260
+ model_family: Molmo
1261
+ model_variant: 7B-D-0924
1262
+ model_type: VLM
1263
+ gpus_per_node: 1
1264
+ num_nodes: 1
1265
+ vocab_size: 152064
1266
+ qos: m2
1267
+ time: 08:00:00
1268
+ partition: a40
1269
+ vllm_args:
1270
+ --max-model-len: 4096
1271
+ --max-num-seqs: 256
1272
+ --compilation-config: 3
1273
+ deepseek-vl2:
1274
+ model_family: deepseek-vl2
1275
+ model_type: VLM
1276
+ gpus_per_node: 2
1277
+ num_nodes: 1
1278
+ vocab_size: 129280
1279
+ qos: m2
1280
+ time: 08:00:00
1281
+ partition: a40
1282
+ vllm_args:
1283
+ --tensor-parallel-size: 2
1284
+ --max-model-len: 4096
1285
+ --max-num-seqs: 256
1286
+ --compilation-config: 3
1287
+ deepseek-vl2-small:
1288
+ model_family: deepseek-vl2
1289
+ model_variant: small
1290
+ model_type: VLM
1291
+ gpus_per_node: 1
1292
+ num_nodes: 1
1293
+ vocab_size: 129280
1294
+ qos: m2
1295
+ time: 08:00:00
1296
+ partition: a40
1297
+ vllm_args:
1298
+ --max-model-len: 4096
1299
+ --max-num-seqs: 256
1300
+ --compilation-config: 3