PyPI - vec-inf - Versions diffs - 0.4.1__py3-none-any.whl → 0.6.0__py3-none-any.whl - Mend

vec-inf 0.4.1py3-none-any.whl → 0.6.0py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

vec_inf/README.md +3 -3
vec_inf/cli/_cli.py +227 -325
vec_inf/cli/_helper.py +400 -0
vec_inf/cli/_utils.py +26 -135
vec_inf/cli/_vars.py +32 -0
vec_inf/client/__init__.py +31 -0
vec_inf/client/_client_vars.py +213 -0
vec_inf/client/_exceptions.py +37 -0
vec_inf/client/_helper.py +674 -0
vec_inf/client/_slurm_script_generator.py +179 -0
vec_inf/client/_utils.py +287 -0
vec_inf/client/api.py +302 -0
vec_inf/client/config.py +128 -0
vec_inf/client/models.py +225 -0
vec_inf/client/slurm_vars.py +49 -0
vec_inf/{models → config}/README.md +30 -12
vec_inf/config/models.yaml +1300 -0
vec_inf-0.6.0.dist-info/METADATA +193 -0
vec_inf-0.6.0.dist-info/RECORD +25 -0
vec_inf/launch_server.sh +0 -145
vec_inf/models/models.csv +0 -85
vec_inf/multinode_vllm.slurm +0 -124
vec_inf/vllm.slurm +0 -59
vec_inf-0.4.1.dist-info/METADATA +0 -121
vec_inf-0.4.1.dist-info/RECORD +0 -16
{vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/WHEEL +0 -0
{vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/entry_points.txt +0 -0
{vec_inf-0.4.1.dist-info → vec_inf-0.6.0.dist-info}/licenses/LICENSE +0 -0

vec_inf/config/models.yaml ADDED Viewed

@@ -0,0 +1,1300 @@
+models:
+  c4ai-command-r-plus:
+    model_family: c4ai-command-r
+    model_variant: plus
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --pipeline-parallel-size: 2
+      --tensor-parallel-size: 4
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  c4ai-command-r-plus-08-2024:
+    model_family: c4ai-command-r
+    model_variant: plus-08-2024
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --pipeline-parallel-size: 2
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  c4ai-command-r-08-2024:
+    model_family: c4ai-command-r
+    model_variant: 08-2024
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-7b-hf:
+    model_family: CodeLlama
+    model_variant: 7b-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-7b-Instruct-hf:
+    model_family: CodeLlama
+    model_variant: 7b-Instruct-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-13b-hf:
+    model_family: CodeLlama
+    model_variant: 13b-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-13b-Instruct-hf:
+    model_family: CodeLlama
+    model_variant: 13b-Instruct-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-34b-hf:
+    model_family: CodeLlama
+    model_variant: 34b-hf
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-34b-Instruct-hf:
+    model_family: CodeLlama
+    model_variant: 34b-Instruct-hf
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-70b-hf:
+    model_family: CodeLlama
+    model_variant: 70b-hf
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 32016
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  CodeLlama-70b-Instruct-hf:
+    model_family: CodeLlama
+    model_variant: 70b-Instruct-hf
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 32016
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  gemma-2-9b:
+    model_family: gemma-2
+    model_variant: 9b
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  gemma-2-9b-it:
+    model_family: gemma-2
+    model_variant: 9b-it
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  gemma-2-27b:
+    model_family: gemma-2
+    model_variant: 27b
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  gemma-2-27b-it:
+    model_family: gemma-2
+    model_variant: 27b-it
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-2-7b-hf:
+    model_family: Llama-2
+    model_variant: 7b-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-2-7b-chat-hf:
+    model_family: Llama-2
+    model_variant: 7b-chat-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-2-13b-hf:
+    model_family: Llama-2
+    model_variant: 13b-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-2-13b-chat-hf:
+    model_family: Llama-2
+    model_variant: 13b-chat-hf
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-2-70b-hf:
+    model_family: Llama-2
+    model_variant: 70b-hf
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-2-70b-chat-hf:
+    model_family: Llama-2
+    model_variant: 70b-chat-hf
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  llava-1.5-7b-hf:
+    model_family: llava-1.5
+    model_variant: 7b-hf
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  llava-1.5-13b-hf:
+    model_family: llava-1.5
+    model_variant: 13b-hf
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  llava-v1.6-mistral-7b-hf:
+    model_family: llava-v1.6
+    model_variant: mistral-7b-hf
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  llava-v1.6-34b-hf:
+    model_family: llava-v1.6
+    model_variant: 34b-hf
+    model_type: VLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 64064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3-8B:
+    model_family: Meta-Llama-3
+    model_variant: 8B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3-8B-Instruct:
+    model_family: Meta-Llama-3
+    model_variant: 8B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3-70B:
+    model_family: Meta-Llama-3
+    model_variant: 70B
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3-70B-Instruct:
+    model_family: Meta-Llama-3
+    model_variant: 70B-Instruct
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3.1-8B:
+    model_family: Meta-Llama-3.1
+    model_variant: 8B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3.1-8B-Instruct:
+    model_family: Meta-Llama-3.1
+    model_variant: 8B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3.1-70B:
+    model_family: Meta-Llama-3.1
+    model_variant: 70B
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3.1-70B-Instruct:
+    model_family: Meta-Llama-3.1
+    model_variant: 70B-Instruct
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Meta-Llama-3.1-405B-Instruct:
+    model_family: Meta-Llama-3.1
+    model_variant: 405B-Instruct
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 8
+    vocab_size: 128256
+    qos: m4
+    time: 02:00:00
+    partition: a40
+    vllm_args:
+      --pipeline-parallel-size: 8
+      --tensor-parallel-size: 4
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mistral-7B-Instruct-v0.1:
+    model_family: Mistral
+    model_variant: 7B-Instruct-v0.1
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mistral-7B-Instruct-v0.2:
+    model_family: Mistral
+    model_variant: 7B-Instruct-v0.2
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mistral-7B-v0.3:
+    model_family: Mistral
+    model_variant: 7B-v0.3
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32768
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mistral-7B-Instruct-v0.3:
+    model_family: Mistral
+    model_variant: 7B-Instruct-v0.3
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32768
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mistral-Large-Instruct-2407:
+    model_family: Mistral
+    model_variant: Large-Instruct-2407
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 32768
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --pipeline-parallel-size: 2
+      --tensor-parallel-size: 4
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mistral-Large-Instruct-2411:
+    model_family: Mistral
+    model_variant: Large-Instruct-2411
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 32768
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --pipeline-parallel-size: 2
+      --tensor-parallel-size: 4
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mixtral-8x7B-Instruct-v0.1:
+    model_family: Mixtral
+    model_variant: 8x7B-Instruct-v0.1
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mixtral-8x22B-v0.1:
+    model_family: Mixtral
+    model_variant: 8x22B-v0.1
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 32768
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --pipeline-parallel-size: 2
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Mixtral-8x22B-Instruct-v0.1:
+    model_family: Mixtral
+    model_variant: 8x22B-Instruct-v0.1
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 32768
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --pipeline-parallel-size: 2
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Phi-3-medium-128k-instruct:
+    model_family: Phi-3
+    model_variant: medium-128k-instruct
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 32064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Phi-3-vision-128k-instruct:
+    model_family: Phi-3-vision
+    model_variant: 128k-instruct
+    model_type: VLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 32064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama3-OpenBioLLM-70B:
+    model_family: Llama3-OpenBioLLM
+    model_variant: 70B
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-3.1-Nemotron-70B-Instruct-HF:
+    model_family: Llama-3.1-Nemotron
+    model_variant: 70B-Instruct-HF
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-3.2-1B:
+    model_family: Llama-3.2
+    model_variant: 1B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-3.2-1B-Instruct:
+    model_family: Llama-3.2
+    model_variant: 1B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-3.2-3B:
+    model_family: Llama-3.2
+    model_variant: 3B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-3.2-3B-Instruct:
+    model_family: Llama-3.2
+    model_variant: 3B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-3.2-11B-Vision:
+    model_family: Llama-3.2
+    model_variant: 11B-Vision
+    model_type: VLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 4096
+      --max-num-seqs: 64
+      --compilation-config: 3
+      --enforce-eager: true
+  Llama-3.2-11B-Vision-Instruct:
+    model_family: Llama-3.2
+    model_variant: 11B-Vision-Instruct
+    model_type: VLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 4096
+      --max-num-seqs: 64
+      --compilation-config: 3
+      --enforce-eager: true
+  Llama-3.2-90B-Vision:
+    model_family: Llama-3.2
+    model_variant: 90B-Vision
+    model_type: VLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 8
+      --max-model-len: 4096
+      --max-num-seqs: 32
+      --compilation-config: 3
+      --enforce-eager: true
+  Llama-3.2-90B-Vision-Instruct:
+    model_family: Llama-3.2
+    model_variant: 90B-Vision-Instruct
+    model_type: VLM
+    gpus_per_node: 4
+    num_nodes: 2
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 8
+      --max-model-len: 4096
+      --max-num-seqs: 32
+      --compilation-config: 3
+      --enforce-eager: true
+  Qwen2.5-0.5B-Instruct:
+    model_family: Qwen2.5
+    model_variant: 0.5B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-1.5B-Instruct:
+    model_family: Qwen2.5
+    model_variant: 1.5B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-3B-Instruct:
+    model_family: Qwen2.5
+    model_variant: 3B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-7B-Instruct:
+    model_family: Qwen2.5
+    model_variant: 7B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-14B-Instruct:
+    model_family: Qwen2.5
+    model_variant: 14B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-32B-Instruct:
+    model_family: Qwen2.5
+    model_variant: 32B-Instruct
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-72B-Instruct:
+    model_family: Qwen2.5
+    model_variant: 72B-Instruct
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 16384
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-Math-1.5B-Instruct:
+    model_family: Qwen2.5
+    model_variant: Math-1.5B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-Math-7B-Instruct:
+    model_family: Qwen2.5
+    model_variant: Math-7B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-Math-72B-Instruct:
+    model_family: Qwen2.5
+    model_variant: Math-72B-Instruct
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-Coder-7B-Instruct:
+    model_family: Qwen2.5
+    model_variant: Coder-7B-Instruct
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-Math-RM-72B:
+    model_family: Qwen2.5
+    model_variant: Math-RM-72B
+    model_type: Reward_Modeling
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Qwen2.5-Math-PRM-7B:
+    model_family: Qwen2.5
+    model_variant: Math-PRM-7B
+    model_type: Reward_Modeling
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  QwQ-32B-Preview:
+    model_family: QwQ
+    model_variant: 32B-Preview
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Pixtral-12B-2409:
+    model_family: Pixtral
+    model_variant: 12B-2409
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 131072
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  e5-mistral-7b-instruct:
+    model_family: e5
+    model_variant: mistral-7b-instruct
+    model_type: Text_Embedding
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 32000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  bge-base-en-v1.5:
+    model_family: bge
+    model_variant: base-en-v1.5
+    model_type: Text_Embedding
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 30522
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 512
+      --max-num-seqs: 256
+      --compilation-config: 3
+  all-MiniLM-L6-v2:
+    model_family: all-MiniLM
+    model_variant: L6-v2
+    model_type: Text_Embedding
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 30522
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 512
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Llama-3.3-70B-Instruct:
+    model_family: Llama-3.3
+    model_variant: 70B-Instruct
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  InternVL2_5-26B:
+    model_family: InternVL2_5
+    model_variant: 26B
+    model_type: VLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 92553
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  InternVL2_5-38B:
+    model_family: InternVL2_5
+    model_variant: 38B
+    model_type: VLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 92553
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Aya-Expanse-32B:
+    model_family: Aya-Expanse
+    model_variant: 32B
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 256000
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  DeepSeek-R1-Distill-Llama-70B:
+    model_family: DeepSeek-R1
+    model_variant: Distill-Llama-70B
+    model_type: LLM
+    gpus_per_node: 4
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 4
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  DeepSeek-R1-Distill-Llama-8B:
+    model_family: DeepSeek-R1
+    model_variant: Distill-Llama-8B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 128256
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  DeepSeek-R1-Distill-Qwen-32B:
+    model_family: DeepSeek-R1
+    model_variant: Distill-Qwen-32B
+    model_type: LLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  DeepSeek-R1-Distill-Qwen-14B:
+    model_family: DeepSeek-R1
+    model_variant: Distill-Qwen-14B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  DeepSeek-R1-Distill-Qwen-7B:
+    model_family: DeepSeek-R1
+    model_variant: Distill-Qwen-7B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  DeepSeek-R1-Distill-Qwen-1.5B:
+    model_family: DeepSeek-R1
+    model_variant: Distill-Qwen-1.5B
+    model_type: LLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 131072
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Phi-3.5-vision-instruct:
+    model_family: Phi-3.5-vision
+    model_variant: instruct
+    model_type: VLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 32064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 65536
+      --max-num-seqs: 256
+      --compilation-config: 3
+  InternVL2_5-8B:
+    model_family: InternVL2_5
+    model_variant: 8B
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 92553
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 32768
+      --max-num-seqs: 256
+      --compilation-config: 3
+  glm-4v-9b:
+    model_family: glm-4v
+    model_variant: 9b
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 151552
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 8192
+      --max-num-seqs: 256
+      --compilation-config: 3
+  Molmo-7B-D-0924:
+    model_family: Molmo
+    model_variant: 7B-D-0924
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 152064
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  deepseek-vl2:
+    model_family: deepseek-vl2
+    model_type: VLM
+    gpus_per_node: 2
+    num_nodes: 1
+    vocab_size: 129280
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --tensor-parallel-size: 2
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3
+  deepseek-vl2-small:
+    model_family: deepseek-vl2
+    model_variant: small
+    model_type: VLM
+    gpus_per_node: 1
+    num_nodes: 1
+    vocab_size: 129280
+    qos: m2
+    time: 08:00:00
+    partition: a40
+    vllm_args:
+      --max-model-len: 4096
+      --max-num-seqs: 256
+      --compilation-config: 3

vec-inf 0.4.1__py3-none-any.whl → 0.6.0__py3-none-any.whl

vec-inf 0.4.1py3-none-any.whl → 0.6.0py3-none-any.whl