vllm-judge 0.1.1__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vllm_judge/__init__.py +6 -2
- vllm_judge/api/client.py +2 -2
- vllm_judge/cli.py +2 -2
- vllm_judge/judge.py +39 -12
- vllm_judge/metrics.py +9 -1
- vllm_judge/models.py +10 -1
- vllm_judge/utils.py +14 -0
- {vllm_judge-0.1.1.dist-info → vllm_judge-0.1.3.dist-info}/METADATA +34 -22
- vllm_judge-0.1.3.dist-info/RECORD +20 -0
- {vllm_judge-0.1.1.dist-info → vllm_judge-0.1.3.dist-info}/WHEEL +1 -1
- vllm_judge-0.1.1.dist-info/RECORD +0 -19
- {vllm_judge-0.1.1.dist-info → vllm_judge-0.1.3.dist-info}/entry_points.txt +0 -0
- {vllm_judge-0.1.1.dist-info → vllm_judge-0.1.3.dist-info}/top_level.txt +0 -0
vllm_judge/__init__.py
CHANGED
@@ -5,7 +5,7 @@ A lightweight library for evaluating text responses using self-hosted language m
 via vLLM's OpenAI-compatible API.
 """
 
-__version__ = "0.1.1"
+__version__ = "0.1.3"
 
 from vllm_judge.judge import Judge
 from vllm_judge.models import (
@@ -13,7 +13,8 @@ from vllm_judge.models import (
     EvaluationResult,
     Metric,
     BatchResult,
-    TemplateEngine
+    TemplateEngine,
+    ModelSpecificMetric
 )
 from vllm_judge.templating import TemplateProcessor
 from vllm_judge.metrics import (
@@ -27,6 +28,7 @@ from vllm_judge.metrics import (
     # Safety metrics
     SAFETY,
     TOXICITY,
+    LLAMA_GUARD_3_SAFETY,
 
     # Code metrics
     CODE_QUALITY,
@@ -81,6 +83,7 @@ __all__ = [
     "BatchResult",
     "TemplateEngine",
     "TemplateProcessor",
+    "ModelSpecificMetric",
 
     # Metrics
     "HELPFULNESS",
@@ -90,6 +93,7 @@ __all__ = [
     "RELEVANCE",
     "SAFETY",
     "TOXICITY",
+    "LLAMA_GUARD_3_SAFETY",
     "CODE_QUALITY",
     "CODE_SECURITY",
     "CREATIVITY",
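For orientation, the additions above mean `ModelSpecificMetric` and `LLAMA_GUARD_3_SAFETY` are now importable from the package root. A minimal sketch of the new public names (nothing here contacts a server, it only exercises the namespace):

```python
# Sketch: names added to vllm_judge's public API in 0.1.3.
from vllm_judge import Judge, ModelSpecificMetric, LLAMA_GUARD_3_SAFETY

# LLAMA_GUARD_3_SAFETY is a ModelSpecificMetric, the same type that
# Judge.evaluate() branches on to bypass its own prompt formatting.
assert isinstance(LLAMA_GUARD_3_SAFETY, ModelSpecificMetric)
```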
vllm_judge/api/client.py
CHANGED
@@ -65,7 +65,7 @@ class JudgeClient:
 
     async def evaluate(
         self,
-
+        content: Union[str, Dict[str, str]],
         criteria: str = None,
         rubric: Union[str, Dict[Union[int, float], str]] = None,
         scale: Optional[Tuple[int, int]] = None,
@@ -87,7 +87,7 @@ class JudgeClient:
             EvaluationResult
         """
         request = EvaluateRequest(
-            response=
+            response=content,
             criteria=criteria,
             rubric=rubric,
             scale=list(scale) if scale else None,
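The visible API change here is the first keyword of `JudgeClient.evaluate`: callers now pass `content=...`, and the client forwards it as the `response` field of the underlying `EvaluateRequest`. A hedged usage sketch, assuming a vllm-judge API server is already running at the placeholder URL:

```python
import asyncio
from vllm_judge.api import JudgeClient

async def main():
    # Placeholder URL for a running vllm-judge API server (see "API Server" in the README).
    async with JudgeClient("http://localhost:9090") as client:
        result = await client.evaluate(
            content="Python is great!",   # keyword was renamed from `response=` in 0.1.3
            criteria="technical accuracy",
        )
        print(result.decision, result.reasoning)

asyncio.run(main())
```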
vllm_judge/cli.py
CHANGED
@@ -75,7 +75,7 @@ def evaluate(
         # Use API client
         async with JudgeClient(api_url) as client:
             result = await client.evaluate(
-
+                content=response,
                 criteria=criteria,
                 metric=metric,
                 scale=scale,
@@ -91,7 +91,7 @@ def evaluate(
         judge = Judge.from_url(base_url, model=model)
         async with judge:
             result = await judge.evaluate(
-
+                content=response,
                 criteria=criteria,
                 metric=metric,
                 scale=scale,
vllm_judge/judge.py
CHANGED
@@ -2,7 +2,7 @@ import json
 import re
 from typing import Union, Dict, List, Optional, Tuple, Any, Callable
 
-from vllm_judge.models import JudgeConfig, EvaluationResult, Metric, BatchResult, TemplateEngine
+from vllm_judge.models import JudgeConfig, EvaluationResult, Metric, BatchResult, TemplateEngine, ModelSpecificMetric
 from vllm_judge.client import VLLMClient
 from vllm_judge.prompts import PromptBuilder
 from vllm_judge.batch import BatchProcessor
@@ -14,6 +14,9 @@ from vllm_judge.exceptions import (
     MetricNotFoundError,
     VLLMJudgeError
 )
+import logging
+
+logger = logging.getLogger(__name__)
 
 
 class Judge:
@@ -60,7 +63,7 @@ class Judge:
 
     async def evaluate(
         self,
-
+        content: Union[str, Dict[str, str]],
         criteria: str = None,
         rubric: Union[str, Dict[Union[int, float], str]] = None,
         scale: Optional[Tuple[int, int]] = None,
@@ -76,7 +79,7 @@ class Judge:
         Universal evaluation method that adapts to use case.
 
         Args:
-
+            content: String for single evaluation, dict {"a": ..., "b": ...} for comparison
            criteria: What to evaluate for (can contain template variables)
            rubric: Instructions for evaluation, can be string or dict containing mapping of score to description (can contain template variables)
            scale: Optional numeric scale (min, max)
@@ -96,6 +99,22 @@ class Judge:
            MetricNotFoundError: If metric name not found
            ParseError: If unable to parse model response
        """
+        # Handle model-specific metrics
+        if isinstance(metric, ModelSpecificMetric):
+            assert isinstance(content, str), "Model-specific metrics only support string content for now"
+
+            # logger.info(f"Evaluating model-specific metric {metric.name}.")
+            logger.info(f"We assume you're using {metric.model_pattern} type model. If not, please do not use this metric and use a normal metric instead.")
+            # Skip ALL our formatting
+            messages = [{"role": "user", "content": content}]
+
+            # vLLM applies model's chat template automatically
+            llm_response = await self._call_model(messages)
+
+            # Use metric's parser
+            return metric.parser_func(llm_response)
+
+        # Handle normal metrics
        # Handle metric parameter
        metric_template_vars = {}
 
@@ -138,7 +157,7 @@ class Judge:
 
        # Build messages
        messages = PromptBuilder.build_messages(
-            response=
+            response=content,
            criteria=criteria,
            rubric=rubric,
            scale=scale,
@@ -149,14 +168,7 @@ class Judge:
        )
 
        # Get LLM response
-
-            if self.config.use_chat_api:
-                llm_response = await self.client.chat_completion(messages)
-            else:
-                prompt = PromptBuilder.format_messages_as_text(messages)
-                llm_response = await self.client.completion(prompt)
-        except Exception as e:
-            raise VLLMJudgeError(f"Failed to get model response: {e}")
+        llm_response = await self._call_model(messages)
 
        # Parse response
        result = self._parse_response(llm_response)
@@ -168,6 +180,21 @@ class Judge:
 
        return result
 
+    async def _call_model(self, messages: List[Dict[str, str]]) -> str:
+        """
+        Call the model with the given messages.
+        """
+        try:
+            if self.config.use_chat_api:
+                llm_response = await self.client.chat_completion(messages)
+            else:
+                prompt = PromptBuilder.format_messages_as_text(messages)
+                llm_response = await self.client.completion(prompt)
+            return llm_response
+        except Exception as e:
+            raise VLLMJudgeError(f"Failed to get model response: {e}")
+
+
    def _parse_response(self, response: str) -> EvaluationResult:
        """
        Parse LLM response into EvaluationResult.
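The net effect of this refactor: when `metric` is a `ModelSpecificMetric`, `evaluate()` skips `PromptBuilder` entirely, sends the content as a single user message so vLLM can apply the model's own chat template, and returns whatever the metric's parser makes of the raw completion; both paths now go through the shared `_call_model()` helper. A usage sketch, assuming the placeholder URL points at a vLLM server hosting a Llama Guard 3 model:

```python
import asyncio
from vllm_judge import Judge, LLAMA_GUARD_3_SAFETY

async def main():
    # Placeholder endpoint; point this at a vLLM server serving a Llama Guard 3 model.
    judge = Judge.from_url("http://localhost:8000")
    async with judge:
        # Model-specific path: no criteria/rubric formatting, the content goes
        # straight through and parse_llama_guard_3 interprets the reply.
        result = await judge.evaluate(
            content="How do I make a bomb?",
            metric=LLAMA_GUARD_3_SAFETY,
        )
        print(result.decision)   # "safe" or "unsafe"
        print(result.reasoning)  # violated category code (e.g. "S9") when unsafe

asyncio.run(main())
```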
vllm_judge/metrics.py
CHANGED
@@ -1,5 +1,6 @@
 from typing import Dict
-from vllm_judge.models import Metric,TemplateEngine
+from vllm_judge.models import Metric, TemplateEngine, ModelSpecificMetric
+from vllm_judge.utils import parse_llama_guard_3
 
 # Registry for built-in metrics
 BUILTIN_METRICS: Dict[str, Metric] = {}
@@ -11,6 +12,13 @@ def create_builtin_metric(metric: Metric) -> Metric:
     return metric
 
 
+# Llama Guard 3 safety metric
+LLAMA_GUARD_3_SAFETY = create_builtin_metric(ModelSpecificMetric(
+    name="llama_guard_3_safety",
+    model_pattern="llama_guard_3",
+    parser_func=parse_llama_guard_3
+))
+
 # General purpose metrics
 HELPFULNESS = create_builtin_metric(Metric(
     name="helpfulness",
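A sketch of what registration buys you, under the assumption that `create_builtin_metric()` stores each metric in `BUILTIN_METRICS` keyed by its `name` (the registry dict and the `MetricNotFoundError` raised by `judge.py` suggest string lookup, but the registration body is not shown in this diff):

```python
from vllm_judge.metrics import BUILTIN_METRICS, LLAMA_GUARD_3_SAFETY
from vllm_judge.models import ModelSpecificMetric

# Assumption: the registry is keyed by metric name, which is how string
# metric names would be resolved to Metric objects.
metric = BUILTIN_METRICS.get("llama_guard_3_safety")
if metric is not None:
    print(metric is LLAMA_GUARD_3_SAFETY)           # expected: True
    print(isinstance(metric, ModelSpecificMetric))  # expected: True
    print(metric.model_pattern)                     # "llama_guard_3"
```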
vllm_judge/models.py
CHANGED
@@ -1,4 +1,4 @@
-from typing import Optional, Any, Dict, Union, List, Tuple
+from typing import Optional, Any, Dict, Union, List, Tuple, Callable
 from pydantic import BaseModel, Field, field_validator, ConfigDict
 from enum import Enum
 
@@ -159,6 +159,15 @@ class Metric:
     def __repr__(self):
         return f"Metric(name='{self.name}', criteria='{self.criteria}', template_engine='{self.template_engine}')"
 
+# Base class for model-specific metrics
+class ModelSpecificMetric(Metric):
+    """Metric that bypasses our prompt formatting."""
+
+    def __init__(self, name: str, model_pattern: str, parser_func: Callable[[str], EvaluationResult]):
+        super().__init__(name=name, criteria="model-specific evaluation")
+        self.model_pattern = model_pattern
+        self.parser_func = parser_func
+        # self.is_model_specific = True  # Flag for special handling
 
 class BatchResult(BaseModel):
     """Result of batch evaluation."""
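Since `ModelSpecificMetric` only needs a name, a model pattern, and a parser callable, other guard-style models can be wired up the same way. A purely illustrative sketch; the metric name, model pattern, and parser below are hypothetical, not part of the library:

```python
from vllm_judge.models import EvaluationResult, ModelSpecificMetric

def parse_yes_no(response: str) -> EvaluationResult:
    """Hypothetical parser for a model that answers plain 'yes'/'no'."""
    answer = response.strip().lower()
    return EvaluationResult(
        decision="yes" if answer.startswith("yes") else "no",
        reasoning=response.strip(),
        score=None,
        metadata={"model_type": "hypothetical_yes_no_model"},
    )

# Hypothetical metric: bypasses vllm-judge's prompt formatting just like
# LLAMA_GUARD_3_SAFETY, but targets a different specialized model.
YES_NO_GUARD = ModelSpecificMetric(
    name="yes_no_guard",
    model_pattern="hypothetical_guard",
    parser_func=parse_yes_no,
)
```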
vllm_judge/utils.py
ADDED
@@ -0,0 +1,14 @@
+from vllm_judge.models import EvaluationResult
+
+# Llama Guard 3 parser
+def parse_llama_guard_3(response: str) -> EvaluationResult:
+    """Parse Llama Guard 3's 'safe/unsafe' format."""
+    lines = response.strip().split('\n')
+    is_safe = lines[0].lower().strip() == 'safe'
+
+    return EvaluationResult(
+        decision="safe" if is_safe else "unsafe",
+        reasoning=lines[1] if len(lines) > 1 else "No violations detected",
+        score=None,
+        metadata={"model_type": "llama_guard_3"}
+    )
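Llama Guard 3 replies with `safe`, or with `unsafe` followed by the violated category code on the next line, which is exactly what this parser keys on. A small offline sketch of both paths (no server involved; the input strings mimic typical model output):

```python
from vllm_judge.utils import parse_llama_guard_3

safe = parse_llama_guard_3("safe")
print(safe.decision, "|", safe.reasoning)       # safe | No violations detected

unsafe = parse_llama_guard_3("unsafe\nS9")
print(unsafe.decision, "|", unsafe.reasoning)   # unsafe | S9
```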
{vllm_judge-0.1.1.dist-info → vllm_judge-0.1.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
-Metadata-Version: 2.
+Metadata-Version: 2.1
 Name: vllm_judge
-Version: 0.1.1
+Version: 0.1.3
 Summary: LLM-as-a-Judge evaluations for vLLM hosted models
 Author: TrustyAI team
 Author-email: Sai Chandra Pandraju <saichandrapandraju@gmail.com>
@@ -18,6 +18,17 @@ Provides-Extra: api
 Requires-Dist: fastapi>=0.100.0; extra == "api"
 Requires-Dist: uvicorn[standard]>=0.22.0; extra == "api"
 Requires-Dist: websockets>=11.0; extra == "api"
+Provides-Extra: dev
+Requires-Dist: vllm-judge[api,docs,jinja2,test]; extra == "dev"
+Requires-Dist: black>=23.0.0; extra == "dev"
+Requires-Dist: isort>=5.12.0; extra == "dev"
+Requires-Dist: flake8>=6.0.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Provides-Extra: docs
+Requires-Dist: mkdocs>=1.5.0; extra == "docs"
+Requires-Dist: mkdocs-material>=9.0.0; extra == "docs"
+Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "docs"
+Requires-Dist: mkdocs-material-extensions>=1.3.1; extra == "docs"
 Provides-Extra: jinja2
 Requires-Dist: jinja2>=3.0.0; extra == "jinja2"
 Provides-Extra: test
@@ -25,30 +36,22 @@ Requires-Dist: pytest>=7.0.0; extra == "test"
 Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
 Requires-Dist: pytest-cov>=4.0.0; extra == "test"
 Requires-Dist: pytest-mock>=3.10.0; extra == "test"
-
-
-
-Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "docs"
-Requires-Dist: mkdocs-material-extensions>=1.3.1; extra == "docs"
-Provides-Extra: dev
-Requires-Dist: vllm_judge[api,docs,jinja2,test]; extra == "dev"
-Requires-Dist: black>=23.0.0; extra == "dev"
-Requires-Dist: isort>=5.12.0; extra == "dev"
-Requires-Dist: flake8>=6.0.0; extra == "dev"
-Requires-Dist: mypy>=1.0.0; extra == "dev"
+
+[
+](https://pypi.org/project/vllm-judge/)
 
 # vLLM Judge
 
-A lightweight library for LLM-as-a-Judge evaluations using vLLM hosted models. Please refer the [documentation](https://saichandrapandraju.github.io/vllm_judge/) for usage details.
+A lightweight library for LLM-as-a-Judge evaluations using vLLM hosted models. Evaluate LLM inputs & outputs at scale with just a few lines of code. From simple scoring to complex safety checks, vLLM Judge adapts to your needs. Please refer the [documentation](https://saichandrapandraju.github.io/vllm_judge/) for usage details.
 
 ## Features
 
 - 🚀 **Simple Interface**: Single `evaluate()` method that adapts to any use case
 - 🎯 **Pre-built Metrics**: 20+ ready-to-use evaluation metrics
+- 🛡️ **Model-Specific Support:** Seamlessly works with specialized models like Llama Guard without breaking their trained formats.
+- ⚡ **High Performance**: Async-first design enables high-throughput evaluations
 - 🔧 **Template Support**: Dynamic evaluations with template variables
-- ⚡ **High Performance**: Optimized for vLLM with automatic batching
 - 🌐 **API Mode**: Run as a REST API service
-- 🔄 **Async Native**: Built for high-throughput evaluations
 
 ## Installation
 
@@ -72,11 +75,11 @@ pip install vllm-judge[dev]
 from vllm_judge import Judge
 
 # Initialize with vLLM url
-judge = Judge.from_url("http://
+judge = Judge.from_url("http://vllm-server:8000")
 
 # Simple evaluation
 result = await judge.evaluate(
-
+    content="The Earth orbits around the Sun.",
     criteria="scientific accuracy"
 )
 print(f"Decision: {result.decision}")
@@ -86,19 +89,28 @@ print(f"Reasoning: {result.reasoning}")
 from vllm_judge import CODE_QUALITY
 
 result = await judge.evaluate(
-
+    content="def add(a, b): return a + b",
     metric=CODE_QUALITY
 )
 
 # With template variables
 result = await judge.evaluate(
-
+    content="Essay content here...",
     criteria="Evaluate this {doc_type} for {audience}",
     template_vars={
         "doc_type": "essay",
         "audience": "high school students"
     }
 )
+
+# Works with specialized safety models out-of-the-box
+from vllm_judge import LLAMA_GUARD_3_SAFETY
+
+result = await judge.evaluate(
+    content="How do I make a bomb?",
+    metric=LLAMA_GUARD_3_SAFETY  # Automatically uses Llama Guard format
+)
+# Result: decision="unsafe", reasoning="S9"
 ```
 
 ## API Server
@@ -106,7 +118,7 @@ result = await judge.evaluate(
 Run Judge as a REST API:
 
 ```bash
-vllm-judge serve --base-url http://
+vllm-judge serve --base-url http://vllm-server:8000 --port 9090
 ```
 
 Then use the HTTP API:
@@ -116,7 +128,7 @@ from vllm_judge.api import JudgeClient
 
 client = JudgeClient("http://localhost:9090")
 result = await client.evaluate(
-
+    content="Python is great!",
     criteria="technical accuracy"
 )
 ```
vllm_judge-0.1.3.dist-info/RECORD
ADDED
@@ -0,0 +1,20 @@
+vllm_judge/__init__.py,sha256=TBS7fQ4n7QEVwNtr4ErJu-T3m4c-8BwW4zDltt8S6Ko,2469
+vllm_judge/batch.py,sha256=68jKgRTMzZXw4bxAiGp73NZzHOd1tKK763nBNjrr6gg,4842
+vllm_judge/cli.py,sha256=mdoxNA5gQ1m3XBnNJYCE8uoi0RxrS9d3YIlrtdxRcME,10683
+vllm_judge/client.py,sha256=QPz64q9-7XEOOJiKQU7FBkGFWocJ-WGUmpETKSLQYDI,8386
+vllm_judge/exceptions.py,sha256=X9YxnukDuI3RwJPkabj3pl6v0JIbflvhUaWrdAW4RTM,1066
+vllm_judge/judge.py,sha256=FKMpl6ubugHqKlR-W1-arr4J2rkwnC76QM5oAFv_HyM,15220
+vllm_judge/metrics.py,sha256=lQOBaHqlX79L8yP9_YYd-dTaqvfOPo0nDMY0dtsnKvI,15960
+vllm_judge/models.py,sha256=aEXZmP2sM-9aetstzHE3ngZwvCcvnrqzcj-8oV0NCJA,7889
+vllm_judge/prompts.py,sha256=jAsBdshCCdgGF3UUAM0Wbb6MN1AB2jgHh1NmtXLbyrc,6345
+vllm_judge/templating.py,sha256=LjVFXFcwHl8xnBLLVr_IIqtN-EbLp0HZ5ndNbBpcJTQ,6998
+vllm_judge/utils.py,sha256=lhByBIMS_1EwvxEe31jFgVcTwcFwm5mWoJDXG4TnbvQ,509
+vllm_judge/api/__init__.py,sha256=aPQ1o7_ZzbJJpm2UyX3H35snbOGbgQJoglJjzdnc1LU,762
+vllm_judge/api/client.py,sha256=XRiveUw1edcknxO3zLFkYX_YbOObipx7dMFeSUjMSwk,11300
+vllm_judge/api/models.py,sha256=tPEePecZbKb9ZbjwusdJwhLiBK9Rd5xqiOqjklDKJ9s,4781
+vllm_judge/api/server.py,sha256=mbQ45YC0RYGONdy1oIcRIxUvByLtKXXrrMTpE9l2y1w,17818
+vllm_judge-0.1.3.dist-info/METADATA,sha256=L_Kf2ic1W5wn1D1Y4amZaxO6E2i6bEKjZ4JFVvh3-YA,4251
+vllm_judge-0.1.3.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+vllm_judge-0.1.3.dist-info/entry_points.txt,sha256=F3plmbMXOQ0pBIh0clqWPVIJWl20_1LZ7QHxC2XF5Lg,51
+vllm_judge-0.1.3.dist-info/top_level.txt,sha256=bqtMvn2y13cHSz_1-HKCBMzYSTfDHsTQBG6U5STHvwM,11
+vllm_judge-0.1.3.dist-info/RECORD,,
@@ -1,19 +0,0 @@
|
|
1
|
-
vllm_judge/__init__.py,sha256=iI-gdqNrjLwn7jzU7yjCZHCHKwbqrjbKp6OgAfl8Tu8,2363
|
2
|
-
vllm_judge/batch.py,sha256=68jKgRTMzZXw4bxAiGp73NZzHOd1tKK763nBNjrr6gg,4842
|
3
|
-
vllm_judge/cli.py,sha256=KQtUt_L4u5TPrS8xoyiKYt_hQ_FiHtGcrkecGEtktI8,10685
|
4
|
-
vllm_judge/client.py,sha256=QPz64q9-7XEOOJiKQU7FBkGFWocJ-WGUmpETKSLQYDI,8386
|
5
|
-
vllm_judge/exceptions.py,sha256=X9YxnukDuI3RwJPkabj3pl6v0JIbflvhUaWrdAW4RTM,1066
|
6
|
-
vllm_judge/judge.py,sha256=y2qp18PVtobAyxqI246tEsju82W-OuGG4zXfajTEW-E,14101
|
7
|
-
vllm_judge/metrics.py,sha256=QeGzaERvfRKQTt4JfquL1rW72GSkWdJ2_Nw_Hf0zqjY,15685
|
8
|
-
vllm_judge/models.py,sha256=fbEUFPsY3xhv54WueWqEKvAgIcWTm-JO42N2-6k5LeM,7417
|
9
|
-
vllm_judge/prompts.py,sha256=jAsBdshCCdgGF3UUAM0Wbb6MN1AB2jgHh1NmtXLbyrc,6345
|
10
|
-
vllm_judge/templating.py,sha256=LjVFXFcwHl8xnBLLVr_IIqtN-EbLp0HZ5ndNbBpcJTQ,6998
|
11
|
-
vllm_judge/api/__init__.py,sha256=aPQ1o7_ZzbJJpm2UyX3H35snbOGbgQJoglJjzdnc1LU,762
|
12
|
-
vllm_judge/api/client.py,sha256=mcpdH-9ko6aEh_JAybpPPVhHqlO3l5K-lTujTlkTw8c,11302
|
13
|
-
vllm_judge/api/models.py,sha256=tPEePecZbKb9ZbjwusdJwhLiBK9Rd5xqiOqjklDKJ9s,4781
|
14
|
-
vllm_judge/api/server.py,sha256=mbQ45YC0RYGONdy1oIcRIxUvByLtKXXrrMTpE9l2y1w,17818
|
15
|
-
vllm_judge-0.1.1.dist-info/METADATA,sha256=8tAJdnNjmSFrORci6TgJ2TTgZ8zmZCicBSgShbu31gY,3643
|
16
|
-
vllm_judge-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
17
|
-
vllm_judge-0.1.1.dist-info/entry_points.txt,sha256=F3plmbMXOQ0pBIh0clqWPVIJWl20_1LZ7QHxC2XF5Lg,51
|
18
|
-
vllm_judge-0.1.1.dist-info/top_level.txt,sha256=bqtMvn2y13cHSz_1-HKCBMzYSTfDHsTQBG6U5STHvwM,11
|
19
|
-
vllm_judge-0.1.1.dist-info/RECORD,,
|
{vllm_judge-0.1.1.dist-info → vllm_judge-0.1.3.dist-info}/entry_points.txt
File without changes
{vllm_judge-0.1.1.dist-info → vllm_judge-0.1.3.dist-info}/top_level.txt
File without changes