vllm-judge 0.1.2__py3-none-any.whl → 0.1.3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their public registries. It is provided for informational purposes only.
- vllm_judge/__init__.py +1 -1
- vllm_judge/api/client.py +2 -2
- vllm_judge/cli.py +2 -2
- vllm_judge/judge.py +5 -5
- {vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/METADATA +7 -8
- {vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/RECORD +9 -9
- {vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/WHEEL +0 -0
- {vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/entry_points.txt +0 -0
- {vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/top_level.txt +0 -0
vllm_judge/__init__.py
CHANGED
vllm_judge/api/client.py
CHANGED
@@ -65,7 +65,7 @@ class JudgeClient:
 
     async def evaluate(
         self,
-        response: Union[str, Dict[str, str]],
+        content: Union[str, Dict[str, str]],
         criteria: str = None,
         rubric: Union[str, Dict[Union[int, float], str]] = None,
         scale: Optional[Tuple[int, int]] = None,
@@ -87,7 +87,7 @@ class JudgeClient:
             EvaluationResult
         """
         request = EvaluateRequest(
-            response=response,
+            response=content,
             criteria=criteria,
             rubric=rubric,
             scale=list(scale) if scale else None,
vllm_judge/cli.py
CHANGED
@@ -75,7 +75,7 @@ def evaluate(
         # Use API client
         async with JudgeClient(api_url) as client:
             result = await client.evaluate(
-                response=response,
+                content=response,
                 criteria=criteria,
                 metric=metric,
                 scale=scale,
@@ -91,7 +91,7 @@ def evaluate(
         judge = Judge.from_url(base_url, model=model)
         async with judge:
             result = await judge.evaluate(
-                response=response,
+                content=response,
                 criteria=criteria,
                 metric=metric,
                 scale=scale,
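Both cli.py hunks make the same mechanical change: the CLI's `response` string is now passed to `evaluate()` through the renamed `content` parameter. For orientation, here is a condensed sketch of the dispatch these hunks sit inside, reconstructed from the context lines above; the helper name and the omitted option parsing are assumptions, not the actual cli.py source.

```python
import asyncio
from vllm_judge import Judge
from vllm_judge.api import JudgeClient

# Hypothetical helper mirroring the CLI's two code paths; everything except
# JudgeClient, Judge.from_url, and evaluate(content=...) is illustrative.
async def run_evaluation(response: str, criteria: str,
                         api_url: str = None,
                         base_url: str = None, model: str = None):
    if api_url:
        # Remote mode: send the evaluation to a running judge API server.
        async with JudgeClient(api_url) as client:
            return await client.evaluate(content=response, criteria=criteria)
    # Local mode: talk to the vLLM server directly.
    judge = Judge.from_url(base_url, model=model)
    async with judge:
        return await judge.evaluate(content=response, criteria=criteria)

# e.g. asyncio.run(run_evaluation("2 + 2 = 4", "arithmetic accuracy",
#                                 base_url="http://vllm-server:8000"))
```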
vllm_judge/judge.py
CHANGED
@@ -63,7 +63,7 @@ class Judge:
 
     async def evaluate(
         self,
-        response: Union[str, Dict[str, str]],
+        content: Union[str, Dict[str, str]],
         criteria: str = None,
         rubric: Union[str, Dict[Union[int, float], str]] = None,
         scale: Optional[Tuple[int, int]] = None,
@@ -79,7 +79,7 @@ class Judge:
         Universal evaluation method that adapts to use case.
 
         Args:
-            response: String for single evaluation, dict {"a": ..., "b": ...} for comparison
+            content: String for single evaluation, dict {"a": ..., "b": ...} for comparison
             criteria: What to evaluate for (can contain template variables)
             rubric: Instructions for evaluation, can be string or dict containing mapping of score to description (can contain template variables)
             scale: Optional numeric scale (min, max)
@@ -101,12 +101,12 @@ class Judge:
         """
         # Handle model-specific metrics
         if isinstance(metric, ModelSpecificMetric):
-            assert isinstance(response, str), "Model-specific metrics only support string content for now"
+            assert isinstance(content, str), "Model-specific metrics only support string content for now"
 
             # logger.info(f"Evaluating model-specific metric {metric.name}.")
             logger.info(f"We assume you're using {metric.model_pattern} type model. If not, please do not use this metric and use a normal metric instead.")
             # Skip ALL our formatting
-            messages = [{"role": "user", "content": response}]
+            messages = [{"role": "user", "content": content}]
 
             # vLLM applies model's chat template automatically
             llm_response = await self._call_model(messages)
@@ -157,7 +157,7 @@ class Judge:
 
         # Build messages
         messages = PromptBuilder.build_messages(
-            response=response,
+            response=content,
             criteria=criteria,
             rubric=rubric,
             scale=scale,
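Taken together, the four judge.py hunks are a single rename: `Judge.evaluate()`'s first parameter is now `content` instead of `response`, reflecting that it can carry either one string or an {"a": ..., "b": ...} pair. A minimal sketch of both call shapes, assuming the imports and result fields shown in the README diff below; the comparison texts are made-up examples.

```python
import asyncio
from vllm_judge import Judge

async def main():
    judge = Judge.from_url("http://vllm-server:8000")
    async with judge:
        # Single evaluation: content is one string.
        single = await judge.evaluate(
            content="The Earth orbits around the Sun.",
            criteria="scientific accuracy",
        )
        # Comparison: content is a dict with sides "a" and "b".
        pair = await judge.evaluate(
            content={"a": "Paris is the capital of France.",
                     "b": "Paris is the capital of Italy."},
            criteria="factual accuracy",
        )
        print(single.decision, single.reasoning)
        print(pair.decision, pair.reasoning)

asyncio.run(main())
```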
{vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vllm_judge
-Version: 0.1.2
+Version: 0.1.3
 Summary: LLM-as-a-Judge evaluations for vLLM hosted models
 Author: TrustyAI team
 Author-email: Sai Chandra Pandraju <saichandrapandraju@gmail.com>
@@ -49,9 +49,8 @@ A lightweight library for LLM-as-a-Judge evaluations using vLLM hosted models. E
 - 🚀 **Simple Interface**: Single `evaluate()` method that adapts to any use case
 - 🎯 **Pre-built Metrics**: 20+ ready-to-use evaluation metrics
 - 🛡️ **Model-Specific Support:** Seamlessly works with specialized models like Llama Guard without breaking their trained formats.
--
+- ⚡ **High Performance**: Async-first design enables high-throughput evaluations
 - 🔧 **Template Support**: Dynamic evaluations with template variables
-- ⚡ **High Performance**: Optimized for vLLM with automatic batching
 - 🌐 **API Mode**: Run as a REST API service
 
 ## Installation
@@ -80,7 +79,7 @@ judge = Judge.from_url("http://vllm-server:8000")
 
 # Simple evaluation
 result = await judge.evaluate(
-    response="The Earth orbits around the Sun.",
+    content="The Earth orbits around the Sun.",
     criteria="scientific accuracy"
 )
 print(f"Decision: {result.decision}")
@@ -90,13 +89,13 @@ print(f"Reasoning: {result.reasoning}")
 from vllm_judge import CODE_QUALITY
 
 result = await judge.evaluate(
-    response="def add(a, b): return a + b",
+    content="def add(a, b): return a + b",
     metric=CODE_QUALITY
 )
 
 # With template variables
 result = await judge.evaluate(
-    response="Essay content here...",
+    content="Essay content here...",
     criteria="Evaluate this {doc_type} for {audience}",
     template_vars={
         "doc_type": "essay",
@@ -108,7 +107,7 @@ result = await judge.evaluate(
 from vllm_judge import LLAMA_GUARD_3_SAFETY
 
 result = await judge.evaluate(
-    response="How do I make a bomb?",
+    content="How do I make a bomb?",
     metric=LLAMA_GUARD_3_SAFETY # Automatically uses Llama Guard format
 )
 # Result: decision="unsafe", reasoning="S9"
@@ -129,7 +128,7 @@ from vllm_judge.api import JudgeClient
 
 client = JudgeClient("http://localhost:9090")
 result = await client.evaluate(
-    response="Python is great!",
+    content="Python is great!",
     criteria="technical accuracy"
 )
 ```
{vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/RECORD
CHANGED
@@ -1,20 +1,20 @@
-vllm_judge/__init__.py,sha256=
+vllm_judge/__init__.py,sha256=TBS7fQ4n7QEVwNtr4ErJu-T3m4c-8BwW4zDltt8S6Ko,2469
 vllm_judge/batch.py,sha256=68jKgRTMzZXw4bxAiGp73NZzHOd1tKK763nBNjrr6gg,4842
-vllm_judge/cli.py,sha256=
+vllm_judge/cli.py,sha256=mdoxNA5gQ1m3XBnNJYCE8uoi0RxrS9d3YIlrtdxRcME,10683
 vllm_judge/client.py,sha256=QPz64q9-7XEOOJiKQU7FBkGFWocJ-WGUmpETKSLQYDI,8386
 vllm_judge/exceptions.py,sha256=X9YxnukDuI3RwJPkabj3pl6v0JIbflvhUaWrdAW4RTM,1066
-vllm_judge/judge.py,sha256=
+vllm_judge/judge.py,sha256=FKMpl6ubugHqKlR-W1-arr4J2rkwnC76QM5oAFv_HyM,15220
 vllm_judge/metrics.py,sha256=lQOBaHqlX79L8yP9_YYd-dTaqvfOPo0nDMY0dtsnKvI,15960
 vllm_judge/models.py,sha256=aEXZmP2sM-9aetstzHE3ngZwvCcvnrqzcj-8oV0NCJA,7889
 vllm_judge/prompts.py,sha256=jAsBdshCCdgGF3UUAM0Wbb6MN1AB2jgHh1NmtXLbyrc,6345
 vllm_judge/templating.py,sha256=LjVFXFcwHl8xnBLLVr_IIqtN-EbLp0HZ5ndNbBpcJTQ,6998
 vllm_judge/utils.py,sha256=lhByBIMS_1EwvxEe31jFgVcTwcFwm5mWoJDXG4TnbvQ,509
 vllm_judge/api/__init__.py,sha256=aPQ1o7_ZzbJJpm2UyX3H35snbOGbgQJoglJjzdnc1LU,762
-vllm_judge/api/client.py,sha256=
+vllm_judge/api/client.py,sha256=XRiveUw1edcknxO3zLFkYX_YbOObipx7dMFeSUjMSwk,11300
 vllm_judge/api/models.py,sha256=tPEePecZbKb9ZbjwusdJwhLiBK9Rd5xqiOqjklDKJ9s,4781
 vllm_judge/api/server.py,sha256=mbQ45YC0RYGONdy1oIcRIxUvByLtKXXrrMTpE9l2y1w,17818
-vllm_judge-0.1.
-vllm_judge-0.1.
-vllm_judge-0.1.
-vllm_judge-0.1.
-vllm_judge-0.1.
+vllm_judge-0.1.3.dist-info/METADATA,sha256=L_Kf2ic1W5wn1D1Y4amZaxO6E2i6bEKjZ4JFVvh3-YA,4251
+vllm_judge-0.1.3.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+vllm_judge-0.1.3.dist-info/entry_points.txt,sha256=F3plmbMXOQ0pBIh0clqWPVIJWl20_1LZ7QHxC2XF5Lg,51
+vllm_judge-0.1.3.dist-info/top_level.txt,sha256=bqtMvn2y13cHSz_1-HKCBMzYSTfDHsTQBG6U5STHvwM,11
+vllm_judge-0.1.3.dist-info/RECORD,,
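The five truncated `-` rows are the 0.1.2 dist-info entries whose hashes were cut off in extraction; the 0.1.3 replacements are intact. Each RECORD row has the form `path,sha256=<digest>,<size>`, where the digest is the unpadded urlsafe-base64 SHA-256 of the file, per the wheel spec. A small sketch that recomputes an entry for comparison; the path is just an example taken from the list above.

```python
import base64
import hashlib

def record_entry(path: str) -> str:
    # Read the installed file and recompute its RECORD row.
    with open(path, "rb") as f:
        data = f.read()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}"

# record_entry("vllm_judge/batch.py") should reproduce the row above:
# vllm_judge/batch.py,sha256=68jKgRTMzZXw4bxAiGp73NZzHOd1tKK763nBNjrr6gg,4842
```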
{vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/WHEEL
File without changes
{vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/entry_points.txt
File without changes
{vllm_judge-0.1.2.dist-info → vllm_judge-0.1.3.dist-info}/top_level.txt
File without changes