PyPI - vllm-judge - Versions diffs - 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl - Mend

vllm-judge 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

This diff shows the differences between the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (9) hide show

vllm_judge/models.py CHANGED Viewed

@@ -99,7 +99,8 @@ class Metric:
         system_prompt: Optional[str] = None,
         template_vars: Optional[Dict[str, Any]] = None,
         required_vars: Optional[List[str]] = None,
-        template_engine: Union[str, TemplateEngine] = TemplateEngine.FORMAT
+        template_engine: Union[str, TemplateEngine] = TemplateEngine.FORMAT,
+        additional_instructions: Optional[str] = None
     ):
         """
         Initialize a reusable metric.
@@ -125,7 +126,7 @@ class Metric:
         self.template_vars = template_vars or {}
         self.required_vars = required_vars or []
         self.template_engine = TemplateEngine(template_engine)
+        self.additional_instructions = additional_instructions
         # Auto-detect required variables if not specified
         if not self.required_vars and self.template_engine == TemplateEngine.FORMAT:
             self._auto_detect_required_vars()

vllm_judge/prompts.py CHANGED Viewed

@@ -48,7 +48,7 @@ class PromptBuilder:
         # Output format instructions
         system_prompt+="\nYou must respond in JSON format:\n"
         system_prompt+="""{
-    "decision": <your judgment - string|number|boolean>,
+    "decision": <your judgment - string|boolean>,
     "reasoning": "<concise explanation of your judgment>",
     "score": <numeric score if requested, otherwise null>
 }"""
@@ -105,16 +105,18 @@ class PromptBuilder:
             parts.append(f"\nResponse B:\n{content['b']}")
         else:
             if input:
-                parts.append(f"Evaluate how well this response addresses the input for: {criteria}")
+                parts.append(f"Evaluate how well this content addresses the input for: {criteria}")
             else:
-                parts.append(f"Evaluate the following response based on: {criteria}")
+                parts.append(f"Evaluate the following content based on: {criteria}")
             if context:
                 parts.append(f"\nContext: {context}")
-            parts.append(f"\nResponse to evaluate:\n{content}")
+            parts.append(f"\nContent to evaluate:\n{content}")
+        parts.append(f"\nYou must return a decision label/class (your judgement) for the `decision` field and a concise explanation for the `reasoning` field.")
         # Add scale and rubric
         if scale:
-            parts.append(f"\nProvide a score from {scale[0]} to {scale[1]}")
+            parts.append(f"\nIn addition to these, provide a score from {scale[0]} to {scale[1]}")
             if isinstance(rubric, dict):
                 parts.append("\nScoring guide:")
@@ -137,7 +139,7 @@ class PromptBuilder:
                 if "input" in ex:
                     parts.append(f"Input: {ex['input']}")
                 if "content" in ex:
-                    parts.append(f"Response: {ex['content']}")
+                    parts.append(f"Content: {ex['content']}")
                 elif "text" in ex:
                     parts.append(f"Text: {ex['text']}")
@@ -156,7 +158,7 @@ class PromptBuilder:
         # Output format instructions
         parts.append("\nYou must respond in JSON format:")
         parts.append("""{
-    "decision": <your judgment - string|number|boolean>,
+    "decision": <your judgment - string|boolean>,
     "reasoning": "<concise explanation of your judgment>",
     "score": <numeric score if requested, otherwise null>
 }""")

{vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: vllm_judge
-Version: 0.1.4
+Version: 0.1.5
 Summary: LLM-as-a-Judge evaluations for vLLM hosted models
 Author: TrustyAI team
 Author-email: Sai Chandra Pandraju <saichandrapandraju@gmail.com>

{vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/RECORD RENAMED Viewed

@@ -1,20 +1,20 @@
-vllm_judge/__init__.py,sha256=RsdlyvZ78SR3E9ytzQcdurgP-8jh_nlyw355WgUcR7M,2469
+vllm_judge/__init__.py,sha256=6OKo_RbNOov83pZIPfg12ITxiE6UZh2_UOTjQsgWbFY,2792
 vllm_judge/batch.py,sha256=3zkatZxQESCjYz99qfLhxl2Dq2tHAfhtdTiXxjVqUxE,4836
 vllm_judge/cli.py,sha256=tnMqJ2RvCFaXUY4ok4IO-d9IRNJhEck60AJNzdCaqhg,13679
 vllm_judge/client.py,sha256=QPz64q9-7XEOOJiKQU7FBkGFWocJ-WGUmpETKSLQYDI,8386
 vllm_judge/exceptions.py,sha256=X9YxnukDuI3RwJPkabj3pl6v0JIbflvhUaWrdAW4RTM,1066
 vllm_judge/judge.py,sha256=SDT_cGDZzHu8NOjG6eqHQsYqIuXR12j7ocpyrVDhHrQ,16939
-vllm_judge/metrics.py,sha256=kH5Zb5Z6bIVa26qROe1PscBMnBX98ueKMbweLhhfM9o,25646
-vllm_judge/models.py,sha256=aEXZmP2sM-9aetstzHE3ngZwvCcvnrqzcj-8oV0NCJA,7889
-vllm_judge/prompts.py,sha256=kNswJPsJtdweV-yItggsYF0FV6FWP71fREmxZFy8sjg,7085
+vllm_judge/metrics.py,sha256=WwtR6Bb4cc0gDplhZnysNzD1EfOMCEzFc8-3hJMqnJs,48709
+vllm_judge/models.py,sha256=o4OdRtRdsz9n5RhHrz-uA9ylG0cGQg99NJYay0RaeDE,7998
+vllm_judge/prompts.py,sha256=KC8AfiIgKKxQuhT1bnnyYXrSBbcU2-RnkSLqDJfrt8o,7251
 vllm_judge/templating.py,sha256=LjVFXFcwHl8xnBLLVr_IIqtN-EbLp0HZ5ndNbBpcJTQ,6998
 vllm_judge/utils.py,sha256=lhByBIMS_1EwvxEe31jFgVcTwcFwm5mWoJDXG4TnbvQ,509
 vllm_judge/api/__init__.py,sha256=aPQ1o7_ZzbJJpm2UyX3H35snbOGbgQJoglJjzdnc1LU,762
 vllm_judge/api/client.py,sha256=l46IpQHJxmbDfXpyCOXfir70c_3hPaIr6OEiOzOMk5Q,12449
 vllm_judge/api/models.py,sha256=GXj3slwytJWg5M4f5MPZ8Ft_hrkEEAZh0qgpYDy-Qe4,5102
 vllm_judge/api/server.py,sha256=1UQMV6MRdlqHS6NYdrQI41bi_wNb0QC8RZD4jCEeTkU,17888
-vllm_judge-0.1.4.dist-info/METADATA,sha256=KaiXUiIsEYbBbc4bdP1yvMwugXKPDRBoGal-Q-8ADTc,4251
-vllm_judge-0.1.4.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
-vllm_judge-0.1.4.dist-info/entry_points.txt,sha256=F3plmbMXOQ0pBIh0clqWPVIJWl20_1LZ7QHxC2XF5Lg,51
-vllm_judge-0.1.4.dist-info/top_level.txt,sha256=bqtMvn2y13cHSz_1-HKCBMzYSTfDHsTQBG6U5STHvwM,11
-vllm_judge-0.1.4.dist-info/RECORD,,
+vllm_judge-0.1.5.dist-info/METADATA,sha256=5UXUqyckWp9fGLQXcBxkI6ejmFfWpCjjpyIeMx96zTI,4251
+vllm_judge-0.1.5.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
+vllm_judge-0.1.5.dist-info/entry_points.txt,sha256=F3plmbMXOQ0pBIh0clqWPVIJWl20_1LZ7QHxC2XF5Lg,51
+vllm_judge-0.1.5.dist-info/top_level.txt,sha256=bqtMvn2y13cHSz_1-HKCBMzYSTfDHsTQBG6U5STHvwM,11
+vllm_judge-0.1.5.dist-info/RECORD,,

{vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/WHEEL RENAMED Viewed

File without changes

{vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{vllm_judge-0.1.4.dist-info → vllm_judge-0.1.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

vllm-judge 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl

vllm-judge 0.1.4__py3-none-any.whl → 0.1.5__py3-none-any.whl