PyPI - vllm-judge - Versions diffs - 0.1.0__tar.gz - Mend

vllm-judge 0.1.0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (24) hide show

vllm_judge-0.1.0/PKG-INFO +124 -0
vllm_judge-0.1.0/README.md +84 -0
vllm_judge-0.1.0/pyproject.toml +161 -0
vllm_judge-0.1.0/setup.cfg +4 -0
vllm_judge-0.1.0/src/vllm_judge/__init__.py +120 -0
vllm_judge-0.1.0/src/vllm_judge/api/__init__.py +39 -0
vllm_judge-0.1.0/src/vllm_judge/api/client.py +354 -0
vllm_judge-0.1.0/src/vllm_judge/api/models.py +157 -0
vllm_judge-0.1.0/src/vllm_judge/api/server.py +564 -0
vllm_judge-0.1.0/src/vllm_judge/batch.py +147 -0
vllm_judge-0.1.0/src/vllm_judge/cli.py +288 -0
vllm_judge-0.1.0/src/vllm_judge/client.py +262 -0
vllm_judge-0.1.0/src/vllm_judge/exceptions.py +42 -0
vllm_judge-0.1.0/src/vllm_judge/judge.py +421 -0
vllm_judge-0.1.0/src/vllm_judge/metrics.py +417 -0
vllm_judge-0.1.0/src/vllm_judge/models.py +185 -0
vllm_judge-0.1.0/src/vllm_judge/prompts.py +175 -0
vllm_judge-0.1.0/src/vllm_judge/templating.py +206 -0
vllm_judge-0.1.0/src/vllm_judge.egg-info/PKG-INFO +124 -0
vllm_judge-0.1.0/src/vllm_judge.egg-info/SOURCES.txt +22 -0
vllm_judge-0.1.0/src/vllm_judge.egg-info/dependency_links.txt +1 -0
vllm_judge-0.1.0/src/vllm_judge.egg-info/entry_points.txt +2 -0
vllm_judge-0.1.0/src/vllm_judge.egg-info/requires.txt +32 -0
vllm_judge-0.1.0/src/vllm_judge.egg-info/top_level.txt +1 -0

vllm_judge-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,124 @@
+Metadata-Version: 2.4
+Name: vllm_judge
+Version: 0.1.0
+Summary: LLM-as-a-Judge evaluations for vLLM hosted models
+Author: TrustyAI team
+Author-email: Sai Chandra Pandraju <saichandrapandraju@gmail.com>
+Project-URL: Homepage, https://github.com/saichandrapandraju/vllm_judge
+Project-URL: Repository, https://github.com/saichandrapandraju/vllm_judge
+Project-URL: Issues, https://github.com/saichandrapandraju/vllm_judge/issues
+Keywords: llm,evaluation,vllm,judge,ai,machine-learning,nlp,llm-evaluation,llm-as-judge
+Requires-Python: >=3.8
+Description-Content-Type: text/markdown
+Requires-Dist: httpx>=0.24.0
+Requires-Dist: pydantic>=2.0.0
+Requires-Dist: tenacity>=8.0.0
+Requires-Dist: click>=8.0.0
+Provides-Extra: api
+Requires-Dist: fastapi>=0.100.0; extra == "api"
+Requires-Dist: uvicorn[standard]>=0.22.0; extra == "api"
+Requires-Dist: websockets>=11.0; extra == "api"
+Provides-Extra: jinja2
+Requires-Dist: jinja2>=3.0.0; extra == "jinja2"
+Provides-Extra: dev
+Requires-Dist: pytest>=7.0.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
+Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
+Requires-Dist: black>=23.0.0; extra == "dev"
+Requires-Dist: isort>=5.12.0; extra == "dev"
+Requires-Dist: flake8>=6.0.0; extra == "dev"
+Requires-Dist: mypy>=1.0.0; extra == "dev"
+Provides-Extra: test
+Requires-Dist: pytest>=7.0.0; extra == "test"
+Requires-Dist: pytest-asyncio>=0.21.0; extra == "test"
+Requires-Dist: pytest-cov>=4.0.0; extra == "test"
+Requires-Dist: pytest-mock>=3.10.0; extra == "test"
+Provides-Extra: docs
+Requires-Dist: mkdocs>=1.5.0; extra == "docs"
+Requires-Dist: mkdocs-material>=9.0.0; extra == "docs"
+Requires-Dist: mkdocstrings[python]>=0.24.0; extra == "docs"
+# vLLM Judge
+A lightweight library for LLM-as-a-Judge evaluations using vLLM hosted models.
+## Features
+- 🚀 **Simple Interface**: Single `evaluate()` method that adapts to any use case
+- 🎯 **Pre-built Metrics**: 20+ ready-to-use evaluation metrics
+- 🔧 **Template Support**: Dynamic evaluations with template variables
+- ⚡ **High Performance**: Optimized for vLLM with automatic batching
+- 🌐 **API Mode**: Run as a REST API service
+- 🔄 **Async Native**: Built for high-throughput evaluations
+## Installation
+```bash
+# Basic installation
+pip install vllm_judge
+# With API support (quotes keep shells such as zsh from globbing the brackets)
+pip install "vllm_judge[api]"
+# With Jinja2 template support
+pip install "vllm_judge[jinja2]"
+# Everything
+pip install "vllm_judge[api,jinja2]"
+```
+## Quick Start
+```python
+from vllm_judge import Judge
+# Initialize with the vLLM server URL (top-level `await`: run this inside an async function, or in a notebook/asyncio REPL)
+judge = await Judge.from_url("http://localhost:8000")
+# Simple evaluation
+result = await judge.evaluate(
+    response="The Earth orbits around the Sun.",
+    criteria="scientific accuracy"
+)
+print(f"Decision: {result.decision}")
+print(f"Reasoning: {result.reasoning}")
+# Using pre-built metrics
+from vllm_judge import CODE_QUALITY
+result = await judge.evaluate(
+    response="def add(a, b): return a + b",
+    metric=CODE_QUALITY
+)
+# With template variables
+result = await judge.evaluate(
+    response="Essay content here...",
+    criteria="Evaluate this {doc_type} for {audience}",
+    template_vars={
+        "doc_type": "essay",
+        "audience": "high school students"
+    }
+)
+```
+## API Server
+Run Judge as a REST API:
+```bash
+vllm-judge serve --base-url http://localhost:8000 --port 9090 --host localhost
+```
+Then use the HTTP API:
+```python
+from vllm_judge.api import JudgeClient
+client = JudgeClient("http://localhost:9090")
+result = await client.evaluate(
+    response="Python is great!",
+    criteria="technical accuracy"
+)
+```

vllm_judge-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,84 @@
+# vLLM Judge
+A lightweight library for LLM-as-a-Judge evaluations using vLLM hosted models.
+## Features
+- 🚀 **Simple Interface**: Single `evaluate()` method that adapts to any use case
+- 🎯 **Pre-built Metrics**: 20+ ready-to-use evaluation metrics
+- 🔧 **Template Support**: Dynamic evaluations with template variables
+- ⚡ **High Performance**: Optimized for vLLM with automatic batching
+- 🌐 **API Mode**: Run as a REST API service
+- 🔄 **Async Native**: Built for high-throughput evaluations
+## Installation
+```bash
+# Basic installation
+pip install vllm_judge
+# With API support (quotes keep shells such as zsh from globbing the brackets)
+pip install "vllm_judge[api]"
+# With Jinja2 template support
+pip install "vllm_judge[jinja2]"
+# Everything
+pip install "vllm_judge[api,jinja2]"
+```
+## Quick Start
+```python
+from vllm_judge import Judge
+# Initialize with the vLLM server URL (top-level `await`: run this inside an async function, or in a notebook/asyncio REPL)
+judge = await Judge.from_url("http://localhost:8000")
+# Simple evaluation
+result = await judge.evaluate(
+    response="The Earth orbits around the Sun.",
+    criteria="scientific accuracy"
+)
+print(f"Decision: {result.decision}")
+print(f"Reasoning: {result.reasoning}")
+# Using pre-built metrics
+from vllm_judge import CODE_QUALITY
+result = await judge.evaluate(
+    response="def add(a, b): return a + b",
+    metric=CODE_QUALITY
+)
+# With template variables
+result = await judge.evaluate(
+    response="Essay content here...",
+    criteria="Evaluate this {doc_type} for {audience}",
+    template_vars={
+        "doc_type": "essay",
+        "audience": "high school students"
+    }
+)
+```
+## API Server
+Run Judge as a REST API:
+```bash
+vllm-judge serve --base-url http://localhost:8000 --port 9090 --host localhost
+```
+Then use the HTTP API:
+```python
+from vllm_judge.api import JudgeClient
+client = JudgeClient("http://localhost:9090")
+result = await client.evaluate(
+    response="Python is great!",
+    criteria="technical accuracy"
+)
+```

vllm_judge-0.1.0/pyproject.toml ADDED Viewed

@@ -0,0 +1,161 @@
+[build-system]
+requires = ["setuptools>=61.0", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "vllm_judge"
+version = "0.1.0"
+description = "LLM-as-a-Judge evaluations for vLLM hosted models"
+readme = "README.md"
+authors = [
+    {name = "Sai Chandra Pandraju", email = "saichandrapandraju@gmail.com"},
+    {name = "TrustyAI team"}
+]
+keywords = [
+    "llm",
+    "evaluation",
+    "vllm",
+    "judge",
+    "ai",
+    "machine-learning",
+    "nlp",
+    "llm-evaluation",
+    "llm-as-judge"
+]
+requires-python = ">=3.8"
+dependencies = [
+    "httpx>=0.24.0",
+    "pydantic>=2.0.0",
+    "tenacity>=8.0.0",
+    "click>=8.0.0",
+]
+[project.optional-dependencies]
+api = [
+    "fastapi>=0.100.0",
+    "uvicorn[standard]>=0.22.0",
+    "websockets>=11.0",
+]
+jinja2 = [
+    "jinja2>=3.0.0",
+]
+dev = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+    "pytest-cov>=4.0.0",
+    "black>=23.0.0",
+    "isort>=5.12.0",
+    "flake8>=6.0.0",
+    "mypy>=1.0.0",
+]
+test = [
+    "pytest>=7.0.0",
+    "pytest-asyncio>=0.21.0",
+    "pytest-cov>=4.0.0",
+    "pytest-mock>=3.10.0",
+]
+docs = [
+    "mkdocs>=1.5.0",
+    "mkdocs-material>=9.0.0",
+    "mkdocstrings[python]>=0.24.0",
+]
+[project.scripts]
+vllm-judge = "vllm_judge.cli:main"
+[project.urls]
+Homepage = "https://github.com/saichandrapandraju/vllm_judge"
+Repository = "https://github.com/saichandrapandraju/vllm_judge"
+Issues = "https://github.com/saichandrapandraju/vllm_judge/issues"
+[tool.setuptools.packages.find]
+where = ["src"]
+[tool.setuptools.package-data]
+vllm_judge = ["py.typed"]
+[tool.black]
+line-length = 88
+target-version = ['py38', 'py39', 'py310', 'py311', 'py312']
+include = '\.pyi?$'
+exclude = '''
+/(
+    \.eggs
+  | \.git
+  | \.hg
+  | \.mypy_cache
+  | \.tox
+  | \.venv
+  | _build
+  | buck-out
+  | build
+  | dist
+)/
+'''
+[tool.isort]
+profile = "black"
+line_length = 88
+[tool.pytest.ini_options]
+testpaths = ["tests"]
+python_files = ["test_*.py", "*_test.py"]
+python_classes = ["Test*"]
+python_functions = ["test_*"]
+asyncio_mode = "auto"
+addopts = [
+    "--verbose",
+    "--cov=vllm_judge",
+    "--cov-report=term-missing",
+    "--cov-report=html",
+    "--cov-report=xml",
+]
+[tool.mypy]
+python_version = "3.8"
+warn_return_any = true
+warn_unused_configs = true
+disallow_untyped_defs = true
+disallow_incomplete_defs = true
+check_untyped_defs = true
+disallow_untyped_decorators = true
+no_implicit_optional = true
+warn_redundant_casts = true
+warn_unused_ignores = true
+warn_no_return = true
+warn_unreachable = true
+strict_equality = true
+[tool.coverage.run]
+source = ["src/vllm_judge"]
+omit = ["*/tests/*", "*/test_*"]
+[tool.coverage.report]
+precision = 2
+show_missing = true
+skip_covered = false
+[tool.ruff]
+target-version = "py38"
+line-length = 88
+select = [
+    "E",  # pycodestyle errors
+    "W",  # pycodestyle warnings
+    "F",  # pyflakes
+    "I",  # isort
+    "B",  # flake8-bugbear
+    "C4", # flake8-comprehensions
+    "UP", # pyupgrade
+]
+ignore = [
+    "E501",  # line too long, handled by black
+    "B008",  # do not perform function calls in argument defaults
+    "C901",  # too complex
+]
+[tool.ruff.per-file-ignores]
+"__init__.py" = ["F401"]
+[tool.ruff.isort]
+known-third-party = ["httpx", "pydantic", "tenacity"]

vllm_judge-0.1.0/setup.cfg ADDED Viewed

@@ -0,0 +1,4 @@
+[egg_info]
+tag_build =
+tag_date = 0

vllm_judge-0.1.0/src/vllm_judge/__init__.py ADDED Viewed

@@ -0,0 +1,120 @@
+"""
+vLLM Judge - LLM-as-a-Judge evaluations for vLLM hosted models.
+A lightweight library for evaluating text responses using self-hosted language models
+via vLLM's OpenAI-compatible API.
+"""
+__version__ = "0.1.0"
+from vllm_judge.judge import Judge
+from vllm_judge.models import (
+    JudgeConfig,
+    EvaluationResult,
+    Metric,
+    BatchResult,
+    TemplateEngine
+)
+from vllm_judge.templating import TemplateProcessor
+from vllm_judge.metrics import (
+    # General metrics
+    HELPFULNESS,
+    ACCURACY,
+    CLARITY,
+    CONCISENESS,
+    RELEVANCE,
+    # Safety metrics
+    SAFETY,
+    TOXICITY,
+    # Code metrics
+    CODE_QUALITY,
+    CODE_SECURITY,
+    # Content metrics
+    CREATIVITY,
+    PROFESSIONALISM,
+    EDUCATIONAL_VALUE,
+    # Comparison metrics
+    PREFERENCE,
+    # Binary metrics
+    APPROPRIATE,
+    FACTUAL,
+    # Domain metrics
+    MEDICAL_ACCURACY,
+    LEGAL_APPROPRIATENESS,
+    # Utility
+    BUILTIN_METRICS,
+    # Template metrics
+    EDUCATIONAL_CONTENT_TEMPLATE,
+    CODE_REVIEW_TEMPLATE,
+    CUSTOMER_SERVICE_TEMPLATE,
+    WRITING_QUALITY_TEMPLATE,
+    PRODUCT_REVIEW_TEMPLATE,
+    MEDICAL_INFO_TEMPLATE,
+    API_DOCS_TEMPLATE,
+)
+from vllm_judge.exceptions import (
+    VLLMJudgeError,
+    ConfigurationError,
+    ConnectionError,
+    TimeoutError,
+    ParseError,
+    MetricNotFoundError,
+    InvalidInputError,
+    RetryExhaustedError
+)
+__all__ = [
+    # Main classes
+    "Judge",
+    "JudgeConfig",
+    "EvaluationResult",
+    "Metric",
+    "BatchResult",
+    "TemplateEngine",
+    "TemplateProcessor",
+    # Metrics
+    "HELPFULNESS",
+    "ACCURACY",
+    "CLARITY",
+    "CONCISENESS",
+    "RELEVANCE",
+    "SAFETY",
+    "TOXICITY",
+    "CODE_QUALITY",
+    "CODE_SECURITY",
+    "CREATIVITY",
+    "PROFESSIONALISM",
+    "EDUCATIONAL_VALUE",
+    "PREFERENCE",
+    "APPROPRIATE",
+    "FACTUAL",
+    "MEDICAL_ACCURACY",
+    "LEGAL_APPROPRIATENESS",
+    "BUILTIN_METRICS",
+    "EDUCATIONAL_CONTENT_TEMPLATE",
+    "CODE_REVIEW_TEMPLATE",
+    "CUSTOMER_SERVICE_TEMPLATE",
+    "WRITING_QUALITY_TEMPLATE",
+    "PRODUCT_REVIEW_TEMPLATE",
+    "MEDICAL_INFO_TEMPLATE",
+    "API_DOCS_TEMPLATE",
+    # Exceptions
+    "VLLMJudgeError",
+    "ConfigurationError",
+    "ConnectionError",
+    "TimeoutError",
+    "ParseError",
+    "MetricNotFoundError",
+    "InvalidInputError",
+    "RetryExhaustedError"
+]

vllm_judge-0.1.0/src/vllm_judge/api/__init__.py ADDED Viewed

@@ -0,0 +1,39 @@
+"""
+API module for vLLM Judge.
+"""
+from vllm_judge.api.server import app, create_app, start_server
+from vllm_judge.api.client import JudgeClient
+from vllm_judge.api.models import (
+    EvaluateRequest,
+    BatchEvaluateRequest,
+    AsyncBatchRequest,
+    EvaluationResponse,
+    BatchResponse,
+    AsyncBatchResponse,
+    JobStatusResponse,
+    MetricInfo,
+    HealthResponse,
+    ErrorResponse
+)
+__all__ = [
+    # Server
+    "app",
+    "create_app",
+    "start_server",
+    # Client
+    "JudgeClient",
+    # Models
+    "EvaluateRequest",
+    "BatchEvaluateRequest",
+    "AsyncBatchRequest",
+    "EvaluationResponse",
+    "BatchResponse",
+    "AsyncBatchResponse",
+    "JobStatusResponse",
+    "MetricInfo",
+    "HealthResponse",
+    "ErrorResponse"
+]