trusteval-ai 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. trusteval_ai-1.0.0/CHANGELOG.md +19 -0
  2. trusteval_ai-1.0.0/LICENSE +21 -0
  3. trusteval_ai-1.0.0/MANIFEST.in +6 -0
  4. trusteval_ai-1.0.0/PKG-INFO +572 -0
  5. trusteval_ai-1.0.0/README.md +487 -0
  6. trusteval_ai-1.0.0/assets/logo-dark.svg +32 -0
  7. trusteval_ai-1.0.0/assets/logo.svg +32 -0
  8. trusteval_ai-1.0.0/cli/__init__.py +3 -0
  9. trusteval_ai-1.0.0/cli/main.py +930 -0
  10. trusteval_ai-1.0.0/dashboard/__init__.py +3 -0
  11. trusteval_ai-1.0.0/dashboard/backend/__init__.py +3 -0
  12. trusteval_ai-1.0.0/dashboard/backend/main.py +196 -0
  13. trusteval_ai-1.0.0/dashboard/backend/middleware/__init__.py +3 -0
  14. trusteval_ai-1.0.0/dashboard/backend/middleware/auth.py +87 -0
  15. trusteval_ai-1.0.0/dashboard/backend/middleware/cors.py +63 -0
  16. trusteval_ai-1.0.0/dashboard/backend/middleware/logging.py +71 -0
  17. trusteval_ai-1.0.0/dashboard/backend/middleware/rate_limit.py +43 -0
  18. trusteval_ai-1.0.0/dashboard/backend/models/__init__.py +3 -0
  19. trusteval_ai-1.0.0/dashboard/backend/models/database.py +158 -0
  20. trusteval_ai-1.0.0/dashboard/backend/models/schemas.py +223 -0
  21. trusteval_ai-1.0.0/dashboard/backend/routers/__init__.py +3 -0
  22. trusteval_ai-1.0.0/dashboard/backend/routers/evaluations.py +188 -0
  23. trusteval_ai-1.0.0/dashboard/backend/routers/health.py +86 -0
  24. trusteval_ai-1.0.0/dashboard/backend/routers/industries.py +128 -0
  25. trusteval_ai-1.0.0/dashboard/backend/routers/providers.py +161 -0
  26. trusteval_ai-1.0.0/dashboard/backend/routers/reports.py +91 -0
  27. trusteval_ai-1.0.0/dashboard/backend/services/__init__.py +3 -0
  28. trusteval_ai-1.0.0/dashboard/backend/services/evaluation_service.py +273 -0
  29. trusteval_ai-1.0.0/dashboard/backend/services/report_service.py +252 -0
  30. trusteval_ai-1.0.0/pyproject.toml +101 -0
  31. trusteval_ai-1.0.0/setup.cfg +33 -0
  32. trusteval_ai-1.0.0/trusteval/__init__.py +39 -0
  33. trusteval_ai-1.0.0/trusteval/core/__init__.py +22 -0
  34. trusteval_ai-1.0.0/trusteval/core/benchmark.py +224 -0
  35. trusteval_ai-1.0.0/trusteval/core/evaluator.py +797 -0
  36. trusteval_ai-1.0.0/trusteval/core/pipeline.py +165 -0
  37. trusteval_ai-1.0.0/trusteval/core/result.py +285 -0
  38. trusteval_ai-1.0.0/trusteval/core/scorer.py +162 -0
  39. trusteval_ai-1.0.0/trusteval/industries/__init__.py +100 -0
  40. trusteval_ai-1.0.0/trusteval/industries/base_industry.py +186 -0
  41. trusteval_ai-1.0.0/trusteval/industries/bfsi/__init__.py +26 -0
  42. trusteval_ai-1.0.0/trusteval/industries/bfsi/benchmarks.py +230 -0
  43. trusteval_ai-1.0.0/trusteval/industries/bfsi/compliance.py +161 -0
  44. trusteval_ai-1.0.0/trusteval/industries/bfsi/datasets.py +1005 -0
  45. trusteval_ai-1.0.0/trusteval/industries/healthcare/__init__.py +26 -0
  46. trusteval_ai-1.0.0/trusteval/industries/healthcare/benchmarks.py +228 -0
  47. trusteval_ai-1.0.0/trusteval/industries/healthcare/compliance.py +157 -0
  48. trusteval_ai-1.0.0/trusteval/industries/healthcare/datasets.py +1059 -0
  49. trusteval_ai-1.0.0/trusteval/industries/legal/__init__.py +26 -0
  50. trusteval_ai-1.0.0/trusteval/industries/legal/benchmarks.py +236 -0
  51. trusteval_ai-1.0.0/trusteval/industries/legal/compliance.py +160 -0
  52. trusteval_ai-1.0.0/trusteval/industries/legal/datasets.py +225 -0
  53. trusteval_ai-1.0.0/trusteval/industries/retail/__init__.py +26 -0
  54. trusteval_ai-1.0.0/trusteval/industries/retail/benchmarks.py +230 -0
  55. trusteval_ai-1.0.0/trusteval/industries/retail/compliance.py +156 -0
  56. trusteval_ai-1.0.0/trusteval/industries/retail/datasets.py +355 -0
  57. trusteval_ai-1.0.0/trusteval/pillars/__init__.py +69 -0
  58. trusteval_ai-1.0.0/trusteval/pillars/bias/__init__.py +24 -0
  59. trusteval_ai-1.0.0/trusteval/pillars/bias/detector.py +268 -0
  60. trusteval_ai-1.0.0/trusteval/pillars/bias/metrics.py +276 -0
  61. trusteval_ai-1.0.0/trusteval/pillars/bias/test_cases.py +168 -0
  62. trusteval_ai-1.0.0/trusteval/pillars/hallucination/__init__.py +24 -0
  63. trusteval_ai-1.0.0/trusteval/pillars/hallucination/detector.py +248 -0
  64. trusteval_ai-1.0.0/trusteval/pillars/hallucination/metrics.py +240 -0
  65. trusteval_ai-1.0.0/trusteval/pillars/hallucination/test_cases.py +214 -0
  66. trusteval_ai-1.0.0/trusteval/pillars/pii/__init__.py +30 -0
  67. trusteval_ai-1.0.0/trusteval/pillars/pii/detector.py +203 -0
  68. trusteval_ai-1.0.0/trusteval/pillars/pii/patterns.py +207 -0
  69. trusteval_ai-1.0.0/trusteval/pillars/pii/test_cases.py +128 -0
  70. trusteval_ai-1.0.0/trusteval/pillars/toxicity/__init__.py +26 -0
  71. trusteval_ai-1.0.0/trusteval/pillars/toxicity/detector.py +192 -0
  72. trusteval_ai-1.0.0/trusteval/pillars/toxicity/metrics.py +324 -0
  73. trusteval_ai-1.0.0/trusteval/pillars/toxicity/test_cases.py +109 -0
  74. trusteval_ai-1.0.0/trusteval/providers/__init__.py +80 -0
  75. trusteval_ai-1.0.0/trusteval/providers/anthropic_provider.py +336 -0
  76. trusteval_ai-1.0.0/trusteval/providers/base.py +247 -0
  77. trusteval_ai-1.0.0/trusteval/providers/gemini_provider.py +358 -0
  78. trusteval_ai-1.0.0/trusteval/providers/huggingface_provider.py +512 -0
  79. trusteval_ai-1.0.0/trusteval/providers/openai_provider.py +351 -0
  80. trusteval_ai-1.0.0/trusteval/providers/provider_factory.py +261 -0
  81. trusteval_ai-1.0.0/trusteval/reporters/__init__.py +23 -0
  82. trusteval_ai-1.0.0/trusteval/reporters/base_reporter.py +140 -0
  83. trusteval_ai-1.0.0/trusteval/reporters/csv_reporter.py +164 -0
  84. trusteval_ai-1.0.0/trusteval/reporters/html_reporter.py +299 -0
  85. trusteval_ai-1.0.0/trusteval/reporters/json_reporter.py +73 -0
  86. trusteval_ai-1.0.0/trusteval/reporters/pdf_reporter.py +126 -0
  87. trusteval_ai-1.0.0/trusteval/security/__init__.py +21 -0
  88. trusteval_ai-1.0.0/trusteval/security/audit_logger.py +270 -0
  89. trusteval_ai-1.0.0/trusteval/security/encryption.py +204 -0
  90. trusteval_ai-1.0.0/trusteval/security/input_sanitizer.py +199 -0
  91. trusteval_ai-1.0.0/trusteval/security/key_manager.py +268 -0
  92. trusteval_ai-1.0.0/trusteval/security/rate_limiter.py +221 -0
  93. trusteval_ai-1.0.0/trusteval/utils/__init__.py +43 -0
  94. trusteval_ai-1.0.0/trusteval/utils/config.py +196 -0
  95. trusteval_ai-1.0.0/trusteval/utils/exceptions.py +136 -0
  96. trusteval_ai-1.0.0/trusteval/utils/logger.py +139 -0
  97. trusteval_ai-1.0.0/trusteval/utils/validators.py +198 -0
  98. trusteval_ai-1.0.0/trusteval/version.py +5 -0
  99. trusteval_ai-1.0.0/trusteval_ai.egg-info/PKG-INFO +572 -0
  100. trusteval_ai-1.0.0/trusteval_ai.egg-info/SOURCES.txt +103 -0
  101. trusteval_ai-1.0.0/trusteval_ai.egg-info/dependency_links.txt +1 -0
  102. trusteval_ai-1.0.0/trusteval_ai.egg-info/entry_points.txt +2 -0
  103. trusteval_ai-1.0.0/trusteval_ai.egg-info/requires.txt +36 -0
  104. trusteval_ai-1.0.0/trusteval_ai.egg-info/top_level.txt +3 -0
@@ -0,0 +1,19 @@
1
+ # Changelog
2
+
3
+ All notable changes to TrustEval will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
+
7
+ ## [1.0.0] - 2025-03-18
8
+
9
+ ### Added
10
+ - Core evaluation engine with 4 Responsible AI pillars (Bias, Hallucination, PII, Toxicity)
11
+ - LLM provider connectors for OpenAI, Anthropic, Google Gemini, and HuggingFace
12
+ - Industry-specific benchmark modules for Healthcare, BFSI, Retail, and Legal
13
+ - CLI tool with rich terminal output (`trusteval evaluate`, `trusteval compare`, etc.)
14
+ - Web Dashboard with FastAPI backend and React + Tailwind frontend
15
+ - Report generation in PDF, JSON, CSV, and HTML formats
16
+ - Application security module (API key encryption, input sanitization, rate limiting, audit logging)
17
+ - Comprehensive unit and integration test suite
18
+ - CI/CD pipelines for testing, security scanning, and PyPI publishing
19
+ - Full documentation with quickstart guide, SDK reference, and industry guides
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Antrixsh Gupta
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,6 @@
1
+ include LICENSE
2
+ include README.md
3
+ include CHANGELOG.md
4
+ include pyproject.toml
5
+ recursive-include trusteval *.py *.yaml *.json
6
+ recursive-include assets *.svg *.png
@@ -0,0 +1,572 @@
1
+ Metadata-Version: 2.4
2
+ Name: trusteval-ai
3
+ Version: 1.0.0
4
+ Summary: Enterprise LLM Evaluation & Responsible AI Framework for Healthcare, BFSI, Retail & Legal industries
5
+ Home-page: https://github.com/antrixsh/trusteval
6
+ Author: Antrixsh Gupta
7
+ Author-email: Antrixsh Gupta <antrixsh@example.com>
8
+ Maintainer-email: Antrixsh Gupta <antrixsh@example.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2024 Antrixsh Gupta
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+
31
+ Project-URL: Homepage, https://github.com/antrixsh/trusteval
32
+ Project-URL: Documentation, https://github.com/antrixsh/trusteval/docs
33
+ Project-URL: Repository, https://github.com/antrixsh/trusteval
34
+ Project-URL: Bug Tracker, https://github.com/antrixsh/trusteval/issues
35
+ Project-URL: Author LinkedIn, https://www.linkedin.com/in/antrixshgupta
36
+ Keywords: llm,evaluation,responsible-ai,healthcare-ai,bfsi,bias-detection,hallucination,pii,toxicity,openai,anthropic,gemini,huggingface,enterprise-ai,ai-safety,llm-benchmark
37
+ Classifier: Development Status :: 5 - Production/Stable
38
+ Classifier: Intended Audience :: Developers
39
+ Classifier: Intended Audience :: Science/Research
40
+ Classifier: License :: OSI Approved :: MIT License
41
+ Classifier: Programming Language :: Python :: 3.10
42
+ Classifier: Programming Language :: Python :: 3.11
43
+ Classifier: Programming Language :: Python :: 3.12
44
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
45
+ Classifier: Topic :: Software Development :: Quality Assurance
46
+ Requires-Python: >=3.10
47
+ Description-Content-Type: text/markdown
48
+ License-File: LICENSE
49
+ Requires-Dist: openai>=1.0.0
50
+ Requires-Dist: anthropic>=0.20.0
51
+ Requires-Dist: google-generativeai>=0.4.0
52
+ Requires-Dist: transformers>=4.38.0
53
+ Requires-Dist: fastapi>=0.110.0
54
+ Requires-Dist: uvicorn[standard]>=0.27.0
55
+ Requires-Dist: sqlalchemy>=2.0.0
56
+ Requires-Dist: pydantic>=2.0.0
57
+ Requires-Dist: pydantic-settings>=2.0.0
58
+ Requires-Dist: click>=8.1.0
59
+ Requires-Dist: rich>=13.0.0
60
+ Requires-Dist: cryptography>=42.0.0
61
+ Requires-Dist: python-jose>=3.3.0
62
+ Requires-Dist: slowapi>=0.1.9
63
+ Requires-Dist: loguru>=0.7.0
64
+ Requires-Dist: weasyprint>=61.0
65
+ Requires-Dist: pandas>=2.0.0
66
+ Requires-Dist: numpy>=1.24.0
67
+ Requires-Dist: scikit-learn>=1.3.0
68
+ Requires-Dist: httpx>=0.27.0
69
+ Requires-Dist: tenacity>=8.2.0
70
+ Requires-Dist: python-dotenv>=1.0.0
71
+ Requires-Dist: aiosqlite>=0.20.0
72
+ Requires-Dist: websockets>=12.0
73
+ Requires-Dist: pyyaml>=6.0
74
+ Provides-Extra: dev
75
+ Requires-Dist: pytest>=8.0.0; extra == "dev"
76
+ Requires-Dist: pytest-cov>=5.0.0; extra == "dev"
77
+ Requires-Dist: pytest-asyncio>=0.23.0; extra == "dev"
78
+ Requires-Dist: pytest-mock>=3.12.0; extra == "dev"
79
+ Requires-Dist: ruff>=0.3.0; extra == "dev"
80
+ Requires-Dist: mypy>=1.9.0; extra == "dev"
81
+ Requires-Dist: bandit>=1.7.0; extra == "dev"
82
+ Requires-Dist: safety>=3.0.0; extra == "dev"
83
+ Requires-Dist: responses>=0.25.0; extra == "dev"
84
+ Dynamic: license-file
85
+
86
+ <p align="center">
87
+ <img src="assets/logo.svg" alt="TrustEval — Enterprise LLM Evaluation Framework" width="140" height="140">
88
+ </p>
89
+
90
+ <h1 align="center">TrustEval</h1>
91
+ <p align="center"><strong>Benchmark LLMs. Build Trust. Ship Responsibly.</strong></p>
92
+ <p align="center">The open-source framework for evaluating LLM safety, fairness, and reliability in regulated industries.</p>
93
+
94
+ <p align="center">
95
+ <a href="https://pypi.org/project/trusteval-ai/"><img src="https://img.shields.io/pypi/v/trusteval-ai?color=6366F1&style=for-the-badge&logo=pypi&logoColor=white" alt="PyPI Version"></a>
96
+ <a href="https://pypi.org/project/trusteval-ai/"><img src="https://img.shields.io/pypi/pyversions/trusteval-ai?color=3776AB&style=for-the-badge&logo=python&logoColor=white" alt="Python 3.10 | 3.11 | 3.12"></a>
97
+ <a href="https://github.com/antrixsh/trusteval/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-MIT-10B981?style=for-the-badge" alt="MIT License"></a>
98
+ <a href="https://github.com/antrixsh/trusteval/actions/workflows/ci.yml"><img src="https://img.shields.io/github/actions/workflow/status/antrixsh/trusteval/ci.yml?style=for-the-badge&logo=githubactions&logoColor=white&label=CI" alt="CI Status"></a>
99
+ </p>
100
+
101
+ <p align="center">
102
+ <a href="https://github.com/antrixsh/trusteval/stargazers"><img src="https://img.shields.io/github/stars/antrixsh/trusteval?style=for-the-badge&logo=github&color=yellow" alt="GitHub Stars"></a>
103
+ <a href="https://github.com/antrixsh/trusteval/network/members"><img src="https://img.shields.io/github/forks/antrixsh/trusteval?style=for-the-badge&logo=github&color=blue" alt="GitHub Forks"></a>
104
+ <a href="https://github.com/antrixsh/trusteval/issues"><img src="https://img.shields.io/github/issues/antrixsh/trusteval?style=for-the-badge&logo=github&color=orange" alt="Open Issues"></a>
105
+ <a href="https://pypi.org/project/trusteval-ai/"><img src="https://img.shields.io/pypi/dm/trusteval-ai?color=6366F1&style=for-the-badge&logo=pypi&logoColor=white&label=downloads" alt="PyPI Downloads"></a>
106
+ </p>
107
+
108
+ <p align="center">
109
+ <a href="#-quick-start">Quick Start</a> •
110
+ <a href="#-features">Features</a> •
111
+ <a href="#-supported-industries">Industries</a> •
112
+ <a href="#-evaluation-pillars">Pillars</a> •
113
+ <a href="#-providers">Providers</a> •
114
+ <a href="#-documentation">Docs</a> •
115
+ <a href="#-contributing">Contributing</a>
116
+ </p>
117
+
118
+ ---
119
+
120
+ ## Why TrustEval?
121
+
122
+ Deploying LLMs in regulated industries like **Healthcare**, **Banking**, **Retail**, and **Legal** is risky without proper evaluation. Off-the-shelf benchmarks don't cover domain-specific compliance, bias, or safety requirements.
123
+
124
+ **TrustEval** is a production-ready Python framework that provides:
125
+
126
+ - **Industry-specific benchmarks** — 600+ test prompts aligned to real regulations (HIPAA, GDPR, PCI-DSS, ABA Rules)
127
+ - **4 Responsible AI pillars** — Bias & Fairness, Hallucination Detection, PII/Data Leakage, Toxicity & Safety
128
+ - **Multi-provider support** — Evaluate OpenAI, Anthropic, Google Gemini, and HuggingFace models side-by-side
129
+ - **Enterprise-grade security** — Encrypted API key storage, audit logging, input sanitization, rate limiting
130
+ - **3 interfaces** — Python SDK, CLI tool, and Web Dashboard
131
+ - **Compliance-ready reports** — PDF, JSON, CSV, and HTML — built for audit teams
132
+
133
+ > *"Don't just deploy AI. Trust it."*
134
+
135
+ ---
136
+
137
+ ## ✨ Features
138
+
139
+ <table>
140
+ <tr>
141
+ <td width="50%">
142
+
143
+ ### πŸ›‘οΈ 4 AI Safety Pillars
144
+ Evaluate hallucination, bias, PII leakage, and toxicity with weighted scoring and automated grading (A–F).
145
+
146
+ ### πŸ₯ 4 Industry Modules
147
+ Healthcare (HIPAA), BFSI (GDPR/PCI-DSS), Retail (FTC), Legal (ABA) — each with 150+ domain-specific prompts.
148
+
149
+ ### 🔗 4 LLM Providers
150
+ OpenAI GPT-4, Anthropic Claude, Google Gemini, HuggingFace — test any model with one API.
151
+
152
+ </td>
153
+ <td width="50%">
154
+
155
+ ### 📊 Web Dashboard
156
+ Real-time evaluation results, model comparison, and trend analysis with React + Tailwind + Recharts.
157
+
158
+ ### 📋 Compliance Reports
159
+ Generate audit-ready PDF, JSON, CSV, and HTML reports with per-pillar breakdowns and regulatory citations.
160
+
161
+ ### πŸ” Enterprise Security
162
+ Fernet-encrypted key storage, SHA256 hash-chain audit logs, prompt injection detection, token bucket rate limiting.
163
+
164
+ </td>
165
+ </tr>
166
+ </table>
167
+
168
+ ---
169
+
170
+ ## 🚀 Quick Start
171
+
172
+ ### Installation
173
+
174
+ ```bash
175
+ pip install trusteval
176
+ ```
177
+
178
+ ### Python SDK
179
+
180
+ ```python
181
+ from trusteval import TrustEvaluator
182
+
183
+ evaluator = TrustEvaluator(
184
+ provider="openai",
185
+ model="gpt-4o",
186
+ industry="healthcare"
187
+ )
188
+
189
+ result = evaluator.evaluate()
190
+ print(result.summary())
191
+
192
+ # Export compliance report
193
+ result.export("audit_report.pdf")
194
+ result.export("audit_data.json", format="json")
195
+ ```
196
+
197
+ ### CLI
198
+
199
+ ```bash
200
+ # Run a full evaluation
201
+ trusteval evaluate --provider openai --model gpt-4o --industry healthcare -o results.json
202
+
203
+ # Compare two models
204
+ trusteval compare --providers openai,anthropic --models gpt-4o,claude-3-opus-20240229
205
+
206
+ # Generate a report
207
+ trusteval report generate -i results.json -f html -o report.html
208
+ ```
209
+
210
+ ### Web Dashboard
211
+
212
+ ```bash
213
+ # Start the dashboard server
214
+ trusteval dashboard start
215
+
216
+ # Open http://localhost:8080 in your browser
217
+ ```
218
+
219
+ ---
220
+
221
+ ## 🏭 Supported Industries
222
+
223
+ | Industry | Benchmark Areas | Regulations | Prompts |
224
+ |----------|----------------|-------------|---------|
225
+ | **πŸ₯ Healthcare** | Clinical QA, Triage, ICD Coding, PHI Leakage, Drug Interactions | HIPAA, FDA, Clinical Guidelines | 155+ |
226
+ | **🏦 BFSI** | Credit Fairness, Fraud Detection, KYC/AML, Risk Assessment | GDPR, PCI-DSS, SOX, Basel III | 156+ |
227
+ | **πŸ›’ Retail** | Recommendations, Customer Service, Pricing, Consumer PII | FTC Act, CCPA, Consumer Protection | 156+ |
228
+ | **βš–οΈ Legal** | Contract Analysis, Legal Advice, Privilege, Jurisdictional Awareness | ABA Model Rules, UPL Statutes | 156+ |
229
+
230
+ Each industry module includes:
231
+ - Domain-specific test prompts mapped to trust pillars
232
+ - Regulatory compliance checks with pass/fail results
233
+ - Industry-specific scoring and grading criteria
234
+
235
+ ---
236
+
237
+ ## πŸ“ Evaluation Pillars
238
+
239
+ TrustEval evaluates every LLM response across four Responsible AI dimensions:
240
+
241
+ | Pillar | Weight | What It Measures | Key Metrics |
242
+ |--------|--------|------------------|-------------|
243
+ | **πŸ” Hallucination** | 30% | Factual accuracy and reliability | F1 word-overlap, source grounding, confidence calibration, consistency |
244
+ | **βš–οΈ Bias & Fairness** | 25% | Equitable treatment across demographics | Demographic parity, counterfactual consistency, stereotype density |
245
+ | **πŸ”’ PII Detection** | 25% | Data leakage and privacy protection | 20 PII pattern types, Luhn validation, PII echo detection |
246
+ | **πŸ›‘οΈ Toxicity** | 20% | Harmful and unsafe content | Hate speech, profanity, violence scoring, jailbreak resistance |
247
+
248
+ ### Scoring & Grading
249
+
250
+ | Grade | Score Range | Trust Level | Meaning |
251
+ |-------|-----------|-------------|---------|
252
+ | **A** | 0.85 – 1.00 | ✅ TRUSTED | Safe for production deployment |
253
+ | **B** | 0.70 – 0.84 | ✅ TRUSTED | Safe with monitoring |
254
+ | **C** | 0.55 – 0.69 | ⚠️ CONDITIONAL | Requires human oversight |
255
+ | **D** | 0.40 – 0.54 | ⚠️ CONDITIONAL | Significant concerns |
256
+ | **F** | 0.00 – 0.39 | ❌ UNTRUSTED | Not recommended for deployment |
257
+
258
+ ---
259
+
260
+ ## 🔗 Providers
261
+
262
+ | Provider | Models | Features |
263
+ |----------|--------|----------|
264
+ | **OpenAI** | GPT-4, GPT-4 Turbo, GPT-4o, GPT-3.5 Turbo | Sync & async, token counting, cost estimation |
265
+ | **Anthropic** | Claude 3 Opus, Sonnet, Haiku, Claude 2.1 | Message format handling, system prompts |
266
+ | **Google Gemini** | Gemini Pro, Gemini 1.5 Pro, Gemini 1.5 Flash | Content generation, safety settings |
267
+ | **HuggingFace** | Any model via Inference API or local | Auto-detect local vs. Hub, pipeline support |
268
+
269
+ ### Provider Configuration
270
+
271
+ ```bash
272
+ # Set API keys via environment variables
273
+ export OPENAI_API_KEY="sk-..."
274
+ export ANTHROPIC_API_KEY="sk-ant-..."
275
+ export GOOGLE_API_KEY="..."
276
+ export HUGGINGFACE_API_KEY="hf_..."
277
+
278
+ # Or use TrustEval's encrypted key manager
279
+ trusteval providers configure --provider openai
280
+
281
+ # Test connectivity
282
+ trusteval providers test --provider openai
283
+
284
+ # List all supported providers and models
285
+ trusteval providers list
286
+ ```
287
+
288
+ ---
289
+
290
+ ## πŸ—οΈ Architecture
291
+
292
+ ```
293
+ trusteval/
294
+ ├── core/ # Evaluation engine, scoring, pipeline orchestration
295
+ │ ├── evaluator.py # Main TrustEvaluator class
296
+ │ ├── scorer.py # Weighted scoring, grading (A-F), trust levels
297
+ │ ├── pipeline.py # Sequential & parallel evaluation pipelines
298
+ │ ├── result.py # EvaluationResult with export capabilities
299
+ │ └── benchmark.py # BenchmarkSuite ABC with TestCase/TestResult
300
+ ├── pillars/ # Responsible AI detection modules
301
+ │ ├── bias/ # BiasDetector, stereotype matching, demographic parity
302
+ │ ├── hallucination/ # Factual accuracy (F1), confidence calibration
303
+ │ ├── pii/ # 20 PII regex patterns, Luhn validation
304
+ │ └── toxicity/ # Hate speech, violence, profanity, jailbreak detection
305
+ ├── providers/ # LLM provider connectors with retry logic
306
+ │ ├── openai_provider.py
307
+ │ ├── anthropic_provider.py
308
+ │ ├── gemini_provider.py
309
+ │ └── huggingface_provider.py
310
+ ├── industries/ # Domain-specific benchmark suites
311
+ │ ├── healthcare/ # HIPAA compliance, PHI detection, clinical QA
312
+ │ ├── bfsi/ # GDPR, PCI-DSS, credit fairness, fraud detection
313
+ │ ├── retail/ # FTC compliance, consumer PII, pricing fairness
314
+ │ └── legal/ # ABA rules, privilege detection, jurisdictional awareness
315
+ ├── security/ # Enterprise security module
316
+ │ ├── encryption.py # PBKDF2 + Fernet symmetric encryption
317
+ │ ├── key_manager.py # Encrypted API key storage (~/.trusteval/keys.enc)
318
+ │ ├── audit_logger.py # SHA256 hash-chain tamper-evident logging
319
+ │ ├── input_sanitizer.py # 23 injection patterns, prompt length limits
320
+ │ └── rate_limiter.py # Token bucket algorithm (60 RPM default)
321
+ ├── reporters/ # Report generation (PDF, JSON, CSV, HTML)
322
+ └── utils/ # Validators, helpers, constants
323
+
324
+ cli/ # Click + Rich CLI tool
325
+ dashboard/
326
+ ├── backend/ # FastAPI + async SQLAlchemy + WebSocket
327
+ └── frontend/ # React 18 + Vite + Tailwind CSS + Recharts
328
+
329
+ tests/
330
+ ├── unit/ # 157 unit tests
331
+ └── integration/ # 34 integration tests
332
+ ```
333
+
334
+ ---
335
+
336
+ ## 📊 Full Example — Healthcare Evaluation
337
+
338
+ ```python
339
+ from trusteval import TrustEvaluator
340
+
341
+ # Configure evaluator for healthcare
342
+ evaluator = TrustEvaluator(
343
+ provider="openai",
344
+ model="gpt-4o",
345
+ industry="healthcare",
346
+ pillars=["bias", "hallucination", "pii", "toxicity"],
347
+ verbose=True
348
+ )
349
+
350
+ # Run full evaluation
351
+ result = evaluator.evaluate()
352
+
353
+ # Check results
354
+ print(f"Overall Score: {result.overall_score:.2f}")
355
+ print(f"Overall Grade: {result.overall_grade}")
356
+ print(f"Trust Level: {result.trust_level}")
357
+
358
+ # Per-pillar breakdown
359
+ for pillar_name, pillar in result.pillars.items():
360
+ print(f" {pillar_name}: {pillar.score:.2f} ({pillar.grade})"
361
+ f" - {pillar.pass_count}/{pillar.test_count} passed")
362
+
363
+ # Export compliance report
364
+ result.export("healthcare_gpt4o_audit.pdf")
365
+ result.export("healthcare_gpt4o_data.json", format="json")
366
+ result.export("healthcare_gpt4o_report.html", format="html")
367
+ ```
368
+
369
+ ### Compare Models Side-by-Side
370
+
371
+ ```python
372
+ evaluator_gpt = TrustEvaluator(provider="openai", model="gpt-4o", industry="healthcare")
373
+ evaluator_claude = TrustEvaluator(provider="anthropic", model="claude-3-opus-20240229", industry="healthcare")
374
+
375
+ comparison = evaluator_gpt.compare(evaluator_claude)
376
+ print(f"Winner: {comparison['winner']}")
377
+ print(f"GPT-4o Score: {comparison['results'][0]['overall_score']:.2f}")
378
+ print(f"Claude Score: {comparison['results'][1]['overall_score']:.2f}")
379
+ ```
380
+
381
+ ---
382
+
383
+ ## πŸ” Security
384
+
385
+ TrustEval is built with enterprise security requirements in mind:
386
+
387
+ | Feature | Implementation |
388
+ |---------|---------------|
389
+ | **API Key Encryption** | Fernet symmetric encryption with PBKDF2-HMAC-SHA256 key derivation |
390
+ | **Audit Logging** | SHA256 hash-chain with daily rotation (30-day retention) |
391
+ | **Input Sanitization** | 23 compiled injection patterns, 8000-char prompt limit |
392
+ | **Rate Limiting** | Token bucket algorithm, configurable RPM (default: 60) |
393
+ | **Prompt Injection Detection** | Pattern matching for DAN mode, jailbreaks, instruction overrides |
394
+ | **CORS Protection** | Configurable allowed origins for dashboard API |
395
+
396
+ ```python
397
+ from trusteval.security import KeyManager, InputSanitizer, AuditLogger
398
+
399
+ # Secure key storage
400
+ km = KeyManager()
401
+ km.store_key("openai", "sk-...")
402
+ key = km.get_key("openai")
403
+
404
+ # Input validation
405
+ sanitizer = InputSanitizer()
406
+ is_safe, cleaned = sanitizer.validate_prompt(user_input)
407
+
408
+ # Tamper-evident audit trail
409
+ logger = AuditLogger()
410
+ logger.log("evaluation_started", {"model": "gpt-4o", "industry": "healthcare"})
411
+ ```
412
+
413
+ ---
414
+
415
+ ## 🧪 Testing
416
+
417
+ TrustEval ships with **191 tests** covering all modules:
418
+
419
+ ```bash
420
+ # Run all tests
421
+ pytest tests/ -v
422
+
423
+ # Unit tests only
424
+ pytest tests/unit/ -v
425
+
426
+ # Integration tests only
427
+ pytest tests/integration/ -v
428
+
429
+ # With coverage
430
+ pytest tests/ --cov=trusteval --cov-report=html -v
431
+ ```
432
+
433
+ | Test Suite | Tests | Coverage |
434
+ |------------|-------|----------|
435
+ | Bias Detector | 22 | Stereotypes, counterfactual, demographic parity, gendered language |
436
+ | Hallucination Detector | 20 | Factual accuracy, hallucination rate, confidence, consistency |
437
+ | PII Detector | 23 | SSN, credit card, email, phone, IBAN, medical ID, IP address |
438
+ | Toxicity Detector | 20 | Hate speech, profanity, violence, jailbreak, category scoring |
439
+ | Evaluator | 12 | Init, pillar evaluation, comparison, error handling |
440
+ | Scorer | 22 | Grading, trust levels, weighted averages, edge cases |
441
+ | Security | 38 | Encryption, key management, sanitization, audit, rate limiting |
442
+ | OpenAI Provider | 9 | Generate, batch, rate limits, validation, cost estimation |
443
+ | Healthcare Benchmark | 17 | Prompts, compliance checks, coverage |
444
+ | Full Pipeline | 8 | End-to-end evaluation, export, comparison |
445
+
446
+ ---
447
+
448
+ ## βš™οΈ Configuration
449
+
450
+ ### Environment Variables
451
+
452
+ ```bash
453
+ # LLM Provider API Keys
454
+ export OPENAI_API_KEY="sk-..."
455
+ export ANTHROPIC_API_KEY="sk-ant-..."
456
+ export GOOGLE_API_KEY="..."
457
+ export HUGGINGFACE_API_KEY="hf_..."
458
+
459
+ # Dashboard
460
+ export TRUSTEVAL_DASHBOARD_KEY="your-secret-key"
461
+ export TRUSTEVAL_ALLOWED_ORIGINS="http://localhost:5173"
462
+ ```
463
+
464
+ ### Config File (~/.trusteval/config.yaml)
465
+
466
+ ```yaml
467
+ version: "1.0"
468
+ default_industry: healthcare
469
+ default_pillars:
470
+ - bias
471
+ - hallucination
472
+ - pii
473
+ - toxicity
474
+ evaluation:
475
+ timeout_seconds: 30
476
+ max_test_count: 100
477
+ ```
478
+
479
+ ---
480
+
481
+ ## 📖 Documentation
482
+
483
+ | Document | Description |
484
+ |----------|-------------|
485
+ | [Quick Start Guide](docs/quickstart.md) | Get up and running in 5 minutes |
486
+ | [SDK Reference](docs/sdk-reference.md) | Complete Python API documentation |
487
+ | [CLI Reference](docs/cli-reference.md) | All CLI commands and options |
488
+ | [Security Guide](docs/security.md) | Security architecture and best practices |
489
+ | [Industry Guides](docs/industries/) | Per-industry benchmark documentation |
490
+ | [Pillar Guides](docs/pillars/) | Deep-dive into each evaluation pillar |
491
+ | [Contributing](CONTRIBUTING.md) | How to contribute to TrustEval |
492
+ | [Changelog](CHANGELOG.md) | Version history and release notes |
493
+
494
+ ---
495
+
496
+ ## πŸ—ΊοΈ Roadmap
497
+
498
+ - [ ] **v1.1** — ML-based toxicity and bias detection (transformer models)
499
+ - [ ] **v1.2** — Additional industries (Manufacturing, Education, Government)
500
+ - [ ] **v1.3** — LLM-as-judge evaluation mode
501
+ - [ ] **v1.4** — Continuous monitoring and alerting
502
+ - [ ] **v2.0** — Multi-language support, EU AI Act compliance module
503
+
504
+ ---
505
+
506
+ ## 🤝 Contributing
507
+
508
+ We welcome contributions! See [CONTRIBUTING.md](CONTRIBUTING.md) for guidelines.
509
+
510
+ ```bash
511
+ # Clone and setup
512
+ git clone https://github.com/antrixsh/trusteval.git
513
+ cd trusteval
514
+ python -m venv .venv
515
+ source .venv/bin/activate # Windows: .venv\Scripts\activate
516
+ pip install -e ".[dev]"
517
+
518
+ # Run tests
519
+ pytest tests/ -v
520
+
521
+ # Lint
522
+ ruff check trusteval/
523
+ ```
524
+
525
+ ---
526
+
527
+ ## 👤 Author
528
+
529
+ <table>
530
+ <tr>
531
+ <td>
532
+
533
+ **Antrixsh Gupta**
534
+
535
+ Enterprise AI & Data Science Leader | LinkedIn Top Voice in AI & Data Science
536
+
537
+ Senior Manager, Data & AI Practice @ Genzeon
538
+
539
+ - [LinkedIn](https://www.linkedin.com/in/antrixshgupta)
540
+ - [GitHub](https://github.com/antrixsh)
541
+
542
+ </td>
543
+ </tr>
544
+ </table>
545
+
546
+ > TrustEval was built to solve a real problem in enterprise AI: there was no single, industry-specific framework to evaluate whether an LLM is truly safe and reliable for regulated industries like Healthcare, BFSI, Retail, and Legal.
547
+
548
+ ---
549
+
550
+ ## 📄 License
551
+
552
+ MIT License — see [LICENSE](LICENSE) for details.
553
+
554
+ ---
555
+
556
+ ## ⭐ Star History
557
+
558
+ If TrustEval helps your team deploy LLMs responsibly, please consider giving it a star!
559
+
560
+ <p align="center">
561
+ <a href="https://github.com/antrixsh/trusteval/stargazers">
562
+ <img src="https://img.shields.io/github/stars/antrixsh/trusteval?style=social" alt="GitHub Stars">
563
+ </a>
564
+ </p>
565
+
566
+ <p align="center"><em>"Don't just deploy AI. Trust it."</em></p>
567
+
568
+ ---
569
+
570
+ <p align="center">
571
+ <strong>Keywords:</strong> LLM evaluation framework, responsible AI, AI safety, bias detection, hallucination detection, PII detection, toxicity detection, healthcare AI, BFSI AI, legal AI compliance, HIPAA AI evaluation, GDPR AI compliance, enterprise LLM benchmarking, AI fairness, LLM auditing, OpenAI evaluation, Claude evaluation, Gemini evaluation, HuggingFace evaluation, AI trust scoring, responsible AI framework, LLM safety testing, AI bias testing, AI compliance automation
572
+ </p>