testrelic-deepeval 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- testrelic_deepeval-0.1.0/.github/workflows/ci.yml +36 -0
- testrelic_deepeval-0.1.0/.github/workflows/deepeval-compat.yml +49 -0
- testrelic_deepeval-0.1.0/.github/workflows/publish-prod.yml +31 -0
- testrelic_deepeval-0.1.0/.github/workflows/publish-stage.yml +34 -0
- testrelic_deepeval-0.1.0/.gitignore +74 -0
- testrelic_deepeval-0.1.0/.python-version +1 -0
- testrelic_deepeval-0.1.0/CLAUDE.md +110 -0
- testrelic_deepeval-0.1.0/LICENSE +21 -0
- testrelic_deepeval-0.1.0/PKG-INFO +184 -0
- testrelic_deepeval-0.1.0/README.md +145 -0
- testrelic_deepeval-0.1.0/docs/architecture.md +125 -0
- testrelic_deepeval-0.1.0/docs/migration-from-confident-ai.md +93 -0
- testrelic_deepeval-0.1.0/docs/quickstart.md +89 -0
- testrelic_deepeval-0.1.0/pyproject.toml +73 -0
- testrelic_deepeval-0.1.0/ruff.toml +9 -0
- testrelic_deepeval-0.1.0/src/testrelic/__init__.py +19 -0
- testrelic_deepeval-0.1.0/src/testrelic/_version.py +1 -0
- testrelic_deepeval-0.1.0/src/testrelic/auth.py +82 -0
- testrelic_deepeval-0.1.0/src/testrelic/cli/__init__.py +5 -0
- testrelic_deepeval-0.1.0/src/testrelic/cli/login.py +24 -0
- testrelic_deepeval-0.1.0/src/testrelic/cli/main.py +43 -0
- testrelic_deepeval-0.1.0/src/testrelic/cli/test_run.py +48 -0
- testrelic_deepeval-0.1.0/src/testrelic/cli/view.py +22 -0
- testrelic_deepeval-0.1.0/src/testrelic/client.py +183 -0
- testrelic_deepeval-0.1.0/src/testrelic/config.py +81 -0
- testrelic_deepeval-0.1.0/src/testrelic/context.py +82 -0
- testrelic_deepeval-0.1.0/src/testrelic/datasets/__init__.py +6 -0
- testrelic_deepeval-0.1.0/src/testrelic/datasets/client.py +46 -0
- testrelic_deepeval-0.1.0/src/testrelic/datasets/pull.py +65 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/__init__.py +5 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/capture.py +70 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/evaluate.py +23 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/plugin.py +31 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/__init__.py +5 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/metric_data.py +50 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/test_case.py +91 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/test_run.py +137 -0
- testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/trace.py +10 -0
- testrelic_deepeval-0.1.0/src/testrelic/exceptions.py +19 -0
- testrelic_deepeval-0.1.0/src/testrelic/logger.py +15 -0
- testrelic_deepeval-0.1.0/src/testrelic/models/__init__.py +28 -0
- testrelic_deepeval-0.1.0/src/testrelic/models/datasets.py +35 -0
- testrelic_deepeval-0.1.0/src/testrelic/models/evals.py +75 -0
- testrelic_deepeval-0.1.0/src/testrelic/models/traces.py +28 -0
- testrelic_deepeval-0.1.0/src/testrelic/prompts/__init__.py +5 -0
- testrelic_deepeval-0.1.0/src/testrelic/prompts/client.py +19 -0
- testrelic_deepeval-0.1.0/src/testrelic/traces/__init__.py +12 -0
- testrelic_deepeval-0.1.0/src/testrelic/traces/client.py +25 -0
- testrelic_deepeval-0.1.0/src/testrelic/traces/exporter.py +23 -0
- testrelic_deepeval-0.1.0/tests/__init__.py +0 -0
- testrelic_deepeval-0.1.0/tests/conftest.py +60 -0
- testrelic_deepeval-0.1.0/tests/fixtures/__init__.py +0 -0
- testrelic_deepeval-0.1.0/tests/fixtures/deepeval_test_run.json +148 -0
- testrelic_deepeval-0.1.0/tests/fixtures/deepeval_test_run_conversational.json +55 -0
- testrelic_deepeval-0.1.0/tests/test_capture.py +108 -0
- testrelic_deepeval-0.1.0/tests/test_client.py +125 -0
- testrelic_deepeval-0.1.0/tests/test_config.py +55 -0
- testrelic_deepeval-0.1.0/tests/test_plugin.py +17 -0
- testrelic_deepeval-0.1.0/tests/test_translators.py +110 -0
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, stage, prod]
|
|
6
|
+
pull_request:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
test:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
strategy:
|
|
12
|
+
fail-fast: false
|
|
13
|
+
matrix:
|
|
14
|
+
python: ["3.9", "3.10", "3.11", "3.12"]
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up Python ${{ matrix.python }}
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: ${{ matrix.python }}
|
|
22
|
+
cache: pip
|
|
23
|
+
|
|
24
|
+
- name: Install
|
|
25
|
+
run: |
|
|
26
|
+
python -m pip install --upgrade pip
|
|
27
|
+
pip install -e ".[dev]"
|
|
28
|
+
|
|
29
|
+
- name: Lint (ruff)
|
|
30
|
+
run: ruff check src tests
|
|
31
|
+
|
|
32
|
+
- name: Type check (mypy)
|
|
33
|
+
run: mypy src/testrelic
|
|
34
|
+
|
|
35
|
+
- name: Test (pytest)
|
|
36
|
+
run: pytest --cov=testrelic --cov-report=term-missing
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
name: DeepEval compatibility check
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
schedule:
|
|
5
|
+
- cron: "0 6 * * 1" # Weekly, Monday 06:00 UTC
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
permissions:
|
|
9
|
+
contents: write
|
|
10
|
+
pull-requests: write
|
|
11
|
+
|
|
12
|
+
jobs:
|
|
13
|
+
bump-and-test:
|
|
14
|
+
runs-on: ubuntu-latest
|
|
15
|
+
steps:
|
|
16
|
+
- uses: actions/checkout@v4
|
|
17
|
+
|
|
18
|
+
- name: Set up Python
|
|
19
|
+
uses: actions/setup-python@v5
|
|
20
|
+
with:
|
|
21
|
+
python-version: "3.12"
|
|
22
|
+
|
|
23
|
+
- name: Detect latest deepeval release and bump pin
|
|
24
|
+
run: |
|
|
25
|
+
python -m pip install --upgrade pip
|
|
26
|
+
latest=$(pip index versions deepeval 2>/dev/null \
|
|
27
|
+
| head -n1 \
|
|
28
|
+
| sed -E 's/.*\(([^)]+)\).*/\1/' || true)
|
|
29
|
+
echo "Latest deepeval: ${latest:-unknown}"
|
|
30
|
+
if [ -n "$latest" ]; then
|
|
31
|
+
python -c "import re, pathlib; p=pathlib.Path('pyproject.toml'); t=p.read_text(); p.write_text(re.sub(r'\"deepeval>=[^\"]+\"', f'\"deepeval>=${latest}\"', t))"
|
|
32
|
+
fi
|
|
33
|
+
|
|
34
|
+
- name: Install
|
|
35
|
+
run: pip install -e ".[dev]"
|
|
36
|
+
|
|
37
|
+
- name: Run translator tests
|
|
38
|
+
run: pytest tests/test_translators.py tests/test_capture.py -v
|
|
39
|
+
|
|
40
|
+
- name: Open PR on diff
|
|
41
|
+
uses: peter-evans/create-pull-request@v6
|
|
42
|
+
with:
|
|
43
|
+
branch: deepeval-compat/bump
|
|
44
|
+
title: "chore(deps): bump deepeval to latest"
|
|
45
|
+
commit-message: "chore(deps): bump deepeval pin"
|
|
46
|
+
body: |
|
|
47
|
+
Automated weekly compatibility sweep.
|
|
48
|
+
Translator tests passed against the latest deepeval release.
|
|
49
|
+
labels: dependencies, automated
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: Publish prod (PyPI)
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [prod]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write
|
|
13
|
+
contents: read
|
|
14
|
+
environment:
|
|
15
|
+
name: pypi
|
|
16
|
+
url: https://pypi.org/project/testrelic-deepeval/
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
|
|
25
|
+
- name: Build sdist + wheel
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip build
|
|
28
|
+
python -m build
|
|
29
|
+
|
|
30
|
+
- name: Publish to PyPI
|
|
31
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
name: Publish stage (TestPyPI)
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [stage]
|
|
6
|
+
workflow_dispatch:
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
publish:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
permissions:
|
|
12
|
+
id-token: write
|
|
13
|
+
contents: read
|
|
14
|
+
environment:
|
|
15
|
+
name: testpypi
|
|
16
|
+
url: https://test.pypi.org/project/testrelic-deepeval/
|
|
17
|
+
steps:
|
|
18
|
+
- uses: actions/checkout@v4
|
|
19
|
+
|
|
20
|
+
- name: Set up Python
|
|
21
|
+
uses: actions/setup-python@v5
|
|
22
|
+
with:
|
|
23
|
+
python-version: "3.12"
|
|
24
|
+
|
|
25
|
+
- name: Build sdist + wheel
|
|
26
|
+
run: |
|
|
27
|
+
python -m pip install --upgrade pip build
|
|
28
|
+
python -m build
|
|
29
|
+
|
|
30
|
+
- name: Publish to TestPyPI
|
|
31
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
32
|
+
with:
|
|
33
|
+
repository-url: https://test.pypi.org/legacy/
|
|
34
|
+
skip-existing: true
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
# Byte-compiled / optimized / DLL files
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
|
|
6
|
+
# C extensions
|
|
7
|
+
*.so
|
|
8
|
+
|
|
9
|
+
# Distribution / packaging
|
|
10
|
+
.Python
|
|
11
|
+
build/
|
|
12
|
+
develop-eggs/
|
|
13
|
+
dist/
|
|
14
|
+
downloads/
|
|
15
|
+
eggs/
|
|
16
|
+
.eggs/
|
|
17
|
+
lib/
|
|
18
|
+
lib64/
|
|
19
|
+
parts/
|
|
20
|
+
sdist/
|
|
21
|
+
var/
|
|
22
|
+
wheels/
|
|
23
|
+
share/python-wheels/
|
|
24
|
+
*.egg-info/
|
|
25
|
+
.installed.cfg
|
|
26
|
+
*.egg
|
|
27
|
+
MANIFEST
|
|
28
|
+
|
|
29
|
+
# Virtual environments
|
|
30
|
+
.venv/
|
|
31
|
+
venv/
|
|
32
|
+
env/
|
|
33
|
+
ENV/
|
|
34
|
+
|
|
35
|
+
# Test/coverage caches
|
|
36
|
+
.tox/
|
|
37
|
+
.nox/
|
|
38
|
+
.coverage
|
|
39
|
+
.coverage.*
|
|
40
|
+
.cache
|
|
41
|
+
.pytest_cache/
|
|
42
|
+
htmlcov/
|
|
43
|
+
nosetests.xml
|
|
44
|
+
coverage.xml
|
|
45
|
+
*.cover
|
|
46
|
+
*.py,cover
|
|
47
|
+
.hypothesis/
|
|
48
|
+
|
|
49
|
+
# Type/lint caches
|
|
50
|
+
.mypy_cache/
|
|
51
|
+
.ruff_cache/
|
|
52
|
+
.dmypy.json
|
|
53
|
+
dmypy.json
|
|
54
|
+
|
|
55
|
+
# IDE
|
|
56
|
+
.idea/
|
|
57
|
+
.vscode/
|
|
58
|
+
*.swp
|
|
59
|
+
*~
|
|
60
|
+
|
|
61
|
+
# OS
|
|
62
|
+
.DS_Store
|
|
63
|
+
Thumbs.db
|
|
64
|
+
|
|
65
|
+
# TestRelic local state
|
|
66
|
+
.testrelic/
|
|
67
|
+
*.queue.json
|
|
68
|
+
|
|
69
|
+
# Jupyter
|
|
70
|
+
.ipynb_checkpoints
|
|
71
|
+
|
|
72
|
+
# Local env
|
|
73
|
+
.env
|
|
74
|
+
.env.local
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.9
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# testrelic-python-sdk - Development Guidelines
|
|
2
|
+
|
|
3
|
+
## What this repo is
|
|
4
|
+
|
|
5
|
+
The Python SDK that bridges DeepEval workflows into the TestRelic platform.
|
|
6
|
+
Users `pip install testrelic-deepeval` and their `deepeval test run tests/`
|
|
7
|
+
uploads to TestRelic's `/api/v1/evals/*` endpoints instead of Confident AI.
|
|
8
|
+
|
|
9
|
+
Package name on PyPI: `testrelic-deepeval`. Import name: `testrelic`.
|
|
10
|
+
|
|
11
|
+
## Stack
|
|
12
|
+
|
|
13
|
+
- Python 3.9+ (target 3.9 in ruff/mypy for max compatibility)
|
|
14
|
+
- httpx (sync client), pydantic v2, typer, platformdirs
|
|
15
|
+
- pytest + respx for tests
|
|
16
|
+
- ruff + mypy strict for lint/type
|
|
17
|
+
- hatchling for build
|
|
18
|
+
|
|
19
|
+
## Project structure
|
|
20
|
+
|
|
21
|
+
```
|
|
22
|
+
src/testrelic/
|
|
23
|
+
__init__.py public re-exports
|
|
24
|
+
config.py Config dataclass + precedence resolution
|
|
25
|
+
auth.py ~/.testrelic/credentials.toml read/write
|
|
26
|
+
client.py HttpClient (retries + offline queue) + drain_queue()
|
|
27
|
+
context.py git + CI auto-detect
|
|
28
|
+
exceptions.py TestRelicError / AuthError / ApiError
|
|
29
|
+
logger.py "testrelic" logger
|
|
30
|
+
models/ Pydantic request/response models (camelCase fields)
|
|
31
|
+
deepeval/
|
|
32
|
+
plugin.py pytest11 entry point
|
|
33
|
+
capture.py reads DeepEval global TestRun, posts to /runs + /cases + /finalize
|
|
34
|
+
evaluate.py drop-in wrapper for deepeval.evaluate()
|
|
35
|
+
translators/ TestRun -> TestRelic payload translators (defensive)
|
|
36
|
+
datasets/ pull/push/list
|
|
37
|
+
prompts/ Phase 3 stub
|
|
38
|
+
traces/ Phase 4 stub + OTel exporter shell
|
|
39
|
+
cli/ typer app: login, logout, view, test, drain, version
|
|
40
|
+
tests/
|
|
41
|
+
fixtures/ JSON golden files mirroring DeepEval TestRun.model_dump()
|
|
42
|
+
test_translators.py Golden-file tests
|
|
43
|
+
test_capture.py capture_and_upload orchestration
|
|
44
|
+
test_client.py HTTP retries / queueing (respx)
|
|
45
|
+
test_plugin.py entry-point smoke test
|
|
46
|
+
test_config.py precedence resolution
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Commands
|
|
50
|
+
|
|
51
|
+
| Command | Purpose |
|
|
52
|
+
|---|---|
|
|
53
|
+
| `pip install -e ".[dev]"` | Install with dev deps |
|
|
54
|
+
| `ruff check src tests` | Lint |
|
|
55
|
+
| `mypy src/testrelic` | Strict type check |
|
|
56
|
+
| `pytest` | Run tests |
|
|
57
|
+
| `pytest --cov=testrelic` | With coverage |
|
|
58
|
+
| `python -m build` | Build wheel + sdist |
|
|
59
|
+
|
|
60
|
+
## Release
|
|
61
|
+
|
|
62
|
+
- `stage` branch -> TestPyPI via GH Actions OIDC (`.github/workflows/publish-stage.yml`)
|
|
63
|
+
- `prod` branch -> PyPI via GH Actions OIDC (`.github/workflows/publish-prod.yml`)
|
|
64
|
+
- Weekly `deepeval-compat.yml` bumps the deepeval pin and opens a PR if translator
|
|
65
|
+
tests still pass
|
|
66
|
+
|
|
67
|
+
Bump `src/testrelic/_version.py` and `pyproject.toml` together; both must stay in sync.
|
|
68
|
+
|
|
69
|
+
## Key design decisions
|
|
70
|
+
|
|
71
|
+
- **TestRelic-native API.** This SDK calls TestRelic's `/api/v1/evals/*` endpoints. It
|
|
72
|
+
does NOT speak Confident AI's schema. The TestRelic API is camelCase JSON.
|
|
73
|
+
- **Pytest plugin captures DeepEval in-memory.** No HTTP redirect, no monkey-patch -
|
|
74
|
+
we read `global_test_run_manager.get_test_run()` at
|
|
75
|
+
`pytest_sessionfinish(trylast=True)`. This means user code stays unchanged and
|
|
76
|
+
DeepEval's own behavior (printing results, returning eval objects) is preserved.
|
|
77
|
+
- **Defensive translators.** DeepEval's Pydantic schema evolves between releases. Use
|
|
78
|
+
`getattr(obj, "field", default)` everywhere and keep golden-file fixtures under
|
|
79
|
+
`tests/fixtures/` so the weekly compat job catches breaking changes early.
|
|
80
|
+
- **Offline queue.** Network failures write to `~/.testrelic/queue/`; calling
|
|
81
|
+
`testrelic drain` retries.
|
|
82
|
+
- **Disabled-by-default when no key.** If `TESTRELIC_API_KEY` and the credentials file
|
|
83
|
+
are both absent, the plugin logs a single info line and otherwise no-ops. Test
|
|
84
|
+
runs never fail because of TestRelic.
|
|
85
|
+
|
|
86
|
+
## Conventions
|
|
87
|
+
|
|
88
|
+
- `from __future__ import annotations` at the top of every module
|
|
89
|
+
- Imports sorted by ruff (`I` rule)
|
|
90
|
+
- Public surface re-exported via `src/testrelic/__init__.py`
|
|
91
|
+
- Pydantic models use camelCase field names directly (matching API JSON) with
|
|
92
|
+
`populate_by_name=True` and `extra="allow"` so the SDK degrades gracefully when
|
|
93
|
+
the server adds new optional fields
|
|
94
|
+
- Don't comment inside functions unless explaining non-obvious behavior
|
|
95
|
+
|
|
96
|
+
## Adding a new TestRelic endpoint
|
|
97
|
+
|
|
98
|
+
1. Add the request/response model under `src/testrelic/models/`
|
|
99
|
+
2. Add a thin function under the relevant module (`datasets/`, `traces/`, ...) that
|
|
100
|
+
constructs the model, calls `HttpClient.post|put|get`, and returns either the raw
|
|
101
|
+
dict or a parsed model
|
|
102
|
+
3. Cover happy path + error envelope handling with a `respx`-mocked test
|
|
103
|
+
4. Re-export the public surface from `__init__.py`
|
|
104
|
+
|
|
105
|
+
## Adding a new DeepEval field
|
|
106
|
+
|
|
107
|
+
1. Update the JSON fixture under `tests/fixtures/` to include the field
|
|
108
|
+
2. Update the matching translator (`translators/test_case.py` or `metric_data.py`)
|
|
109
|
+
3. Add an assertion to `tests/test_translators.py`
|
|
110
|
+
4. Update the CHANGELOG; there is no need to bump the SDK version unless the API contract changes
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TestRelic
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: testrelic-deepeval
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: TestRelic SDK for DeepEval - LLM evaluation results land in TestRelic instead of Confident AI.
|
|
5
|
+
Project-URL: Homepage, https://testrelic.ai
|
|
6
|
+
Project-URL: Documentation, https://docs.testrelic.ai/deepeval
|
|
7
|
+
Project-URL: Repository, https://github.com/testrelic-ai/testrelic-python-sdk
|
|
8
|
+
Author-email: TestRelic <engineering@testrelic.ai>
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: deepeval,evaluation,llm,testing,testrelic
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Framework :: Pytest
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Requires-Python: >=3.9
|
|
20
|
+
Requires-Dist: httpx<1.0,>=0.27
|
|
21
|
+
Requires-Dist: platformdirs>=4.0
|
|
22
|
+
Requires-Dist: pydantic<3.0,>=2.6
|
|
23
|
+
Requires-Dist: tomli-w>=1.0
|
|
24
|
+
Requires-Dist: tomli>=2.0; python_version < '3.11'
|
|
25
|
+
Requires-Dist: typer<1.0,>=0.12
|
|
26
|
+
Provides-Extra: deepeval
|
|
27
|
+
Requires-Dist: deepeval<4.0,>=2.0; extra == 'deepeval'
|
|
28
|
+
Provides-Extra: dev
|
|
29
|
+
Requires-Dist: deepeval<4.0,>=2.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: mypy>=1.10; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-cov>=5.0; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest>=8.0; extra == 'dev'
|
|
33
|
+
Requires-Dist: respx>=0.21; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff>=0.6; extra == 'dev'
|
|
35
|
+
Provides-Extra: otel
|
|
36
|
+
Requires-Dist: opentelemetry-api>=1.25; extra == 'otel'
|
|
37
|
+
Requires-Dist: opentelemetry-sdk>=1.25; extra == 'otel'
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
|
|
40
|
+
# testrelic-deepeval
|
|
41
|
+
|
|
42
|
+
The TestRelic Python SDK for [DeepEval](https://github.com/confident-ai/deepeval). Capture
|
|
43
|
+
LLM evaluation results from your existing DeepEval test suite and ship them to your
|
|
44
|
+
TestRelic workspace with a one-line install.
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install testrelic-deepeval
|
|
48
|
+
testrelic login
|
|
49
|
+
deepeval test run tests/
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
That's it. The pytest plugin captures DeepEval's in-memory `TestRun` at session finish
|
|
53
|
+
and uploads cases + metrics to TestRelic's `/api/v1/evals/*` endpoints.
|
|
54
|
+
|
|
55
|
+
## What this is (and isn't)
|
|
56
|
+
|
|
57
|
+
- **Is**: a pytest plugin + Python client for sending DeepEval results to TestRelic
|
|
58
|
+
- **Is**: a drop-in `testrelic.evaluate()` wrapper for programmatic eval runs
|
|
59
|
+
- **Is**: a CLI (`testrelic login`, `testrelic test`, `testrelic view`)
|
|
60
|
+
- **Is not**: a replacement for DeepEval (use DeepEval to author tests; we just receive
|
|
61
|
+
the results)
|
|
62
|
+
- **Is not**: a Confident AI proxy — we use TestRelic's own API, not Confident's
|
|
63
|
+
|
|
64
|
+
## Install
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
pip install testrelic-deepeval # core
|
|
68
|
+
pip install "testrelic-deepeval[deepeval]" # also pull deepeval if not already installed
|
|
69
|
+
pip install "testrelic-deepeval[otel]" # OTel tracing (Phase 4, preview)
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
Supported Python versions: 3.9, 3.10, 3.11, 3.12.
|
|
73
|
+
|
|
74
|
+
## Authenticate
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
testrelic login --api-key tr_yourkey_here
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
Credentials are stored at `~/.testrelic/credentials.toml` with `0600` permissions on
|
|
81
|
+
POSIX. Alternatively, pass credentials via environment variables:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
export TESTRELIC_API_KEY=tr_yourkey
|
|
85
|
+
export TESTRELIC_BASE_URL=https://platform.testrelic.ai/api/v1/evals # only override if self-hosted
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Precedence (highest to lowest): explicit `configure()` args → env vars → credentials
|
|
89
|
+
file → built-in defaults.
|
|
90
|
+
|
|
91
|
+
## Use with pytest
|
|
92
|
+
|
|
93
|
+
No code changes needed. After install + login:
|
|
94
|
+
|
|
95
|
+
```bash
|
|
96
|
+
deepeval test run tests/
|
|
97
|
+
# or, if you want the wrapper that double-checks credentials:
|
|
98
|
+
testrelic test tests/
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
Every test run uploads as a new eval run in TestRelic, annotated with the current
|
|
102
|
+
branch, commit, and CI run URL (auto-detected for GitHub Actions, GitLab, Jenkins,
|
|
103
|
+
CircleCI, and Buildkite).
|
|
104
|
+
|
|
105
|
+
## Use programmatically
|
|
106
|
+
|
|
107
|
+
```python
|
|
108
|
+
from deepeval.test_case import LLMTestCase
|
|
109
|
+
from deepeval.metrics import AnswerRelevancyMetric
|
|
110
|
+
from testrelic import evaluate
|
|
111
|
+
|
|
112
|
+
results = evaluate(
|
|
113
|
+
test_cases=[LLMTestCase(input="Hi", actual_output="Hello")],
|
|
114
|
+
metrics=[AnswerRelevancyMetric(threshold=0.7)],
|
|
115
|
+
)
|
|
116
|
+
# results is whatever deepeval.evaluate() returns; upload is automatic
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Datasets
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from testrelic import datasets
|
|
123
|
+
|
|
124
|
+
# Pull a dataset version into a deepeval EvaluationDataset
|
|
125
|
+
ds = datasets.pull("customer-support-qa", label="latest")
|
|
126
|
+
|
|
127
|
+
# Push goldens up to a new version
|
|
128
|
+
datasets.push(
|
|
129
|
+
alias="customer-support-qa",
|
|
130
|
+
goldens=[{"input": "...", "expected_output": "..."}],
|
|
131
|
+
label="v2",
|
|
132
|
+
description="Refreshed Q1 2026 examples",
|
|
133
|
+
)
|
|
134
|
+
```
|
|
135
|
+
|
|
136
|
+
## Migrating from Confident AI
|
|
137
|
+
|
|
138
|
+
See [docs/migration-from-confident-ai.md](docs/migration-from-confident-ai.md). The
|
|
139
|
+
TL;DR is:
|
|
140
|
+
|
|
141
|
+
1. `pip install testrelic-deepeval`
|
|
142
|
+
2. `testrelic login`
|
|
143
|
+
3. Remove `CONFIDENT_API_KEY` from CI
|
|
144
|
+
4. Run your existing tests unchanged
|
|
145
|
+
|
|
146
|
+
Or print these steps at any time with `testrelic migrate-from-confident`.
|
|
147
|
+
|
|
148
|
+
## Offline / flaky networks
|
|
149
|
+
|
|
150
|
+
Failed uploads land in `~/.testrelic/queue/`. Replay them with:
|
|
151
|
+
|
|
152
|
+
```bash
|
|
153
|
+
testrelic drain
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
The plugin never fails your test run because of an upload error — uploads run at
|
|
157
|
+
`pytest_sessionfinish(trylast=True)` and swallow exceptions with a warning log.
|
|
158
|
+
|
|
159
|
+
## CLI
|
|
160
|
+
|
|
161
|
+
| Command | Purpose |
|
|
162
|
+
|---|---|
|
|
163
|
+
| `testrelic login` | Save credentials |
|
|
164
|
+
| `testrelic logout` | Remove credentials |
|
|
165
|
+
| `testrelic test <path>` | Wrap `deepeval test run` with credential check |
|
|
166
|
+
| `testrelic view [run_id]` | Open latest (or specific) eval run in browser |
|
|
167
|
+
| `testrelic drain` | Replay queued offline uploads |
|
|
168
|
+
| `testrelic version` | Print SDK version |
|
|
169
|
+
| `testrelic migrate-from-confident` | Print migration steps |
|
|
170
|
+
|
|
171
|
+
## Development
|
|
172
|
+
|
|
173
|
+
```bash
|
|
174
|
+
git clone https://github.com/testrelic-ai/testrelic-python-sdk
|
|
175
|
+
cd testrelic-python-sdk
|
|
176
|
+
pip install -e ".[dev]"
|
|
177
|
+
ruff check src tests
|
|
178
|
+
mypy src/testrelic
|
|
179
|
+
pytest
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
## License
|
|
183
|
+
|
|
184
|
+
MIT. See [LICENSE](LICENSE).
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
# testrelic-deepeval
|
|
2
|
+
|
|
3
|
+
The TestRelic Python SDK for [DeepEval](https://github.com/confident-ai/deepeval). Capture
|
|
4
|
+
LLM evaluation results from your existing DeepEval test suite and ship them to your
|
|
5
|
+
TestRelic workspace with a one-line install.
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
pip install testrelic-deepeval
|
|
9
|
+
testrelic login
|
|
10
|
+
deepeval test run tests/
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
That's it. The pytest plugin captures DeepEval's in-memory `TestRun` at session finish
|
|
14
|
+
and uploads cases + metrics to TestRelic's `/api/v1/evals/*` endpoints.
|
|
15
|
+
|
|
16
|
+
## What this is (and isn't)
|
|
17
|
+
|
|
18
|
+
- **Is**: a pytest plugin + Python client for sending DeepEval results to TestRelic
|
|
19
|
+
- **Is**: a drop-in `testrelic.evaluate()` wrapper for programmatic eval runs
|
|
20
|
+
- **Is**: a CLI (`testrelic login`, `testrelic test`, `testrelic view`)
|
|
21
|
+
- **Is not**: a replacement for DeepEval (use DeepEval to author tests; we just receive
|
|
22
|
+
the results)
|
|
23
|
+
- **Is not**: a Confident AI proxy — we use TestRelic's own API, not Confident's
|
|
24
|
+
|
|
25
|
+
## Install
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
pip install testrelic-deepeval # core
|
|
29
|
+
pip install "testrelic-deepeval[deepeval]" # also pull deepeval if not already installed
|
|
30
|
+
pip install "testrelic-deepeval[otel]" # OTel tracing (Phase 4, preview)
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
Supported Python versions: 3.9, 3.10, 3.11, 3.12.
|
|
34
|
+
|
|
35
|
+
## Authenticate
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
testrelic login --api-key tr_yourkey_here
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
Credentials are stored at `~/.testrelic/credentials.toml` with `0600` permissions on
|
|
42
|
+
POSIX. Alternatively, pass credentials via environment variables:
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
export TESTRELIC_API_KEY=tr_yourkey
|
|
46
|
+
export TESTRELIC_BASE_URL=https://platform.testrelic.ai/api/v1/evals # only override if self-hosted
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
Precedence (highest to lowest): explicit `configure()` args → env vars → credentials
|
|
50
|
+
file → built-in defaults.
|
|
51
|
+
|
|
52
|
+
## Use with pytest
|
|
53
|
+
|
|
54
|
+
No code changes needed. After install + login:
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
deepeval test run tests/
|
|
58
|
+
# or, if you want the wrapper that double-checks credentials:
|
|
59
|
+
testrelic test tests/
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
Every test run uploads as a new eval run in TestRelic, annotated with the current
|
|
63
|
+
branch, commit, and CI run URL (auto-detected for GitHub Actions, GitLab, Jenkins,
|
|
64
|
+
CircleCI, and Buildkite).
|
|
65
|
+
|
|
66
|
+
## Use programmatically
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
from deepeval.test_case import LLMTestCase
|
|
70
|
+
from deepeval.metrics import AnswerRelevancyMetric
|
|
71
|
+
from testrelic import evaluate
|
|
72
|
+
|
|
73
|
+
results = evaluate(
|
|
74
|
+
test_cases=[LLMTestCase(input="Hi", actual_output="Hello")],
|
|
75
|
+
metrics=[AnswerRelevancyMetric(threshold=0.7)],
|
|
76
|
+
)
|
|
77
|
+
# results is whatever deepeval.evaluate() returns; upload is automatic
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Datasets
|
|
81
|
+
|
|
82
|
+
```python
|
|
83
|
+
from testrelic import datasets
|
|
84
|
+
|
|
85
|
+
# Pull a dataset version into a deepeval EvaluationDataset
|
|
86
|
+
ds = datasets.pull("customer-support-qa", label="latest")
|
|
87
|
+
|
|
88
|
+
# Push goldens up to a new version
|
|
89
|
+
datasets.push(
|
|
90
|
+
alias="customer-support-qa",
|
|
91
|
+
goldens=[{"input": "...", "expected_output": "..."}],
|
|
92
|
+
label="v2",
|
|
93
|
+
description="Refreshed Q1 2026 examples",
|
|
94
|
+
)
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
## Migrating from Confident AI
|
|
98
|
+
|
|
99
|
+
See [docs/migration-from-confident-ai.md](docs/migration-from-confident-ai.md). The
|
|
100
|
+
TL;DR is:
|
|
101
|
+
|
|
102
|
+
1. `pip install testrelic-deepeval`
|
|
103
|
+
2. `testrelic login`
|
|
104
|
+
3. Remove `CONFIDENT_API_KEY` from CI
|
|
105
|
+
4. Run your existing tests unchanged
|
|
106
|
+
|
|
107
|
+
Or print these steps at any time with `testrelic migrate-from-confident`.
|
|
108
|
+
|
|
109
|
+
## Offline / flaky networks
|
|
110
|
+
|
|
111
|
+
Failed uploads land in `~/.testrelic/queue/`. Replay them with:
|
|
112
|
+
|
|
113
|
+
```bash
|
|
114
|
+
testrelic drain
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
The plugin never fails your test run because of an upload error — uploads run at
|
|
118
|
+
`pytest_sessionfinish(trylast=True)` and swallow exceptions with a warning log.
|
|
119
|
+
|
|
120
|
+
## CLI
|
|
121
|
+
|
|
122
|
+
| Command | Purpose |
|
|
123
|
+
|---|---|
|
|
124
|
+
| `testrelic login` | Save credentials |
|
|
125
|
+
| `testrelic logout` | Remove credentials |
|
|
126
|
+
| `testrelic test <path>` | Wrap `deepeval test run` with credential check |
|
|
127
|
+
| `testrelic view [run_id]` | Open latest (or specific) eval run in browser |
|
|
128
|
+
| `testrelic drain` | Replay queued offline uploads |
|
|
129
|
+
| `testrelic version` | Print SDK version |
|
|
130
|
+
| `testrelic migrate-from-confident` | Print migration steps |
|
|
131
|
+
|
|
132
|
+
## Development
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
git clone https://github.com/testrelic-ai/testrelic-python-sdk
|
|
136
|
+
cd testrelic-python-sdk
|
|
137
|
+
pip install -e ".[dev]"
|
|
138
|
+
ruff check src tests
|
|
139
|
+
mypy src/testrelic
|
|
140
|
+
pytest
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
## License
|
|
144
|
+
|
|
145
|
+
MIT. See [LICENSE](LICENSE).
|