testrelic-deepeval 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. testrelic_deepeval-0.1.0/.github/workflows/ci.yml +36 -0
  2. testrelic_deepeval-0.1.0/.github/workflows/deepeval-compat.yml +49 -0
  3. testrelic_deepeval-0.1.0/.github/workflows/publish-prod.yml +31 -0
  4. testrelic_deepeval-0.1.0/.github/workflows/publish-stage.yml +34 -0
  5. testrelic_deepeval-0.1.0/.gitignore +74 -0
  6. testrelic_deepeval-0.1.0/.python-version +1 -0
  7. testrelic_deepeval-0.1.0/CLAUDE.md +110 -0
  8. testrelic_deepeval-0.1.0/LICENSE +21 -0
  9. testrelic_deepeval-0.1.0/PKG-INFO +184 -0
  10. testrelic_deepeval-0.1.0/README.md +145 -0
  11. testrelic_deepeval-0.1.0/docs/architecture.md +125 -0
  12. testrelic_deepeval-0.1.0/docs/migration-from-confident-ai.md +93 -0
  13. testrelic_deepeval-0.1.0/docs/quickstart.md +89 -0
  14. testrelic_deepeval-0.1.0/pyproject.toml +73 -0
  15. testrelic_deepeval-0.1.0/ruff.toml +9 -0
  16. testrelic_deepeval-0.1.0/src/testrelic/__init__.py +19 -0
  17. testrelic_deepeval-0.1.0/src/testrelic/_version.py +1 -0
  18. testrelic_deepeval-0.1.0/src/testrelic/auth.py +82 -0
  19. testrelic_deepeval-0.1.0/src/testrelic/cli/__init__.py +5 -0
  20. testrelic_deepeval-0.1.0/src/testrelic/cli/login.py +24 -0
  21. testrelic_deepeval-0.1.0/src/testrelic/cli/main.py +43 -0
  22. testrelic_deepeval-0.1.0/src/testrelic/cli/test_run.py +48 -0
  23. testrelic_deepeval-0.1.0/src/testrelic/cli/view.py +22 -0
  24. testrelic_deepeval-0.1.0/src/testrelic/client.py +183 -0
  25. testrelic_deepeval-0.1.0/src/testrelic/config.py +81 -0
  26. testrelic_deepeval-0.1.0/src/testrelic/context.py +82 -0
  27. testrelic_deepeval-0.1.0/src/testrelic/datasets/__init__.py +6 -0
  28. testrelic_deepeval-0.1.0/src/testrelic/datasets/client.py +46 -0
  29. testrelic_deepeval-0.1.0/src/testrelic/datasets/pull.py +65 -0
  30. testrelic_deepeval-0.1.0/src/testrelic/deepeval/__init__.py +5 -0
  31. testrelic_deepeval-0.1.0/src/testrelic/deepeval/capture.py +70 -0
  32. testrelic_deepeval-0.1.0/src/testrelic/deepeval/evaluate.py +23 -0
  33. testrelic_deepeval-0.1.0/src/testrelic/deepeval/plugin.py +31 -0
  34. testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/__init__.py +5 -0
  35. testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/metric_data.py +50 -0
  36. testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/test_case.py +91 -0
  37. testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/test_run.py +137 -0
  38. testrelic_deepeval-0.1.0/src/testrelic/deepeval/translators/trace.py +10 -0
  39. testrelic_deepeval-0.1.0/src/testrelic/exceptions.py +19 -0
  40. testrelic_deepeval-0.1.0/src/testrelic/logger.py +15 -0
  41. testrelic_deepeval-0.1.0/src/testrelic/models/__init__.py +28 -0
  42. testrelic_deepeval-0.1.0/src/testrelic/models/datasets.py +35 -0
  43. testrelic_deepeval-0.1.0/src/testrelic/models/evals.py +75 -0
  44. testrelic_deepeval-0.1.0/src/testrelic/models/traces.py +28 -0
  45. testrelic_deepeval-0.1.0/src/testrelic/prompts/__init__.py +5 -0
  46. testrelic_deepeval-0.1.0/src/testrelic/prompts/client.py +19 -0
  47. testrelic_deepeval-0.1.0/src/testrelic/traces/__init__.py +12 -0
  48. testrelic_deepeval-0.1.0/src/testrelic/traces/client.py +25 -0
  49. testrelic_deepeval-0.1.0/src/testrelic/traces/exporter.py +23 -0
  50. testrelic_deepeval-0.1.0/tests/__init__.py +0 -0
  51. testrelic_deepeval-0.1.0/tests/conftest.py +60 -0
  52. testrelic_deepeval-0.1.0/tests/fixtures/__init__.py +0 -0
  53. testrelic_deepeval-0.1.0/tests/fixtures/deepeval_test_run.json +148 -0
  54. testrelic_deepeval-0.1.0/tests/fixtures/deepeval_test_run_conversational.json +55 -0
  55. testrelic_deepeval-0.1.0/tests/test_capture.py +108 -0
  56. testrelic_deepeval-0.1.0/tests/test_client.py +125 -0
  57. testrelic_deepeval-0.1.0/tests/test_config.py +55 -0
  58. testrelic_deepeval-0.1.0/tests/test_plugin.py +17 -0
  59. testrelic_deepeval-0.1.0/tests/test_translators.py +110 -0
@@ -0,0 +1,36 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main, stage, prod]
6
+ pull_request:
7
+
8
+ jobs:
9
+ test:
10
+ runs-on: ubuntu-latest
11
+ strategy:
12
+ fail-fast: false
13
+ matrix:
14
+ python: ["3.9", "3.10", "3.11", "3.12"]
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Set up Python ${{ matrix.python }}
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: ${{ matrix.python }}
22
+ cache: pip
23
+
24
+ - name: Install
25
+ run: |
26
+ python -m pip install --upgrade pip
27
+ pip install -e ".[dev]"
28
+
29
+ - name: Lint (ruff)
30
+ run: ruff check src tests
31
+
32
+ - name: Type check (mypy)
33
+ run: mypy src/testrelic
34
+
35
+ - name: Test (pytest)
36
+ run: pytest --cov=testrelic --cov-report=term-missing
@@ -0,0 +1,49 @@
1
+ name: DeepEval compatibility check
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "0 6 * * 1" # Weekly, Monday 06:00 UTC
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: write
10
+ pull-requests: write
11
+
12
+ jobs:
13
+ bump-and-test:
14
+ runs-on: ubuntu-latest
15
+ steps:
16
+ - uses: actions/checkout@v4
17
+
18
+ - name: Set up Python
19
+ uses: actions/setup-python@v5
20
+ with:
21
+ python-version: "3.12"
22
+
23
+ - name: Detect latest deepeval release and bump pin
24
+ run: |
25
+ python -m pip install --upgrade pip
26
+ latest=$(pip index versions deepeval 2>/dev/null \
27
+ | head -n1 \
28
+ | sed -E 's/.*\(([^)]+)\).*/\1/' || true)
29
+ echo "Latest deepeval: ${latest:-unknown}"
30
+ if [ -n "$latest" ]; then
31
+ python -c "import re, pathlib; p=pathlib.Path('pyproject.toml'); t=p.read_text(); p.write_text(re.sub(r'\"deepeval>=[^\"]+\"', f'\"deepeval>=${latest}\"', t))"
32
+ fi
33
+
34
+ - name: Install
35
+ run: pip install -e ".[dev]"
36
+
37
+ - name: Run translator tests
38
+ run: pytest tests/test_translators.py tests/test_capture.py -v
39
+
40
+ - name: Open PR on diff
41
+ uses: peter-evans/create-pull-request@v6
42
+ with:
43
+ branch: deepeval-compat/bump
44
+ title: "chore(deps): bump deepeval to latest"
45
+ commit-message: "chore(deps): bump deepeval pin"
46
+ body: |
47
+ Automated weekly compatibility sweep.
48
+ Translator tests passed against the latest deepeval release.
49
+ labels: dependencies, automated
@@ -0,0 +1,31 @@
1
+ name: Publish prod (PyPI)
2
+
3
+ on:
4
+ push:
5
+ branches: [prod]
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ id-token: write
13
+ contents: read
14
+ environment:
15
+ name: pypi
16
+ url: https://pypi.org/project/testrelic-deepeval/
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.12"
24
+
25
+ - name: Build sdist + wheel
26
+ run: |
27
+ python -m pip install --upgrade pip build
28
+ python -m build
29
+
30
+ - name: Publish to PyPI
31
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,34 @@
1
+ name: Publish stage (TestPyPI)
2
+
3
+ on:
4
+ push:
5
+ branches: [stage]
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ publish:
10
+ runs-on: ubuntu-latest
11
+ permissions:
12
+ id-token: write
13
+ contents: read
14
+ environment:
15
+ name: testpypi
16
+ url: https://test.pypi.org/project/testrelic-deepeval/
17
+ steps:
18
+ - uses: actions/checkout@v4
19
+
20
+ - name: Set up Python
21
+ uses: actions/setup-python@v5
22
+ with:
23
+ python-version: "3.12"
24
+
25
+ - name: Build sdist + wheel
26
+ run: |
27
+ python -m pip install --upgrade pip build
28
+ python -m build
29
+
30
+ - name: Publish to TestPyPI
31
+ uses: pypa/gh-action-pypi-publish@release/v1
32
+ with:
33
+ repository-url: https://test.pypi.org/legacy/
34
+ skip-existing: true
@@ -0,0 +1,74 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ share/python-wheels/
24
+ *.egg-info/
25
+ .installed.cfg
26
+ *.egg
27
+ MANIFEST
28
+
29
+ # Virtual environments
30
+ .venv/
31
+ venv/
32
+ env/
33
+ ENV/
34
+
35
+ # Test/coverage caches
36
+ .tox/
37
+ .nox/
38
+ .coverage
39
+ .coverage.*
40
+ .cache
41
+ .pytest_cache/
42
+ htmlcov/
43
+ nosetests.xml
44
+ coverage.xml
45
+ *.cover
46
+ *.py,cover
47
+ .hypothesis/
48
+
49
+ # Type/lint caches
50
+ .mypy_cache/
51
+ .ruff_cache/
52
+ .dmypy.json
53
+ dmypy.json
54
+
55
+ # IDE
56
+ .idea/
57
+ .vscode/
58
+ *.swp
59
+ *~
60
+
61
+ # OS
62
+ .DS_Store
63
+ Thumbs.db
64
+
65
+ # TestRelic local state
66
+ .testrelic/
67
+ *.queue.json
68
+
69
+ # Jupyter
70
+ .ipynb_checkpoints
71
+
72
+ # Local env
73
+ .env
74
+ .env.local
@@ -0,0 +1 @@
1
+ 3.9
@@ -0,0 +1,110 @@
1
+ # testrelic-python-sdk - Development Guidelines
2
+
3
+ ## What this repo is
4
+
5
+ The Python SDK that bridges DeepEval workflows into the TestRelic platform.
6
+ Users run `pip install testrelic-deepeval`, and the results of their `deepeval test run tests/`
7
+ are uploaded to TestRelic's `/api/v1/evals/*` endpoints instead of Confident AI.
8
+
9
+ Package name on PyPI: `testrelic-deepeval`. Import name: `testrelic`.
10
+
11
+ ## Stack
12
+
13
+ - Python 3.9+ (target 3.9 in ruff/mypy for max compatibility)
14
+ - httpx (sync client), pydantic v2, typer, platformdirs
15
+ - pytest + respx for tests
16
+ - ruff + mypy strict for lint/type
17
+ - hatchling for build
18
+
19
+ ## Project structure
20
+
21
+ ```
22
+ src/testrelic/
23
+ __init__.py public re-exports
24
+ config.py Config dataclass + precedence resolution
25
+ auth.py ~/.testrelic/credentials.toml read/write
26
+ client.py HttpClient (retries + offline queue) + drain_queue()
27
+ context.py git + CI auto-detect
28
+ exceptions.py TestRelicError / AuthError / ApiError
29
+ logger.py "testrelic" logger
30
+ models/ Pydantic request/response models (camelCase fields)
31
+ deepeval/
32
+ plugin.py pytest11 entry point
33
+ capture.py reads DeepEval global TestRun, posts to /runs + /cases + /finalize
34
+ evaluate.py drop-in wrapper for deepeval.evaluate()
35
+ translators/ TestRun -> TestRelic payload translators (defensive)
36
+ datasets/ pull/push/list
37
+ prompts/ Phase 3 stub
38
+ traces/ Phase 4 stub + OTel exporter shell
39
+ cli/ typer app: login, logout, view, test, drain, version
40
+ tests/
41
+ fixtures/ JSON golden files mirroring DeepEval TestRun.model_dump()
42
+ test_translators.py Golden-file tests
43
+ test_capture.py capture_and_upload orchestration
44
+ test_client.py HTTP retries / queueing (respx)
45
+ test_plugin.py entry-point smoke test
46
+ test_config.py precedence resolution
47
+ ```
48
+
49
+ ## Commands
50
+
51
+ | Command | Purpose |
52
+ |---|---|
53
+ | `pip install -e ".[dev]"` | Install with dev deps |
54
+ | `ruff check src tests` | Lint |
55
+ | `mypy src/testrelic` | Strict type check |
56
+ | `pytest` | Run tests |
57
+ | `pytest --cov=testrelic` | With coverage |
58
+ | `python -m build` | Build wheel + sdist |
59
+
60
+ ## Release
61
+
62
+ - `stage` branch -> TestPyPI via GH Actions OIDC (`.github/workflows/publish-stage.yml`)
63
+ - `prod` branch -> PyPI via GH Actions OIDC (`.github/workflows/publish-prod.yml`)
64
+ - Weekly `deepeval-compat.yml` bumps the deepeval pin and opens a PR if translator
65
+ tests still pass
66
+
67
+ Bump `src/testrelic/_version.py` and `pyproject.toml` together; both must stay in sync.
68
+
69
+ ## Key design decisions
70
+
71
+ - **TestRelic-native API.** This SDK calls TestRelic's `/api/v1/evals/*` endpoints. It
72
+ does NOT speak Confident AI's schema. The TestRelic API is camelCase JSON.
73
+ - **Pytest plugin captures DeepEval in-memory.** No HTTP redirect, no monkey-patch -
74
+ we read `global_test_run_manager.get_test_run()` at
75
+ `pytest_sessionfinish(trylast=True)`. This means user code stays unchanged and
76
+ DeepEval's own behavior (printing results, returning eval objects) is preserved.
77
+ - **Defensive translators.** DeepEval's Pydantic schema evolves between releases. Use
78
+ `getattr(obj, "field", default)` everywhere and keep golden-file fixtures under
79
+ `tests/fixtures/` so the weekly compat job catches breaking changes early.
80
+ - **Offline queue.** Network failures write to `~/.testrelic/queue/`; calling
81
+ `testrelic drain` retries.
82
+ - **Disabled-by-default when no key.** If `TESTRELIC_API_KEY` and the credentials file
83
+ are both absent, the plugin logs a single info line and otherwise no-ops. Test
84
+ runs never fail because of TestRelic.
85
+
86
+ ## Conventions
87
+
88
+ - `from __future__ import annotations` at the top of every module
89
+ - Imports sorted by ruff (`I` rule)
90
+ - Public surface re-exported via `src/testrelic/__init__.py`
91
+ - Pydantic models use camelCase field names directly (matching API JSON) with
92
+ `populate_by_name=True` and `extra="allow"` so the SDK degrades gracefully when
93
+ the server adds new optional fields
94
+ - Don't comment inside functions unless explaining non-obvious behavior
95
+
96
+ ## Adding a new TestRelic endpoint
97
+
98
+ 1. Add the request/response model under `src/testrelic/models/`
99
+ 2. Add a thin function under the relevant module (`datasets/`, `traces/`, ...) that
100
+ constructs the model, calls `HttpClient.post|put|get`, and returns either the raw
101
+ dict or a parsed model
102
+ 3. Cover happy path + error envelope handling with a `respx`-mocked test
103
+ 4. Re-export the public surface from `__init__.py`
104
+
105
+ ## Adding a new DeepEval field
106
+
107
+ 1. Update the JSON fixture under `tests/fixtures/` to include the field
108
+ 2. Update the matching translator (`translators/test_case.py` or `metric_data.py`)
109
+ 3. Add an assertion to `tests/test_translators.py`
110
+ 4. Update the CHANGELOG; there is no need to bump the SDK version unless the API contract changes
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 TestRelic
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,184 @@
1
+ Metadata-Version: 2.4
2
+ Name: testrelic-deepeval
3
+ Version: 0.1.0
4
+ Summary: TestRelic SDK for DeepEval - LLM evaluation results land in TestRelic instead of Confident AI.
5
+ Project-URL: Homepage, https://testrelic.ai
6
+ Project-URL: Documentation, https://docs.testrelic.ai/deepeval
7
+ Project-URL: Repository, https://github.com/testrelic-ai/testrelic-python-sdk
8
+ Author-email: TestRelic <engineering@testrelic.ai>
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: deepeval,evaluation,llm,testing,testrelic
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Framework :: Pytest
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Requires-Python: >=3.9
20
+ Requires-Dist: httpx<1.0,>=0.27
21
+ Requires-Dist: platformdirs>=4.0
22
+ Requires-Dist: pydantic<3.0,>=2.6
23
+ Requires-Dist: tomli-w>=1.0
24
+ Requires-Dist: tomli>=2.0; python_version < '3.11'
25
+ Requires-Dist: typer<1.0,>=0.12
26
+ Provides-Extra: deepeval
27
+ Requires-Dist: deepeval<4.0,>=2.0; extra == 'deepeval'
28
+ Provides-Extra: dev
29
+ Requires-Dist: deepeval<4.0,>=2.0; extra == 'dev'
30
+ Requires-Dist: mypy>=1.10; extra == 'dev'
31
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
32
+ Requires-Dist: pytest>=8.0; extra == 'dev'
33
+ Requires-Dist: respx>=0.21; extra == 'dev'
34
+ Requires-Dist: ruff>=0.6; extra == 'dev'
35
+ Provides-Extra: otel
36
+ Requires-Dist: opentelemetry-api>=1.25; extra == 'otel'
37
+ Requires-Dist: opentelemetry-sdk>=1.25; extra == 'otel'
38
+ Description-Content-Type: text/markdown
39
+
40
+ # testrelic-deepeval
41
+
42
+ The TestRelic Python SDK for [DeepEval](https://github.com/confident-ai/deepeval). Capture
43
+ LLM evaluation results from your existing DeepEval test suite and ship them to your
44
+ TestRelic workspace with a one-line install.
45
+
46
+ ```bash
47
+ pip install testrelic-deepeval
48
+ testrelic login
49
+ deepeval test run tests/
50
+ ```
51
+
52
+ That's it. The pytest plugin captures DeepEval's in-memory `TestRun` at session finish
53
+ and uploads cases + metrics to TestRelic's `/api/v1/evals/*` endpoints.
54
+
55
+ ## What this is (and isn't)
56
+
57
+ - **Is**: a pytest plugin + Python client for sending DeepEval results to TestRelic
58
+ - **Is**: a drop-in `testrelic.evaluate()` wrapper for programmatic eval runs
59
+ - **Is**: a CLI (`testrelic login`, `testrelic test`, `testrelic view`)
60
+ - **Is not**: a replacement for DeepEval (use DeepEval to author tests; we just receive
61
+ the results)
62
+ - **Is not**: a Confident AI proxy — we use TestRelic's own API, not Confident's
63
+
64
+ ## Install
65
+
66
+ ```bash
67
+ pip install testrelic-deepeval # core
68
+ pip install "testrelic-deepeval[deepeval]" # also pull deepeval if not already installed
69
+ pip install "testrelic-deepeval[otel]" # OTel tracing (Phase 4, preview)
70
+ ```
71
+
72
+ Supported Python versions: 3.9, 3.10, 3.11, 3.12.
73
+
74
+ ## Authenticate
75
+
76
+ ```bash
77
+ testrelic login --api-key tr_yourkey_here
78
+ ```
79
+
80
+ Credentials are stored at `~/.testrelic/credentials.toml` with `0600` permissions on
81
+ POSIX. Alternatively, pass credentials via environment variables:
82
+
83
+ ```bash
84
+ export TESTRELIC_API_KEY=tr_yourkey
85
+ export TESTRELIC_BASE_URL=https://platform.testrelic.ai/api/v1/evals # only override if self-hosted
86
+ ```
87
+
88
+ Precedence (highest to lowest): explicit `configure()` args → env vars → credentials
89
+ file → built-in defaults.
90
+
91
+ ## Use with pytest
92
+
93
+ No code changes needed. After install + login:
94
+
95
+ ```bash
96
+ deepeval test run tests/
97
+ # or, if you want the wrapper that double-checks credentials:
98
+ testrelic test tests/
99
+ ```
100
+
101
+ Every test run uploads as a new eval run in TestRelic, annotated with the current
102
+ branch, commit, and CI run URL (auto-detected for GitHub Actions, GitLab, Jenkins,
103
+ CircleCI, and Buildkite).
104
+
105
+ ## Use programmatically
106
+
107
+ ```python
108
+ from deepeval.test_case import LLMTestCase
109
+ from deepeval.metrics import AnswerRelevancyMetric
110
+ from testrelic import evaluate
111
+
112
+ results = evaluate(
113
+ test_cases=[LLMTestCase(input="Hi", actual_output="Hello")],
114
+ metrics=[AnswerRelevancyMetric(threshold=0.7)],
115
+ )
116
+ # results is whatever deepeval.evaluate() returns; upload is automatic
117
+ ```
118
+
119
+ ## Datasets
120
+
121
+ ```python
122
+ from testrelic import datasets
123
+
124
+ # Pull a dataset version into a deepeval EvaluationDataset
125
+ ds = datasets.pull("customer-support-qa", label="latest")
126
+
127
+ # Push goldens up to a new version
128
+ datasets.push(
129
+ alias="customer-support-qa",
130
+ goldens=[{"input": "...", "expected_output": "..."}],
131
+ label="v2",
132
+ description="Refreshed Q1 2026 examples",
133
+ )
134
+ ```
135
+
136
+ ## Migrating from Confident AI
137
+
138
+ See [docs/migration-from-confident-ai.md](docs/migration-from-confident-ai.md). The
139
+ TL;DR is:
140
+
141
+ 1. `pip install testrelic-deepeval`
142
+ 2. `testrelic login`
143
+ 3. Remove `CONFIDENT_API_KEY` from CI
144
+ 4. Run your existing tests unchanged
145
+
146
+ Or run `testrelic migrate-from-confident` to print these migration steps.
147
+
148
+ ## Offline / flaky networks
149
+
150
+ Failed uploads land in `~/.testrelic/queue/`. Replay them with:
151
+
152
+ ```bash
153
+ testrelic drain
154
+ ```
155
+
156
+ The plugin never fails your test run because of an upload error — uploads run at
157
+ `pytest_sessionfinish(trylast=True)` and swallow exceptions with a warning log.
158
+
159
+ ## CLI
160
+
161
+ | Command | Purpose |
162
+ |---|---|
163
+ | `testrelic login` | Save credentials |
164
+ | `testrelic logout` | Remove credentials |
165
+ | `testrelic test <path>` | Wrap `deepeval test run` with credential check |
166
+ | `testrelic view [run_id]` | Open latest (or specific) eval run in browser |
167
+ | `testrelic drain` | Replay queued offline uploads |
168
+ | `testrelic version` | Print SDK version |
169
+ | `testrelic migrate-from-confident` | Print migration steps |
170
+
171
+ ## Development
172
+
173
+ ```bash
174
+ git clone https://github.com/testrelic-ai/testrelic-python-sdk
175
+ cd testrelic-python-sdk
176
+ pip install -e ".[dev]"
177
+ ruff check src tests
178
+ mypy src/testrelic
179
+ pytest
180
+ ```
181
+
182
+ ## License
183
+
184
+ MIT. See [LICENSE](LICENSE).
@@ -0,0 +1,145 @@
1
+ # testrelic-deepeval
2
+
3
+ The TestRelic Python SDK for [DeepEval](https://github.com/confident-ai/deepeval). Capture
4
+ LLM evaluation results from your existing DeepEval test suite and ship them to your
5
+ TestRelic workspace with a one-line install.
6
+
7
+ ```bash
8
+ pip install testrelic-deepeval
9
+ testrelic login
10
+ deepeval test run tests/
11
+ ```
12
+
13
+ That's it. The pytest plugin captures DeepEval's in-memory `TestRun` at session finish
14
+ and uploads cases + metrics to TestRelic's `/api/v1/evals/*` endpoints.
15
+
16
+ ## What this is (and isn't)
17
+
18
+ - **Is**: a pytest plugin + Python client for sending DeepEval results to TestRelic
19
+ - **Is**: a drop-in `testrelic.evaluate()` wrapper for programmatic eval runs
20
+ - **Is**: a CLI (`testrelic login`, `testrelic test`, `testrelic view`)
21
+ - **Is not**: a replacement for DeepEval (use DeepEval to author tests; we just receive
22
+ the results)
23
+ - **Is not**: a Confident AI proxy — we use TestRelic's own API, not Confident's
24
+
25
+ ## Install
26
+
27
+ ```bash
28
+ pip install testrelic-deepeval # core
29
+ pip install "testrelic-deepeval[deepeval]" # also pull deepeval if not already installed
30
+ pip install "testrelic-deepeval[otel]" # OTel tracing (Phase 4, preview)
31
+ ```
32
+
33
+ Supported Python versions: 3.9, 3.10, 3.11, 3.12.
34
+
35
+ ## Authenticate
36
+
37
+ ```bash
38
+ testrelic login --api-key tr_yourkey_here
39
+ ```
40
+
41
+ Credentials are stored at `~/.testrelic/credentials.toml` with `0600` permissions on
42
+ POSIX. Alternatively, pass credentials via environment variables:
43
+
44
+ ```bash
45
+ export TESTRELIC_API_KEY=tr_yourkey
46
+ export TESTRELIC_BASE_URL=https://platform.testrelic.ai/api/v1/evals # only override if self-hosted
47
+ ```
48
+
49
+ Precedence (highest to lowest): explicit `configure()` args → env vars → credentials
50
+ file → built-in defaults.
51
+
52
+ ## Use with pytest
53
+
54
+ No code changes needed. After install + login:
55
+
56
+ ```bash
57
+ deepeval test run tests/
58
+ # or, if you want the wrapper that double-checks credentials:
59
+ testrelic test tests/
60
+ ```
61
+
62
+ Every test run uploads as a new eval run in TestRelic, annotated with the current
63
+ branch, commit, and CI run URL (auto-detected for GitHub Actions, GitLab, Jenkins,
64
+ CircleCI, and Buildkite).
65
+
66
+ ## Use programmatically
67
+
68
+ ```python
69
+ from deepeval.test_case import LLMTestCase
70
+ from deepeval.metrics import AnswerRelevancyMetric
71
+ from testrelic import evaluate
72
+
73
+ results = evaluate(
74
+ test_cases=[LLMTestCase(input="Hi", actual_output="Hello")],
75
+ metrics=[AnswerRelevancyMetric(threshold=0.7)],
76
+ )
77
+ # results is whatever deepeval.evaluate() returns; upload is automatic
78
+ ```
79
+
80
+ ## Datasets
81
+
82
+ ```python
83
+ from testrelic import datasets
84
+
85
+ # Pull a dataset version into a deepeval EvaluationDataset
86
+ ds = datasets.pull("customer-support-qa", label="latest")
87
+
88
+ # Push goldens up to a new version
89
+ datasets.push(
90
+ alias="customer-support-qa",
91
+ goldens=[{"input": "...", "expected_output": "..."}],
92
+ label="v2",
93
+ description="Refreshed Q1 2026 examples",
94
+ )
95
+ ```
96
+
97
+ ## Migrating from Confident AI
98
+
99
+ See [docs/migration-from-confident-ai.md](docs/migration-from-confident-ai.md). The
100
+ TL;DR is:
101
+
102
+ 1. `pip install testrelic-deepeval`
103
+ 2. `testrelic login`
104
+ 3. Remove `CONFIDENT_API_KEY` from CI
105
+ 4. Run your existing tests unchanged
106
+
107
+ Or run `testrelic migrate-from-confident` to print these migration steps.
108
+
109
+ ## Offline / flaky networks
110
+
111
+ Failed uploads land in `~/.testrelic/queue/`. Replay them with:
112
+
113
+ ```bash
114
+ testrelic drain
115
+ ```
116
+
117
+ The plugin never fails your test run because of an upload error — uploads run at
118
+ `pytest_sessionfinish(trylast=True)` and swallow exceptions with a warning log.
119
+
120
+ ## CLI
121
+
122
+ | Command | Purpose |
123
+ |---|---|
124
+ | `testrelic login` | Save credentials |
125
+ | `testrelic logout` | Remove credentials |
126
+ | `testrelic test <path>` | Wrap `deepeval test run` with credential check |
127
+ | `testrelic view [run_id]` | Open latest (or specific) eval run in browser |
128
+ | `testrelic drain` | Replay queued offline uploads |
129
+ | `testrelic version` | Print SDK version |
130
+ | `testrelic migrate-from-confident` | Print migration steps |
131
+
132
+ ## Development
133
+
134
+ ```bash
135
+ git clone https://github.com/testrelic-ai/testrelic-python-sdk
136
+ cd testrelic-python-sdk
137
+ pip install -e ".[dev]"
138
+ ruff check src tests
139
+ mypy src/testrelic
140
+ pytest
141
+ ```
142
+
143
+ ## License
144
+
145
+ MIT. See [LICENSE](LICENSE).