testmind 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- testmind-0.1.0/.github/workflows/ci.yml +31 -0
- testmind-0.1.0/.github/workflows/publish.yml +51 -0
- testmind-0.1.0/.gitignore +10 -0
- testmind-0.1.0/.python-version +1 -0
- testmind-0.1.0/CHANGELOG.md +25 -0
- testmind-0.1.0/LICENSE +21 -0
- testmind-0.1.0/PKG-INFO +531 -0
- testmind-0.1.0/README.md +505 -0
- testmind-0.1.0/main.py +9 -0
- testmind-0.1.0/pyproject.toml +61 -0
- testmind-0.1.0/src/testmind/analysis/flaky.py +96 -0
- testmind-0.1.0/src/testmind/analysis/models.py +62 -0
- testmind-0.1.0/src/testmind/analysis/predictor.py +101 -0
- testmind-0.1.0/src/testmind/analysis/regression.py +153 -0
- testmind-0.1.0/src/testmind/analysis/stability.py +99 -0
- testmind-0.1.0/src/testmind/cli/app.py +293 -0
- testmind-0.1.0/src/testmind/domain/models.py +60 -0
- testmind-0.1.0/src/testmind/parsers/base.py +9 -0
- testmind-0.1.0/src/testmind/parsers/html_parser.py +233 -0
- testmind-0.1.0/src/testmind/parsers/junit_parser.py +109 -0
- testmind-0.1.0/src/testmind/reports/formatters.py +162 -0
- testmind-0.1.0/src/testmind/reports/summary.py +109 -0
- testmind-0.1.0/src/testmind/storage/base.py +49 -0
- testmind-0.1.0/src/testmind/storage/sqlite_store.py +229 -0
- testmind-0.1.0/src/testmind/utils/tools.py +7 -0
- testmind-0.1.0/testmind.code-workspace +7 -0
- testmind-0.1.0/tests/__init__.py +0 -0
- testmind-0.1.0/tests/analysis/__init__.py +0 -0
- testmind-0.1.0/tests/analysis/test_flaky.py +123 -0
- testmind-0.1.0/tests/analysis/test_predictor.py +132 -0
- testmind-0.1.0/tests/analysis/test_regression.py +157 -0
- testmind-0.1.0/tests/analysis/test_stability.py +128 -0
- testmind-0.1.0/tests/cli/__init__.py +0 -0
- testmind-0.1.0/tests/cli/test_cli.py +470 -0
- testmind-0.1.0/tests/parsers/__init__.py +0 -0
- testmind-0.1.0/tests/parsers/test_html_parser.py +314 -0
- testmind-0.1.0/tests/parsers/test_junit_parser.py +375 -0
- testmind-0.1.0/tests/reports/__init__.py +0 -0
- testmind-0.1.0/tests/reports/test_formatters.py +224 -0
- testmind-0.1.0/tests/reports/test_summary.py +236 -0
- testmind-0.1.0/tests/storage/__init__.py +0 -0
- testmind-0.1.0/tests/storage/test_sqlite_store.py +334 -0
- testmind-0.1.0/uv.lock +375 -0
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
name: CI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
branches: [main, develop]
|
|
6
|
+
pull_request:
|
|
7
|
+
branches: [main]
|
|
8
|
+
|
|
9
|
+
jobs:
|
|
10
|
+
test:
|
|
11
|
+
runs-on: ubuntu-latest
|
|
12
|
+
strategy:
|
|
13
|
+
matrix:
|
|
14
|
+
python-version: ["3.13"]
|
|
15
|
+
|
|
16
|
+
steps:
|
|
17
|
+
- uses: actions/checkout@v4
|
|
18
|
+
|
|
19
|
+
- name: Install uv
|
|
20
|
+
uses: astral-sh/setup-uv@v5
|
|
21
|
+
with:
|
|
22
|
+
version: "latest"
|
|
23
|
+
|
|
24
|
+
- name: Set up Python ${{ matrix.python-version }}
|
|
25
|
+
run: uv python install ${{ matrix.python-version }}
|
|
26
|
+
|
|
27
|
+
- name: Install dependencies
|
|
28
|
+
run: uv sync --all-groups
|
|
29
|
+
|
|
30
|
+
- name: Run tests
|
|
31
|
+
run: uv run pytest --cov=src/testmind --cov-report=term-missing
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
name: Publish to PyPI
|
|
2
|
+
|
|
3
|
+
on:
|
|
4
|
+
push:
|
|
5
|
+
tags:
|
|
6
|
+
- "v*.*.*"
|
|
7
|
+
|
|
8
|
+
jobs:
|
|
9
|
+
build:
|
|
10
|
+
runs-on: ubuntu-latest
|
|
11
|
+
steps:
|
|
12
|
+
- uses: actions/checkout@v4
|
|
13
|
+
|
|
14
|
+
- name: Install uv
|
|
15
|
+
uses: astral-sh/setup-uv@v5
|
|
16
|
+
with:
|
|
17
|
+
version: "latest"
|
|
18
|
+
|
|
19
|
+
- name: Set up Python
|
|
20
|
+
run: uv python install 3.13
|
|
21
|
+
|
|
22
|
+
- name: Install dependencies
|
|
23
|
+
run: uv sync --all-groups
|
|
24
|
+
|
|
25
|
+
- name: Run tests
|
|
26
|
+
run: uv run pytest
|
|
27
|
+
|
|
28
|
+
- name: Build package
|
|
29
|
+
run: uv build
|
|
30
|
+
|
|
31
|
+
- name: Upload dist artifacts
|
|
32
|
+
uses: actions/upload-artifact@v4
|
|
33
|
+
with:
|
|
34
|
+
name: dist
|
|
35
|
+
path: dist/
|
|
36
|
+
|
|
37
|
+
publish:
|
|
38
|
+
needs: build
|
|
39
|
+
runs-on: ubuntu-latest
|
|
40
|
+
environment: pypi
|
|
41
|
+
permissions:
|
|
42
|
+
id-token: write # required for Trusted Publisher (OIDC)
|
|
43
|
+
steps:
|
|
44
|
+
- name: Download dist artifacts
|
|
45
|
+
uses: actions/download-artifact@v4
|
|
46
|
+
with:
|
|
47
|
+
name: dist
|
|
48
|
+
path: dist/
|
|
49
|
+
|
|
50
|
+
- name: Publish to PyPI
|
|
51
|
+
uses: pypa/gh-action-pypi-publish@release/v1
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
3.13
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.1.0] - 2026-03-08
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- JUnit XML parser with support for bulk ingestion of historical reports
|
|
14
|
+
- SQLite-backed storage with SHA-256 deduplication
|
|
15
|
+
- Flaky test detection (flip rate + fail rate thresholds)
|
|
16
|
+
- Regression detection (stable reference window vs recent failures)
|
|
17
|
+
- Suite-wide failure spike detection (z-score baseline comparison)
|
|
18
|
+
- Stability index (0–100 composite score per test)
|
|
19
|
+
- Failure prediction with trend classification (degrading / stable / improving)
|
|
20
|
+
- CLI commands: `ingest`, `analyze`, `projects`, `history`
|
|
21
|
+
- Text and JSON output formats
|
|
22
|
+
- Python library API — all components importable and composable independently
|
|
23
|
+
|
|
24
|
+
[Unreleased]: https://github.com/Slaaayer/testmind/compare/v0.1.0...HEAD
|
|
25
|
+
[0.1.0]: https://github.com/Slaaayer/testmind/releases/tag/v0.1.0
|
testmind-0.1.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 TestMind Contributors
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
testmind-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,531 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: testmind
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: CLI tool to ingest test reports, detect flaky/regression patterns, and predict failures.
|
|
5
|
+
Project-URL: Homepage, https://github.com/Slaaayer/testmind
|
|
6
|
+
Project-URL: Repository, https://github.com/Slaaayer/testmind
|
|
7
|
+
Project-URL: Bug Tracker, https://github.com/Slaaayer/testmind/issues
|
|
8
|
+
Author: TestMind Contributors
|
|
9
|
+
License: MIT
|
|
10
|
+
License-File: LICENSE
|
|
11
|
+
Keywords: ci,flaky-tests,junit,pytest,quality,regression,test-reports,testing
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
19
|
+
Classifier: Topic :: Software Development :: Testing
|
|
20
|
+
Classifier: Topic :: Utilities
|
|
21
|
+
Requires-Python: >=3.13
|
|
22
|
+
Requires-Dist: beautifulsoup4>=4.12
|
|
23
|
+
Requires-Dist: pydantic>=2.12.5
|
|
24
|
+
Requires-Dist: typer>=0.23.1
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
|
|
27
|
+
# TestMind
|
|
28
|
+
|
|
29
|
+
A CLI tool and Python library for ingesting test reports, detecting patterns
|
|
30
|
+
(flaky tests, regressions, spikes), and predicting failures based on historical
|
|
31
|
+
execution data.
|
|
32
|
+
|
|
33
|
+
Supports **JUnit XML** today (an HTML report parser also ships in `testmind.parsers.html_parser`), with the parser interface open to CSV and
|
|
34
|
+
other formats.
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
# Clone and install in editable mode
|
|
42
|
+
git clone https://github.com/Slaaayer/testmind.git
|
|
43
|
+
cd testmind
|
|
44
|
+
uv sync # installs all dependencies + the testmind CLI
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
The `testmind` command is registered as a script entry point and is available
|
|
48
|
+
immediately after installation.
|
|
49
|
+
|
|
50
|
+
```bash
|
|
51
|
+
testmind --help
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
---
|
|
55
|
+
|
|
56
|
+
## Quick start
|
|
57
|
+
|
|
58
|
+
```bash
|
|
59
|
+
# First time: bulk-load historical reports to get meaningful analysis immediately
|
|
60
|
+
testmind ingest reports/history/*.xml --project my-service
|
|
61
|
+
|
|
62
|
+
# Day-to-day: ingest the latest run
|
|
63
|
+
testmind ingest reports/junit.xml --project my-service
|
|
64
|
+
|
|
65
|
+
# Check which projects you are tracking
|
|
66
|
+
testmind projects
|
|
67
|
+
|
|
68
|
+
# Re-run analysis on the latest stored run
|
|
69
|
+
testmind analyze my-service
|
|
70
|
+
|
|
71
|
+
# Browse the run history
|
|
72
|
+
testmind history my-service
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
By default the database lives at `~/.testmind/testmind.db`.
|
|
76
|
+
Override it with `--db <path>` or the `TESTMIND_DB` environment variable.
|
|
77
|
+
|
|
78
|
+
---
|
|
79
|
+
|
|
80
|
+
## Commands
|
|
81
|
+
|
|
82
|
+
### `ingest` — parse, store, analyse
|
|
83
|
+
|
|
84
|
+
```
|
|
85
|
+
testmind ingest <FILE> [FILE ...] --project <NAME> [OPTIONS]
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
Accepts **one or more** JUnit XML files. Each file is parsed, stored, and
|
|
89
|
+
counted. After all files are processed a single analysis summary is printed,
|
|
90
|
+
covering the full available history.
|
|
91
|
+
|
|
92
|
+
This makes it possible to bootstrap a project on the first run by pointing at
|
|
93
|
+
an archive of historical reports — patterns like flaky tests or regressions are
|
|
94
|
+
only detectable once enough history exists, so bulk-loading is the recommended
|
|
95
|
+
first step.
|
|
96
|
+
|
|
97
|
+
Each file is processed independently: a parse error on one file prints a
|
|
98
|
+
warning and moves on; the command only exits with code 1 if **every** file
|
|
99
|
+
fails. Duplicate reports (same content hash) are silently skipped, so running
|
|
100
|
+
the same command twice is always safe.
|
|
101
|
+
|
|
102
|
+
| Option | Default | Description |
|
|
103
|
+
|---|---|---|
|
|
104
|
+
| `--project / -p` | required | Project name to track the run under |
|
|
105
|
+
| `--format / -f` | `text` | Output format: `text` or `json` |
|
|
106
|
+
| `--db` | `~/.testmind/testmind.db` | SQLite database file |
|
|
107
|
+
| `--limit / -n` | `30` | Max historical reports loaded for analysis |
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# First run: load a full archive to seed history
|
|
111
|
+
testmind ingest reports/history/*.xml --project payments-service
|
|
112
|
+
|
|
113
|
+
# Day-to-day: ingest the latest CI run
|
|
114
|
+
testmind ingest build/reports/TEST-suite.xml --project payments-service
|
|
115
|
+
|
|
116
|
+
# JSON output — useful in CI pipelines
|
|
117
|
+
testmind ingest reports/junit.xml --project auth-service --format json
|
|
118
|
+
|
|
119
|
+
# Project-scoped database
|
|
120
|
+
testmind ingest reports/*.xml --project orders --db ./data/orders.db
|
|
121
|
+
|
|
122
|
+
# Override DB via env var
|
|
123
|
+
TESTMIND_DB=./ci.db testmind ingest reports/junit.xml --project api
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
**Example output for a bulk ingest:**
|
|
127
|
+
|
|
128
|
+
```
|
|
129
|
+
Ingesting 5 reports for project 'payments-service'...
|
|
130
|
+
[1/5] TEST-2024-01-01.xml stored 'nightly-2024-01-01' [87✓ 3✗ 2⊘ 0!]
|
|
131
|
+
[2/5] TEST-2024-01-02.xml stored 'nightly-2024-01-02' [90✓ 0✗ 2⊘ 0!]
|
|
132
|
+
[3/5] TEST-2024-01-03.xml stored 'nightly-2024-01-03' [88✓ 2✗ 2⊘ 0!]
|
|
133
|
+
[4/5] TEST-2024-01-04.xml stored 'nightly-2024-01-04' [91✓ 0✗ 1⊘ 0!]
|
|
134
|
+
[5/5] TEST-2024-01-05.xml stored 'nightly-2024-01-05' [85✓ 5✗ 2⊘ 0!]
|
|
135
|
+
|
|
136
|
+
5 stored.
|
|
137
|
+
|
|
138
|
+
TestMind Report — project: payments-service
|
|
139
|
+
Run: nightly-2024-01-05 | 2024-01-05 10:00:00 UTC | Duration: 12.34s
|
|
140
|
+
...
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
---
|
|
144
|
+
|
|
145
|
+
### `analyze` — re-run analysis on the latest run
|
|
146
|
+
|
|
147
|
+
```
|
|
148
|
+
testmind analyze <PROJECT> [OPTIONS]
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
Runs the full analysis pipeline against the most recent stored run without
|
|
152
|
+
re-parsing anything. Useful when you want to re-inspect results after changing
|
|
153
|
+
thresholds or after more history has accumulated.
|
|
154
|
+
|
|
155
|
+
| Option | Default | Description |
|
|
156
|
+
|---|---|---|
|
|
157
|
+
| `--format / -f` | `text` | `text` or `json` |
|
|
158
|
+
| `--db` | `~/.testmind/testmind.db` | SQLite database file |
|
|
159
|
+
| `--limit / -n` | `30` | Max historical reports loaded |
|
|
160
|
+
|
|
161
|
+
```bash
|
|
162
|
+
testmind analyze payments-service
|
|
163
|
+
testmind analyze payments-service --format json | jq '.flaky'
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
---
|
|
167
|
+
|
|
168
|
+
### `projects` — list tracked projects
|
|
169
|
+
|
|
170
|
+
```
|
|
171
|
+
testmind projects [--db <path>]
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
Prints a table of all projects with their run count and the timestamp of the
|
|
175
|
+
most recent run.
|
|
176
|
+
|
|
177
|
+
```
|
|
178
|
+
Project Reports Latest run
|
|
179
|
+
----------------------------------------------------------------------
|
|
180
|
+
auth-service 12 2024-06-15 09:45
|
|
181
|
+
orders-service 8 2024-06-14 22:10
|
|
182
|
+
payments-service 31 2024-06-15 10:00
|
|
183
|
+
```
|
|
184
|
+
|
|
185
|
+
---
|
|
186
|
+
|
|
187
|
+
### `history` — browse run history
|
|
188
|
+
|
|
189
|
+
```
|
|
190
|
+
testmind history <PROJECT> [--limit N] [--db <path>]
|
|
191
|
+
```
|
|
192
|
+
|
|
193
|
+
Prints a table of the stored runs for a project in reverse chronological order (newest first), capped by `--limit`.
|
|
194
|
+
|
|
195
|
+
```
|
|
196
|
+
History for 'payments-service' (showing 3 run(s))
|
|
197
|
+
|
|
198
|
+
Run Timestamp Pass Fail Skip Err Duration
|
|
199
|
+
--------------------------------------------------------------------------------------------------
|
|
200
|
+
nightly-2024-06-15 2024-06-15 10:00:00 87 3 2 0 12.34s
|
|
201
|
+
nightly-2024-06-14 2024-06-14 10:00:01 90 0 2 0 11.90s
|
|
202
|
+
nightly-2024-06-13 2024-06-13 10:00:00 88 2 2 0 12.01s
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
```bash
|
|
206
|
+
testmind history payments-service --limit 5
|
|
207
|
+
testmind history payments-service --limit 100 --db ./archive.db
|
|
208
|
+
```
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Output formats
|
|
213
|
+
|
|
214
|
+
### Text (default)
|
|
215
|
+
|
|
216
|
+
The text report is structured in sections, printed only when there is
|
|
217
|
+
something to show.
|
|
218
|
+
|
|
219
|
+
```
|
|
220
|
+
TestMind Report — project: payments-service
|
|
221
|
+
Run: nightly-2024-06-15 | 2024-06-15 10:00:00 UTC | Duration: 12.34s
|
|
222
|
+
────────────────────────────────────────────────────────────
|
|
223
|
+
OVERVIEW
|
|
224
|
+
Total: 92 Passed: 87 Failed: 3 Skipped: 2 Errors: 0
|
|
225
|
+
Pass rate: 94.6% Fail rate: 3.3%
|
|
226
|
+
|
|
227
|
+
FLAKY TESTS (2)
|
|
228
|
+
test_process_refund flip=70.0% fail=40.0% runs=10
|
|
229
|
+
test_currency_conversion flip=60.0% fail=30.0% runs=10
|
|
230
|
+
|
|
231
|
+
REGRESSIONS (1)
|
|
232
|
+
test_checkout_timeout ref_pass=100.0% recent_fail=66.7%
|
|
233
|
+
|
|
234
|
+
STABILITY INDEX (worst 10 of 87 tests)
|
|
235
|
+
Test Score Pass Consist Flips
|
|
236
|
+
test_process_refund 61.0 60.0% 95.0% 70.0%
|
|
237
|
+
test_currency_conversion 68.4 70.0% 92.0% 60.0%
|
|
238
|
+
...
|
|
239
|
+
|
|
240
|
+
FAILURE PREDICTIONS (top 10 by risk)
|
|
241
|
+
Test Prob Trend Confidence
|
|
242
|
+
test_checkout_timeout 78.0% degrading 55.0%
|
|
243
|
+
test_process_refund 45.0% stable 50.0%
|
|
244
|
+
...
|
|
245
|
+
|
|
246
|
+
ISSUES: 2 flaky | 1 regression(s) | 0 spike(s)
|
|
247
|
+
```
|
|
248
|
+
|
|
249
|
+
A **spike banner** is injected at the top when a sudden suite-wide failure
|
|
250
|
+
surge is detected:
|
|
251
|
+
|
|
252
|
+
```
|
|
253
|
+
FAILURE SPIKE DETECTED
|
|
254
|
+
Current fail rate : 48.0%
|
|
255
|
+
Baseline : 3.2% ± 1.1%
|
|
256
|
+
Z-score : 40.73
|
|
257
|
+
```
|
|
258
|
+
|
|
259
|
+
### JSON
|
|
260
|
+
|
|
261
|
+
Pass `--format json` to get a machine-readable object. Useful for piping into
|
|
262
|
+
`jq`, posting to Slack, or feeding downstream tools.
|
|
263
|
+
|
|
264
|
+
```json
|
|
265
|
+
{
|
|
266
|
+
"project": "payments-service",
|
|
267
|
+
"report": {
|
|
268
|
+
"id": "a3f9c...",
|
|
269
|
+
"name": "nightly-2024-06-15",
|
|
270
|
+
"timestamp": "2024-06-15T10:00:00+00:00",
|
|
271
|
+
"duration": 12.34,
|
|
272
|
+
"passed": 87,
|
|
273
|
+
"failed": 3,
|
|
274
|
+
"skipped": 2,
|
|
275
|
+
"errors": 0,
|
|
276
|
+
"total": 92,
|
|
277
|
+
"pass_rate": 0.9457,
|
|
278
|
+
"fail_rate": 0.0326
|
|
279
|
+
},
|
|
280
|
+
"issues": {
|
|
281
|
+
"flaky_count": 2,
|
|
282
|
+
"regression_count": 1,
|
|
283
|
+
"spike_detected": false
|
|
284
|
+
},
|
|
285
|
+
"flaky": [
|
|
286
|
+
{
|
|
287
|
+
"test_name": "test_process_refund",
|
|
288
|
+
"is_flaky": true,
|
|
289
|
+
"flip_rate": 0.7,
|
|
290
|
+
"pass_rate": 0.6,
|
|
291
|
+
"fail_rate": 0.4,
|
|
292
|
+
"run_count": 10,
|
|
293
|
+
"insufficient_data": false
|
|
294
|
+
}
|
|
295
|
+
],
|
|
296
|
+
"regressions": [ ... ],
|
|
297
|
+
"spike": null,
|
|
298
|
+
"stability": [ ... ],
|
|
299
|
+
"predictions": [ ... ]
|
|
300
|
+
}
|
|
301
|
+
```
|
|
302
|
+
|
|
303
|
+
```bash
|
|
304
|
+
# Extract only flaky tests from a CI run
|
|
305
|
+
testmind ingest reports/junit.xml --project api --format json \
|
|
306
|
+
| tail -n +2 \
|
|
307
|
+
| jq '[.flaky[] | {test: .test_name, flip_rate: .flip_rate}]'
|
|
308
|
+
|
|
309
|
+
# Fail CI if regressions are detected
|
|
310
|
+
COUNT=$(testmind analyze my-service --format json | jq '.issues.regression_count')
|
|
311
|
+
if [ "$COUNT" -gt 0 ]; then exit 1; fi
|
|
312
|
+
```
|
|
313
|
+
|
|
314
|
+
---
|
|
315
|
+
|
|
316
|
+
## Python library usage
|
|
317
|
+
|
|
318
|
+
Every component is importable and composable independently.
|
|
319
|
+
|
|
320
|
+
### Parse a report
|
|
321
|
+
|
|
322
|
+
```python
|
|
323
|
+
from testmind.parsers.junit_parser import JUnitParser
|
|
324
|
+
|
|
325
|
+
parser = JUnitParser()
|
|
326
|
+
report = parser.parse("reports/junit.xml", project="my-service")
|
|
327
|
+
|
|
328
|
+
print(report.name, report.pass_rate, report.fail_rate)
|
|
329
|
+
for test in report.tests:
|
|
330
|
+
print(test.name, test.status, test.duration)
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
### Store and retrieve history
|
|
334
|
+
|
|
335
|
+
```python
|
|
336
|
+
from testmind.storage.sqlite_store import SQLiteStore
|
|
337
|
+
|
|
338
|
+
store = SQLiteStore("~/.testmind/my-service.db")
|
|
339
|
+
store.save_report(report)
|
|
340
|
+
|
|
341
|
+
# All runs for a project, newest first
|
|
342
|
+
reports = store.get_reports("my-service", limit=20)
|
|
343
|
+
|
|
344
|
+
# Per-test history across runs: list[tuple[datetime, TestResult]]
|
|
345
|
+
history = store.get_test_history("my-service", "test_checkout", limit=30)
|
|
346
|
+
|
|
347
|
+
store.close()
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
### Run individual analysers
|
|
351
|
+
|
|
352
|
+
```python
|
|
353
|
+
from testmind.analysis.flaky import FlakyDetector
|
|
354
|
+
from testmind.analysis.regression import RegressionDetector, SpikeDetector
|
|
355
|
+
from testmind.analysis.stability import StabilityAnalyzer
|
|
356
|
+
from testmind.analysis.predictor import FailurePredictor
|
|
357
|
+
|
|
358
|
+
history = store.get_test_history("my-service", "test_checkout", limit=30)
|
|
359
|
+
|
|
360
|
+
flaky = FlakyDetector().analyze("test_checkout", history)
|
|
361
|
+
regr = RegressionDetector().analyze("test_checkout", history)
|
|
362
|
+
stable = StabilityAnalyzer().analyze("test_checkout", history)
|
|
363
|
+
pred = FailurePredictor().analyze("test_checkout", history)
|
|
364
|
+
|
|
365
|
+
print(flaky.is_flaky, flaky.flip_rate)
|
|
366
|
+
print(regr.is_regression)
|
|
367
|
+
print(stable.score) # 0–100
|
|
368
|
+
print(pred.failure_probability, pred.trend)
|
|
369
|
+
```
|
|
370
|
+
|
|
371
|
+
### Generate a full summary
|
|
372
|
+
|
|
373
|
+
```python
|
|
374
|
+
from testmind.reports.summary import Summarizer
|
|
375
|
+
from testmind.reports.formatters import TextFormatter, JsonFormatter
|
|
376
|
+
|
|
377
|
+
# report must already be saved in the store
|
|
378
|
+
summarizer = Summarizer(history_limit=30)
|
|
379
|
+
summary = summarizer.summarize("my-service", store)
|
|
380
|
+
|
|
381
|
+
print(TextFormatter().format(summary))
|
|
382
|
+
print(JsonFormatter().format(summary))
|
|
383
|
+
```
|
|
384
|
+
|
|
385
|
+
---
|
|
386
|
+
|
|
387
|
+
## Under the hood
|
|
388
|
+
|
|
389
|
+
### Storage
|
|
390
|
+
|
|
391
|
+
All data is persisted in a **SQLite database** (stdlib `sqlite3`, no ORM).
|
|
392
|
+
Two tables:
|
|
393
|
+
|
|
394
|
+
- `reports` — one row per ingested run (name, project, timestamp, pass/fail/skip/error counts, duration)
|
|
395
|
+
- `test_results` — one row per test case, linked to its report
|
|
396
|
+
|
|
397
|
+
Reports are deduplicated by a **SHA-256 content hash** derived from project
|
|
398
|
+
name, duration, timestamp, and test count. Ingesting the same file twice is
|
|
399
|
+
always safe.
|
|
400
|
+
|
|
401
|
+
### Pattern detection
|
|
402
|
+
|
|
403
|
+
All analysers operate on the per-test history: a list of
|
|
404
|
+
`(timestamp, TestResult)` pairs retrieved from the store. They require a
|
|
405
|
+
minimum number of runs before drawing conclusions (`insufficient_data=True`
|
|
406
|
+
is returned otherwise).
|
|
407
|
+
|
|
408
|
+
#### Flaky test
|
|
409
|
+
|
|
410
|
+
A test is flaky when it produces **mixed results** without a clear directional
|
|
411
|
+
trend.
|
|
412
|
+
|
|
413
|
+
```
|
|
414
|
+
is_flaky = fail_rate ∈ (0.10, 0.90) # not consistently passing or failing
|
|
415
|
+
AND flip_rate > 0.15 # consecutive outcomes differ often
|
|
416
|
+
|
|
417
|
+
flip_rate = |{consecutive pairs that differ}| / (n - 1)
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
Default minimum: **5 runs**.
|
|
421
|
+
|
|
422
|
+
#### Regression
|
|
423
|
+
|
|
424
|
+
A test is a regression when it was **stable and has recently broken**.
|
|
425
|
+
|
|
426
|
+
```
|
|
427
|
+
reference window = all runs except the last 3
|
|
428
|
+
recent window = last 3 runs
|
|
429
|
+
|
|
430
|
+
is_regression = reference_pass_rate >= 0.90 # was stable
|
|
431
|
+
AND recent_fail_rate >= 0.60 # now failing
|
|
432
|
+
```
|
|
433
|
+
|
|
434
|
+
Default minimum: **6 runs total**.
|
|
435
|
+
|
|
436
|
+
#### Spike
|
|
437
|
+
|
|
438
|
+
A spike is a sudden **suite-wide** increase in failure rate in the latest run
|
|
439
|
+
compared to the rolling baseline.
|
|
440
|
+
|
|
441
|
+
```
|
|
442
|
+
baseline = fail_rate of all previous runs in the window
|
|
443
|
+
z_score = (current_fail_rate - baseline_mean) / baseline_std
|
|
444
|
+
|
|
445
|
+
is_spike = z_score >= 2.0 AND current_fail_rate > baseline_mean
|
|
446
|
+
```
|
|
447
|
+
|
|
448
|
+
Requires at least **3 baseline reports**.
|
|
449
|
+
|
|
450
|
+
#### Stability index (0 – 100)
|
|
451
|
+
|
|
452
|
+
A composite score per test:
|
|
453
|
+
|
|
454
|
+
```
|
|
455
|
+
score = pass_rate × 60
|
|
456
|
+
+ duration_consistency × 20
|
|
457
|
+
+ (1 − flip_rate) × 20
|
|
458
|
+
|
|
459
|
+
duration_consistency = 1 − min(CV, 1)
|
|
460
|
+
where CV = std(durations) / mean(durations)
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
A perfectly stable test (always passes, consistent timing, never flips) scores
|
|
464
|
+
**100**. A consistently failing test with stable timing scores **40**. A
|
|
465
|
+
maximally flaky test (strictly alternating pass/fail) scores between **30** and **50**, depending on how consistent its timing is.
|
|
466
|
+
|
|
467
|
+
#### Failure prediction
|
|
468
|
+
|
|
469
|
+
A lightweight trend model — no external dependencies, no ML framework.
|
|
470
|
+
|
|
471
|
+
```
|
|
472
|
+
1. Encode each run as 1.0 (fail/error) or 0.0 (pass/skip).
|
|
473
|
+
2. Fit an OLS linear regression on the sequence (index → outcome).
|
|
474
|
+
3. Predict next value = mean(last 3 outcomes) + slope.
|
|
475
|
+
4. Clamp to [0, 1].
|
|
476
|
+
|
|
477
|
+
slope > +0.05 → DEGRADING
|
|
478
|
+
slope < −0.05 → IMPROVING
|
|
479
|
+
otherwise → STABLE
|
|
480
|
+
|
|
481
|
+
confidence = min(run_count / 20, 1.0)
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
### Architecture
|
|
485
|
+
|
|
486
|
+
```
|
|
487
|
+
src/testmind/
|
|
488
|
+
├── domain/
|
|
489
|
+
│ └── models.py TestResult, TestReport, TestStatus
|
|
490
|
+
├── parsers/
|
|
491
|
+
│ ├── base.py Abstract ReportParser
|
|
492
|
+
│ └── junit_parser.py JUnit XML parser
|
|
493
|
+
├── storage/
|
|
494
|
+
│ ├── base.py Abstract Store
|
|
495
|
+
│ └── sqlite_store.py SQLite implementation
|
|
496
|
+
├── analysis/
|
|
497
|
+
│ ├── models.py Result dataclasses + Trend enum
|
|
498
|
+
│ ├── flaky.py FlakyDetector
|
|
499
|
+
│ ├── regression.py RegressionDetector, SpikeDetector
|
|
500
|
+
│ ├── stability.py StabilityAnalyzer
|
|
501
|
+
│ └── predictor.py FailurePredictor
|
|
502
|
+
├── reports/
|
|
503
|
+
│ ├── summary.py RunSummary, Summarizer
|
|
504
|
+
│ └── formatters.py TextFormatter, JsonFormatter
|
|
505
|
+
└── cli/
|
|
506
|
+
└── app.py Typer CLI (ingest, analyze, projects, history)
|
|
507
|
+
```
|
|
508
|
+
|
|
509
|
+
---
|
|
510
|
+
|
|
511
|
+
## Running tests
|
|
512
|
+
|
|
513
|
+
```bash
|
|
514
|
+
uv run pytest # all 173 tests
|
|
515
|
+
uv run pytest tests/parsers/
|
|
516
|
+
uv run pytest tests/analysis/
|
|
517
|
+
uv run pytest tests/storage/
|
|
518
|
+
uv run pytest tests/reports/
|
|
519
|
+
uv run pytest tests/cli/
|
|
520
|
+
uv run pytest --cov=src/testmind --cov-report=term-missing
|
|
521
|
+
```
|
|
522
|
+
|
|
523
|
+
---
|
|
524
|
+
|
|
525
|
+
## Configuration reference
|
|
526
|
+
|
|
527
|
+
| Env var | CLI flag | Default | Description |
|
|
528
|
+
|---|---|---|---|
|
|
529
|
+
| `TESTMIND_DB` | `--db` | `~/.testmind/testmind.db` | Path to the SQLite database |
|
|
530
|
+
| — | `--format` | `text` | Output format for `ingest` and `analyze` |
|
|
531
|
+
| — | `--limit` | `30` | Max historical reports loaded per analysis |
|