vector-engine 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. vector_engine-1.0.2/PKG-INFO +202 -0
  2. vector_engine-1.0.2/README.md +184 -0
  3. {vector_engine-1.0.0 → vector_engine-1.0.2}/pyproject.toml +5 -5
  4. vector_engine-1.0.2/tests/test_artifact_contracts.py +205 -0
  5. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_credibility_audit.py +34 -10
  6. vector_engine-1.0.2/tests/test_dataset_benchmark_tooling.py +191 -0
  7. vector_engine-1.0.2/tests/test_env_diagnostics.py +18 -0
  8. vector_engine-1.0.2/tests/test_eval_surface_v1.py +77 -0
  9. vector_engine-1.0.2/tests/test_ingest_pipeline.py +83 -0
  10. vector_engine-1.0.2/tests/test_install_smoke.py +7 -0
  11. vector_engine-1.0.2/tests/test_perf_smoke.py +17 -0
  12. vector_engine-1.0.2/tests/test_profile_local.py +46 -0
  13. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_real_corpus_eval.py +44 -0
  14. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_v02_features.py +46 -1
  15. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/eval/__init__.py +2 -0
  16. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/eval/retrieval.py +96 -0
  17. vector_engine-1.0.2/vector_engine.egg-info/PKG-INFO +202 -0
  18. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine.egg-info/SOURCES.txt +7 -0
  19. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine.egg-info/requires.txt +3 -3
  20. vector_engine-1.0.0/PKG-INFO +0 -342
  21. vector_engine-1.0.0/README.md +0 -324
  22. vector_engine-1.0.0/tests/test_artifact_contracts.py +0 -83
  23. vector_engine-1.0.0/vector_engine.egg-info/PKG-INFO +0 -342
  24. {vector_engine-1.0.0 → vector_engine-1.0.2}/LICENSE +0 -0
  25. {vector_engine-1.0.0 → vector_engine-1.0.2}/setup.cfg +0 -0
  26. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_api_stability.py +0 -0
  27. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_core.py +0 -0
  28. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_faiss_optional.py +0 -0
  29. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_hardening.py +0 -0
  30. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_ml_eval.py +0 -0
  31. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_persistence_compat.py +0 -0
  32. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_rag_reliability.py +0 -0
  33. {vector_engine-1.0.0 → vector_engine-1.0.2}/tests/test_release_bundle.py +0 -0
  34. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/__init__.py +0 -0
  35. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/array.py +0 -0
  36. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/backends/__init__.py +0 -0
  37. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/backends/base.py +0 -0
  38. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/backends/bruteforce.py +0 -0
  39. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/backends/faiss_backend.py +0 -0
  40. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/backends/registry.py +0 -0
  41. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/index.py +0 -0
  42. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/io/__init__.py +0 -0
  43. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/io/manifest.py +0 -0
  44. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/metric.py +0 -0
  45. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/ml/__init__.py +0 -0
  46. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/ml/clustering.py +0 -0
  47. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/ml/knn.py +0 -0
  48. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/results.py +0 -0
  49. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/training/__init__.py +0 -0
  50. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine/training/hard_negative.py +0 -0
  51. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine.egg-info/dependency_links.txt +0 -0
  52. {vector_engine-1.0.0 → vector_engine-1.0.2}/vector_engine.egg-info/top_level.txt +0 -0
@@ -0,0 +1,202 @@
1
+ Metadata-Version: 2.4
2
+ Name: vector-engine
3
+ Version: 1.0.2
4
+ Summary: ML-first vector computation and retrieval engine.
5
+ Author: Neel Panchal
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.10
8
+ Description-Content-Type: text/markdown
9
+ License-File: LICENSE
10
+ Requires-Dist: numpy<2.0,>=1.26.4
11
+ Provides-Extra: faiss
12
+ Requires-Dist: faiss-cpu>=1.7.4; (platform_system != "Darwin" or platform_machine != "arm64") and extra == "faiss"
13
+ Provides-Extra: ml
14
+ Requires-Dist: scikit-learn<1.7,>=1.4; extra == "ml"
15
+ Provides-Extra: dev
16
+ Requires-Dist: pytest<9,>=8.2; extra == "dev"
17
+ Dynamic: license-file
18
+
19
+ # Vector Engine v1.0.2
20
+
21
+ Reproducibility-first vector retrieval toolkit for local ML and IR workflows.
22
+
23
+ Vector Engine provides a clean Python API for vector indexing/search, evaluation, training utilities, and evidence-oriented benchmarking on a single machine.
24
+
25
+ ## Why Vector Engine
26
+
27
+ - ANN libraries are fast but often backend-specific and low-level.
28
+ - Vector databases focus on serving and infra, not local experimentation loops.
29
+ - ML teams still need one local toolkit for ingest, retrieval, evaluation, and reproducibility.
30
+
31
+ Vector Engine focuses on that local workflow and keeps evidence outputs machine-checkable.
32
+
33
+ ## Start Here
34
+
35
+ ```bash
36
+ python3 -m venv .venv
37
+ source .venv/bin/activate
38
+ python -m pip install --upgrade pip && python -m pip install vector-engine
39
+ ```
40
+
41
+ ## Install Options
42
+
43
+ PyPI:
44
+
45
+ ```bash
46
+ python3 -m venv .venv
47
+ source .venv/bin/activate
48
+ python -m pip install --upgrade pip
49
+ python -m pip install vector-engine
50
+ ```
51
+
52
+ Local development:
53
+
54
+ ```bash
55
+ python3 -m venv .venv
56
+ source .venv/bin/activate
57
+ python -m pip install --upgrade pip setuptools wheel
58
+ python -m pip install -e ".[dev,ml]"
59
+ python -m pytest -q
60
+ ```
61
+
62
+ macOS arm64 + Python 3.12 constrained setup:
63
+
64
+ ```bash
65
+ python3.12 -m venv .venv312
66
+ source .venv312/bin/activate
67
+ python -m pip install --upgrade pip setuptools wheel
68
+ python -m pip install -c requirements/constraints-macos-arm64-py312.txt -e ".[dev,ml]"
69
+ python -m pytest -q
70
+ ```
71
+
72
+ Optional FAISS extra:
73
+
74
+ ```bash
75
+ python -m pip install -e ".[faiss]"
76
+ ```
77
+
78
+ If you hit `externally-managed-environment`, use a virtual environment as shown above.
79
+
80
+ ## 60-Second Quickstart
81
+
82
+ ```python
83
+ import numpy as np
84
+ from vector_engine import VectorArray, VectorIndex
85
+
86
+ xb = VectorArray.from_numpy(
87
+ np.random.randn(1000, 384).astype("float32"),
88
+ ids=[f"doc-{i}" for i in range(1000)],
89
+ normalize=True,
90
+ )
91
+ xq = VectorArray.from_numpy(np.random.randn(2, 384).astype("float32"), normalize=True)
92
+
93
+ index = VectorIndex.create(xb, metric="cosine", backend="bruteforce")
94
+ results = index.search(xq, k=5)
95
+ print(results.ids[0], results.scores[0])
96
+ ```
97
+
98
+ ## v1.0.0 Surface
99
+
100
+ - Core: `VectorArray`, `VectorIndex`, `Metric`, `SearchResult`
101
+ - ML: `knn_classify`, `knn_regress`, `kmeans`, `KMeansResult`
102
+ - Training: `mine_hard_negatives`, `TripletBatch`
103
+ - Eval: `precision_at_k`, `recall_at_k`, `ndcg_at_k`, `retrieval_report`, `retrieval_report_detailed`, `batch_metrics_summary`, `retrieval_cohort_report`
104
+ - Ingest/connectors: `load_numpy_bundle`, `load_jsonl_bundle`, `load_parquet_bundle`, `with_deterministic_splits`, `scripts/ingest_dataset.py`
105
+
106
+ ## API Contract Highlights
107
+
108
+ - `VectorArray` requires non-empty 2D tensors `(n, d)` and unique `int`/`str` IDs.
109
+ - `VectorIndex.search(..., k=...)` requires positive integer `k`.
110
+ - Metadata lengths align with vector row counts.
111
+ - `kmeans(..., random_state=...)` validates finite vectors and deterministic seeds.
112
+ - Retrieval evaluation validates malformed ground truth with stable `eval_error` prefixes.
113
+
114
+ ## Data Ingest to Eval Recipe
115
+
116
+ 1) Build a reproducible ingest bundle from JSONL:
117
+
118
+ ```bash
119
+ python scripts/ingest_dataset.py \
120
+ --input-jsonl artifacts/raw/source.jsonl \
121
+ --output-dir artifacts/ingest_bundle \
122
+ --id-field id \
123
+ --text-field text \
124
+ --embedding-dim 256 \
125
+ --seed 7 \
126
+ --label-field label \
127
+ --split-field split \
128
+ --query-group-field query_group \
129
+ --ground-truth-field ground_truth
130
+ ```
131
+
132
+ 2) Run retrieval evaluation:
133
+
134
+ ```bash
135
+ python scripts/rag_real_corpus_eval.py \
136
+ --embeddings artifacts/ingest_bundle/embeddings.npy \
137
+ --query-embeddings artifacts/repro_smoke/real_corpus_inputs/query_embeddings.npy \
138
+ --ids artifacts/ingest_bundle/ids.json \
139
+ --ground-truth artifacts/ingest_bundle/ground_truth.json \
140
+ --metadata artifacts/ingest_bundle/metadata.json \
141
+ --output artifacts/real_corpus_runs/run_1.json \
142
+ --backend bruteforce \
143
+ --k 6 \
144
+ --ks 1,3,6 \
145
+ --loops 5 \
146
+ --threshold-recall 0.75 \
147
+ --threshold-ndcg 0.70 \
148
+ --threshold-p95-ms 120
149
+ ```
150
+
151
+ Bundle outputs include:
152
+
153
+ - `embeddings.npy`, `ids.json`, `metadata.json`
154
+ - optional `labels.json`, `splits.json`, `query_groups.json`, `ground_truth.json`
155
+ - `ingest_manifest.v1.json` (contract-validated)
156
+
157
+ ## Backends
158
+
159
+ | Backend | Search | Add | Save/Load | Custom Metric |
160
+ | --- | ---: | ---: | ---: | ---: |
161
+ | `bruteforce` | yes | yes | yes | yes |
162
+ | `faiss` | yes | yes | yes | no |
163
+
164
+ FAISS is optional. The required reproducibility path is bruteforce-safe.
165
+
166
+ ## Reproducibility and Evidence
167
+
168
+ Recommended release evidence flow:
169
+
170
+ ```bash
171
+ python scripts/repro_smoke.py --output-dir artifacts/repro_smoke
172
+ python scripts/benchmark_matrix.py --mode exact --warmup 2 --loops 8 --seed 7 --output-dir artifacts/benchmark_matrix
173
+ python scripts/publishable_results.py --matrix-summary artifacts/benchmark_matrix/matrix_summary.json --stability-summary artifacts/testing_runs/stability_summary_bruteforce_200.json --output artifacts/benchmark_matrix/publishable_results.v1.json
174
+ python scripts/credibility_audit.py --matrix-summary artifacts/benchmark_matrix/matrix_summary.json --stability-summary artifacts/testing_runs/stability_summary_bruteforce_200.json --publishable-summary artifacts/benchmark_matrix/publishable_results.v1.json --output artifacts/audit/credibility_audit.v1.json
175
+ ```
176
+
177
+ ## Examples
178
+
179
+ - `examples/minimal_rag_integration.py`
180
+ - `examples/hard_negative_training_batch.py`
181
+ - `examples/cohort_eval_workflow.py`
182
+ - `notebooks/01_semantic_search.ipynb`
183
+ - `notebooks/02_knn_baseline.ipynb`
184
+ - `notebooks/03_recommender_similarity.ipynb`
185
+
186
+ ## Troubleshooting
187
+
188
+ - **`externally-managed-environment`**: install inside a venv.
189
+ - **No FAISS available**: run bruteforce path and skip overlap-gated FAISS checks.
190
+ - **Dimension mismatch**: ensure query and index embeddings share the same dimension.
191
+ - **NumPy segfault on macOS/Python 3.12**: reinstall with `requirements/constraints-macos-arm64-py312.txt` and run `python scripts/env_diagnostics.py`.
192
+
193
+ ## Project Links
194
+
195
+ - `docs/releases/v1.0.0.md`
196
+ - `docs/releases/v1.0.0-checklist.md`
197
+ - `docs/reproducibility.md`
198
+ - `docs/use_cases.md`
199
+ - `docs/api_stability.md`
200
+ - `docs/research_claims.md`
201
+ - `LICENSE`
202
+ - `CITATION.cff`
@@ -0,0 +1,184 @@
1
+ # Vector Engine v1.0.2
2
+
3
+ Reproducibility-first vector retrieval toolkit for local ML and IR workflows.
4
+
5
+ Vector Engine provides a clean Python API for vector indexing/search, evaluation, training utilities, and evidence-oriented benchmarking on a single machine.
6
+
7
+ ## Why Vector Engine
8
+
9
+ - ANN libraries are fast but often backend-specific and low-level.
10
+ - Vector databases focus on serving and infra, not local experimentation loops.
11
+ - ML teams still need one local toolkit for ingest, retrieval, evaluation, and reproducibility.
12
+
13
+ Vector Engine focuses on that local workflow and keeps evidence outputs machine-checkable.
14
+
15
+ ## Start Here
16
+
17
+ ```bash
18
+ python3 -m venv .venv
19
+ source .venv/bin/activate
20
+ python -m pip install --upgrade pip && python -m pip install vector-engine
21
+ ```
22
+
23
+ ## Install Options
24
+
25
+ PyPI:
26
+
27
+ ```bash
28
+ python3 -m venv .venv
29
+ source .venv/bin/activate
30
+ python -m pip install --upgrade pip
31
+ python -m pip install vector-engine
32
+ ```
33
+
34
+ Local development:
35
+
36
+ ```bash
37
+ python3 -m venv .venv
38
+ source .venv/bin/activate
39
+ python -m pip install --upgrade pip setuptools wheel
40
+ python -m pip install -e ".[dev,ml]"
41
+ python -m pytest -q
42
+ ```
43
+
44
+ macOS arm64 + Python 3.12 constrained setup:
45
+
46
+ ```bash
47
+ python3.12 -m venv .venv312
48
+ source .venv312/bin/activate
49
+ python -m pip install --upgrade pip setuptools wheel
50
+ python -m pip install -c requirements/constraints-macos-arm64-py312.txt -e ".[dev,ml]"
51
+ python -m pytest -q
52
+ ```
53
+
54
+ Optional FAISS extra:
55
+
56
+ ```bash
57
+ python -m pip install -e ".[faiss]"
58
+ ```
59
+
60
+ If you hit `externally-managed-environment`, use a virtual environment as shown above.
61
+
62
+ ## 60-Second Quickstart
63
+
64
+ ```python
65
+ import numpy as np
66
+ from vector_engine import VectorArray, VectorIndex
67
+
68
+ xb = VectorArray.from_numpy(
69
+ np.random.randn(1000, 384).astype("float32"),
70
+ ids=[f"doc-{i}" for i in range(1000)],
71
+ normalize=True,
72
+ )
73
+ xq = VectorArray.from_numpy(np.random.randn(2, 384).astype("float32"), normalize=True)
74
+
75
+ index = VectorIndex.create(xb, metric="cosine", backend="bruteforce")
76
+ results = index.search(xq, k=5)
77
+ print(results.ids[0], results.scores[0])
78
+ ```
79
+
80
+ ## v1.0.0 Surface
81
+
82
+ - Core: `VectorArray`, `VectorIndex`, `Metric`, `SearchResult`
83
+ - ML: `knn_classify`, `knn_regress`, `kmeans`, `KMeansResult`
84
+ - Training: `mine_hard_negatives`, `TripletBatch`
85
+ - Eval: `precision_at_k`, `recall_at_k`, `ndcg_at_k`, `retrieval_report`, `retrieval_report_detailed`, `batch_metrics_summary`, `retrieval_cohort_report`
86
+ - Ingest/connectors: `load_numpy_bundle`, `load_jsonl_bundle`, `load_parquet_bundle`, `with_deterministic_splits`, `scripts/ingest_dataset.py`
87
+
88
+ ## API Contract Highlights
89
+
90
+ - `VectorArray` requires non-empty 2D tensors `(n, d)` and unique `int`/`str` IDs.
91
+ - `VectorIndex.search(..., k=...)` requires positive integer `k`.
92
+ - Metadata lengths align with vector row counts.
93
+ - `kmeans(..., random_state=...)` validates finite vectors and deterministic seeds.
94
+ - Retrieval evaluation validates malformed ground truth with stable `eval_error` prefixes.
95
+
96
+ ## Data Ingest to Eval Recipe
97
+
98
+ 1) Build a reproducible ingest bundle from JSONL:
99
+
100
+ ```bash
101
+ python scripts/ingest_dataset.py \
102
+ --input-jsonl artifacts/raw/source.jsonl \
103
+ --output-dir artifacts/ingest_bundle \
104
+ --id-field id \
105
+ --text-field text \
106
+ --embedding-dim 256 \
107
+ --seed 7 \
108
+ --label-field label \
109
+ --split-field split \
110
+ --query-group-field query_group \
111
+ --ground-truth-field ground_truth
112
+ ```
113
+
114
+ 2) Run retrieval evaluation:
115
+
116
+ ```bash
117
+ python scripts/rag_real_corpus_eval.py \
118
+ --embeddings artifacts/ingest_bundle/embeddings.npy \
119
+ --query-embeddings artifacts/repro_smoke/real_corpus_inputs/query_embeddings.npy \
120
+ --ids artifacts/ingest_bundle/ids.json \
121
+ --ground-truth artifacts/ingest_bundle/ground_truth.json \
122
+ --metadata artifacts/ingest_bundle/metadata.json \
123
+ --output artifacts/real_corpus_runs/run_1.json \
124
+ --backend bruteforce \
125
+ --k 6 \
126
+ --ks 1,3,6 \
127
+ --loops 5 \
128
+ --threshold-recall 0.75 \
129
+ --threshold-ndcg 0.70 \
130
+ --threshold-p95-ms 120
131
+ ```
132
+
133
+ Bundle outputs include:
134
+
135
+ - `embeddings.npy`, `ids.json`, `metadata.json`
136
+ - optional `labels.json`, `splits.json`, `query_groups.json`, `ground_truth.json`
137
+ - `ingest_manifest.v1.json` (contract-validated)
138
+
139
+ ## Backends
140
+
141
+ | Backend | Search | Add | Save/Load | Custom Metric |
142
+ | --- | ---: | ---: | ---: | ---: |
143
+ | `bruteforce` | yes | yes | yes | yes |
144
+ | `faiss` | yes | yes | yes | no |
145
+
146
+ FAISS is optional. The required reproducibility path is bruteforce-safe.
147
+
148
+ ## Reproducibility and Evidence
149
+
150
+ Recommended release evidence flow:
151
+
152
+ ```bash
153
+ python scripts/repro_smoke.py --output-dir artifacts/repro_smoke
154
+ python scripts/benchmark_matrix.py --mode exact --warmup 2 --loops 8 --seed 7 --output-dir artifacts/benchmark_matrix
155
+ python scripts/publishable_results.py --matrix-summary artifacts/benchmark_matrix/matrix_summary.json --stability-summary artifacts/testing_runs/stability_summary_bruteforce_200.json --output artifacts/benchmark_matrix/publishable_results.v1.json
156
+ python scripts/credibility_audit.py --matrix-summary artifacts/benchmark_matrix/matrix_summary.json --stability-summary artifacts/testing_runs/stability_summary_bruteforce_200.json --publishable-summary artifacts/benchmark_matrix/publishable_results.v1.json --output artifacts/audit/credibility_audit.v1.json
157
+ ```
158
+
159
+ ## Examples
160
+
161
+ - `examples/minimal_rag_integration.py`
162
+ - `examples/hard_negative_training_batch.py`
163
+ - `examples/cohort_eval_workflow.py`
164
+ - `notebooks/01_semantic_search.ipynb`
165
+ - `notebooks/02_knn_baseline.ipynb`
166
+ - `notebooks/03_recommender_similarity.ipynb`
167
+
168
+ ## Troubleshooting
169
+
170
+ - **`externally-managed-environment`**: install inside a venv.
171
+ - **No FAISS available**: run bruteforce path and skip overlap-gated FAISS checks.
172
+ - **Dimension mismatch**: ensure query and index embeddings share the same dimension.
173
+ - **NumPy segfault on macOS/Python 3.12**: reinstall with `requirements/constraints-macos-arm64-py312.txt` and run `python scripts/env_diagnostics.py`.
174
+
175
+ ## Project Links
176
+
177
+ - `docs/releases/v1.0.0.md`
178
+ - `docs/releases/v1.0.0-checklist.md`
179
+ - `docs/reproducibility.md`
180
+ - `docs/use_cases.md`
181
+ - `docs/api_stability.md`
182
+ - `docs/research_claims.md`
183
+ - `LICENSE`
184
+ - `CITATION.cff`
@@ -4,16 +4,16 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "vector-engine"
7
- version = "1.0.0"
7
+ version = "1.0.2"
8
8
  description = "ML-first vector computation and retrieval engine."
9
9
  readme = "README.md"
10
10
  requires-python = ">=3.10"
11
11
  license = "MIT"
12
12
  authors = [
13
- { name = "Vector Engine Contributors" }
13
+ { name = "Neel Panchal" }
14
14
  ]
15
15
  dependencies = [
16
- "numpy>=1.24",
16
+ "numpy>=1.26.4,<2.0",
17
17
  ]
18
18
 
19
19
  [project.optional-dependencies]
@@ -21,10 +21,10 @@ faiss = [
21
21
  "faiss-cpu>=1.7.4; platform_system != 'Darwin' or platform_machine != 'arm64'",
22
22
  ]
23
23
  ml = [
24
- "scikit-learn>=1.3",
24
+ "scikit-learn>=1.4,<1.7",
25
25
  ]
26
26
  dev = [
27
- "pytest>=7.4",
27
+ "pytest>=8.2,<9",
28
28
  ]
29
29
 
30
30
  [tool.setuptools.packages.find]
@@ -0,0 +1,205 @@
1
+ import pytest
2
+
3
+ from scripts.artifact_contracts import (
4
+ validate_benchmark_report,
5
+ validate_ingest_manifest,
6
+ validate_matrix_summary,
7
+ validate_publishable_summary,
8
+ validate_real_corpus_payload,
9
+ validate_stability_summary,
10
+ )
11
+
12
+
13
+ def test_validate_real_corpus_payload_accepts_valid_shape():
14
+ payload = {
15
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
16
+ "backend": "bruteforce",
17
+ "k": 10,
18
+ "ks": [1, 5, 10],
19
+ "metrics": {"recall@10": 1.0},
20
+ "performance": {"latency_p50_ms": 1.0, "latency_p95_ms": 2.0, "qps": 100.0},
21
+ "topk_ids": [["a", "b"]],
22
+ "runtime_seconds": 0.01,
23
+ "checks": {"recall_gate": True},
24
+ "environment": {"platform": "x", "python_version": "3.11", "machine": "x", "processor": "x"},
25
+ "inputs": {"embeddings_path": "a", "query_embeddings_path": "b", "ids_path": "c", "ground_truth_path": "d"},
26
+ "artifact_contract_version": "1.0",
27
+ }
28
+ validate_real_corpus_payload(payload)
29
+
30
+
31
+ def test_validate_real_corpus_payload_rejects_out_of_range_metrics():
32
+ payload = {
33
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
34
+ "backend": "bruteforce",
35
+ "k": 10,
36
+ "ks": [1, 5, 10],
37
+ "metrics": {"recall@10": 1.2},
38
+ "performance": {"latency_p50_ms": 1.0, "latency_p95_ms": 2.0, "qps": 100.0},
39
+ "topk_ids": [["a", "b"]],
40
+ "runtime_seconds": 0.01,
41
+ "checks": {"recall_gate": True},
42
+ "environment": {"platform": "x", "python_version": "3.11", "machine": "x", "processor": "x"},
43
+ "inputs": {"embeddings_path": "a", "query_embeddings_path": "b", "ids_path": "c", "ground_truth_path": "d"},
44
+ "artifact_contract_version": "1.0",
45
+ }
46
+ with pytest.raises(ValueError, match="contract_error"):
47
+ validate_real_corpus_payload(payload)
48
+
49
+
50
+ def test_validate_benchmark_report_rejects_missing_results():
51
+ payload = {
52
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
53
+ "config": {},
54
+ "environment": {},
55
+ "results": [],
56
+ "artifact_contract_version": "1.0",
57
+ }
58
+ with pytest.raises(ValueError, match="contract_error"):
59
+ validate_benchmark_report(payload)
60
+
61
+
62
+ def test_validate_benchmark_report_rejects_unknown_backend():
63
+ payload = {
64
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
65
+ "config": {"mode": "exact", "k": 10, "min_flat_overlap": None},
66
+ "environment": {},
67
+ "results": [
68
+ {
69
+ "backend": "custom_backend",
70
+ "qps": 1.0,
71
+ "latency_p50_ms": 1.0,
72
+ "latency_p95_ms": 2.0,
73
+ "overlap_vs_bruteforce": 1.0,
74
+ "memory_mb_estimate": 1.0,
75
+ }
76
+ ],
77
+ "artifact_contract_version": "1.0",
78
+ }
79
+ with pytest.raises(ValueError, match="contract_error"):
80
+ validate_benchmark_report(payload)
81
+
82
+
83
+ def test_validate_matrix_stability_publishable_contracts():
84
+ matrix_summary = {
85
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
86
+ "protocol": {
87
+ "profile": "dev",
88
+ "mode": "exact",
89
+ "warmup": 2,
90
+ "loops": 8,
91
+ "seed": 7,
92
+ "min_flat_overlap": None,
93
+ "max_memory_mb": 1024.0,
94
+ "matrix_size": 1,
95
+ },
96
+ "environment": {},
97
+ "matrix": [{"name": "s", "n": 1000, "d": 32, "nq": 64, "k": 5}],
98
+ "backend_summary": {
99
+ "bruteforce": {
100
+ "latency_p50_ms": {"mean": 1.0, "median": 1.0, "min": 1.0, "max": 1.0},
101
+ "latency_p95_ms": {"mean": 2.0, "median": 2.0, "min": 2.0, "max": 2.0},
102
+ "qps": {"mean": 100.0, "median": 100.0, "min": 90.0, "max": 110.0},
103
+ "overlap_vs_bruteforce": {"mean": 1.0, "median": 1.0, "min": 1.0, "max": 1.0},
104
+ }
105
+ },
106
+ "runs_dir": "artifacts/benchmark_matrix",
107
+ "artifact_contract_version": "1.0",
108
+ }
109
+ validate_matrix_summary(matrix_summary)
110
+
111
+ stability_summary = {
112
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
113
+ "run_count": 3,
114
+ "backend": "bruteforce",
115
+ "config": {},
116
+ "environment": {},
117
+ "performance_summary": {
118
+ "latency_p50_ms": {"mean": 1.0, "median": 1.0, "std": 0.0, "cv": 0.0, "p02_5": 1.0, "p97_5": 1.0, "min": 1.0, "max": 1.0},
119
+ "latency_p95_ms": {"mean": 1.0, "median": 1.0, "std": 0.0, "cv": 0.0, "p02_5": 1.0, "p97_5": 1.0, "min": 1.0, "max": 1.0},
120
+ "qps": {"mean": 1.0, "median": 1.0, "std": 0.0, "cv": 0.0, "p02_5": 1.0, "p97_5": 1.0, "min": 1.0, "max": 1.0},
121
+ },
122
+ "metric_summary": {"recall@1": {"mean": 1.0}},
123
+ "check_pass_rate": {},
124
+ "input_files": {},
125
+ "runs_path": "artifacts/testing_runs/runs.jsonl",
126
+ "artifact_contract_version": "1.0",
127
+ }
128
+ validate_stability_summary(stability_summary)
129
+
130
+ publishable = {
131
+ "generated_at_utc": "2026-01-01T00:00:00+00:00",
132
+ "sources": {"matrix_summary_path": "artifacts/benchmark_matrix/matrix_summary.json"},
133
+ "matrix_backend_summary": {"bruteforce": {"qps": {"mean": 1.0}}},
134
+ "stability_performance_summary": {"qps": {"mean": 1.0}},
135
+ "stability_metric_summary": {"recall@1": {"mean": 1.0}},
136
+ "protocol": {"matrix_protocol": {"mode": "exact"}, "stability_config": {"run_count": 3}},
137
+ "environment": {"matrix": {"platform": "x"}, "stability": {"platform": "x"}},
138
+ "artifact_contract_version": "1.0",
139
+ }
140
+ validate_publishable_summary(publishable)
141
+
142
+
143
+ def test_validate_ingest_manifest_accepts_valid_shape():
144
+ payload = {
145
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
146
+ "input_jsonl": "data/source.jsonl",
147
+ "output_dir": "artifacts/ingest",
148
+ "record_count": 10,
149
+ "embedding_dim": 64,
150
+ "provider": "hash",
151
+ "seed": 7,
152
+ "fields": {
153
+ "id_field": "id",
154
+ "text_field": "text",
155
+ "label_field": "label",
156
+ "split_field": "split",
157
+ "query_group_field": "query_group",
158
+ "ground_truth_field": "ground_truth",
159
+ },
160
+ "artifacts": {
161
+ "embeddings_path": "artifacts/ingest/embeddings.npy",
162
+ "ids_path": "artifacts/ingest/ids.json",
163
+ "metadata_path": "artifacts/ingest/metadata.json",
164
+ "labels_path": "artifacts/ingest/labels.json",
165
+ "splits_path": "artifacts/ingest/splits.json",
166
+ "query_groups_path": "artifacts/ingest/query_groups.json",
167
+ "ground_truth_path": "artifacts/ingest/ground_truth.json",
168
+ },
169
+ "environment": {"platform": "x", "python_version": "3.12", "machine": "arm64", "processor": "arm"},
170
+ "artifact_contract_version": "1.0",
171
+ }
172
+ validate_ingest_manifest(payload)
173
+
174
+
175
+ def test_validate_ingest_manifest_rejects_invalid_record_count():
176
+ payload = {
177
+ "timestamp_utc": "2026-01-01T00:00:00+00:00",
178
+ "input_jsonl": "data/source.jsonl",
179
+ "output_dir": "artifacts/ingest",
180
+ "record_count": 0,
181
+ "embedding_dim": 64,
182
+ "provider": "hash",
183
+ "seed": 7,
184
+ "fields": {
185
+ "id_field": "id",
186
+ "text_field": "text",
187
+ "label_field": None,
188
+ "split_field": None,
189
+ "query_group_field": None,
190
+ "ground_truth_field": None,
191
+ },
192
+ "artifacts": {
193
+ "embeddings_path": "artifacts/ingest/embeddings.npy",
194
+ "ids_path": "artifacts/ingest/ids.json",
195
+ "metadata_path": "artifacts/ingest/metadata.json",
196
+ "labels_path": None,
197
+ "splits_path": None,
198
+ "query_groups_path": None,
199
+ "ground_truth_path": None,
200
+ },
201
+ "environment": {"platform": "x", "python_version": "3.12", "machine": "arm64", "processor": "arm"},
202
+ "artifact_contract_version": "1.0",
203
+ }
204
+ with pytest.raises(ValueError, match="contract_error"):
205
+ validate_ingest_manifest(payload)