voyage-ledgerproof 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- voyage_ledgerproof-0.1.0/LICENSE +189 -0
- voyage_ledgerproof-0.1.0/PKG-INFO +187 -0
- voyage_ledgerproof-0.1.0/README.md +156 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/__init__.py +102 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/async_client_wrapper.py +120 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/canonical.py +106 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/client_wrapper.py +168 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/decorator.py +164 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/emitter.py +102 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/manual.py +411 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/schema.py +242 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/signer.py +123 -0
- voyage_ledgerproof-0.1.0/ledgerproof_voyage/version.py +1 -0
- voyage_ledgerproof-0.1.0/pyproject.toml +51 -0
- voyage_ledgerproof-0.1.0/setup.cfg +4 -0
- voyage_ledgerproof-0.1.0/tests/test_canonical.py +71 -0
- voyage_ledgerproof-0.1.0/tests/test_client_wrapper.py +221 -0
- voyage_ledgerproof-0.1.0/tests/test_emitter.py +55 -0
- voyage_ledgerproof-0.1.0/tests/test_manual.py +236 -0
- voyage_ledgerproof-0.1.0/tests/test_rag_pipeline.py +142 -0
- voyage_ledgerproof-0.1.0/tests/test_schema.py +226 -0
- voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/PKG-INFO +187 -0
- voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/SOURCES.txt +24 -0
- voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/dependency_links.txt +1 -0
- voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/requires.txt +8 -0
- voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/top_level.txt +1 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship, whether in Source or
|
|
36
|
+
Object form, made available under the License, as indicated by a
|
|
37
|
+
copyright notice that is included in or attached to the work
|
|
38
|
+
(an example is provided in the Appendix below).
|
|
39
|
+
|
|
40
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
41
|
+
form, that is based on (or derived from) the Work and for which the
|
|
42
|
+
editorial revisions, annotations, elaborations, or other modifications
|
|
43
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
44
|
+
of this License, Derivative Works shall not include works that remain
|
|
45
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
46
|
+
the Work and Derivative Works thereof.
|
|
47
|
+
|
|
48
|
+
"Contribution" shall mean any work of authorship, including
|
|
49
|
+
the original version of the Work and any modifications or additions
|
|
50
|
+
to that Work or Derivative Works thereof, that is intentionally
|
|
51
|
+
submitted to Licensor for inclusion in the Work by the copyright owner
|
|
52
|
+
or by an individual or Legal Entity authorized to submit on behalf of
|
|
53
|
+
the copyright owner. For the purposes of this definition, "submitted"
|
|
54
|
+
means any form of electronic, verbal, or written communication sent
|
|
55
|
+
to the Licensor or its representatives, including but not limited to
|
|
56
|
+
communication on electronic mailing lists, source code control systems,
|
|
57
|
+
and issue tracking systems that are managed by, or on behalf of, the
|
|
58
|
+
Licensor for the purpose of discussing and improving the Work, but
|
|
59
|
+
excluding communication that is conspicuously marked or otherwise
|
|
60
|
+
designated in writing by the copyright owner as "Not a Contribution."
|
|
61
|
+
|
|
62
|
+
"Contributor" shall mean Licensor and any individual or Legal Entity
|
|
63
|
+
on behalf of whom a Contribution has been received by Licensor and
|
|
64
|
+
subsequently incorporated within the Work.
|
|
65
|
+
|
|
66
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
67
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
68
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
69
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
70
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
71
|
+
Work and such Derivative Works in Source or Object form.
|
|
72
|
+
|
|
73
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
74
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
75
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
76
|
+
(except as stated in this section) patent license to make, have made,
|
|
77
|
+
use, offer to sell, sell, import, and otherwise transfer the Work,
|
|
78
|
+
where such license applies only to those patent claims licensable
|
|
79
|
+
by such Contributor that are necessarily infringed by their
|
|
80
|
+
Contribution(s) alone or by combination of their Contribution(s)
|
|
81
|
+
with the Work to which such Contribution(s) was submitted. If You
|
|
82
|
+
institute patent litigation against any entity (including a
|
|
83
|
+
cross-claim or counterclaim in a lawsuit) alleging that the Work
|
|
84
|
+
or a Contribution incorporated within the Work constitutes direct
|
|
85
|
+
or contributory patent infringement, then any patent licenses
|
|
86
|
+
granted to You under this License for that Work shall terminate
|
|
87
|
+
as of the date such litigation is filed.
|
|
88
|
+
|
|
89
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
90
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
91
|
+
modifications, and in Source or Object form, provided that You
|
|
92
|
+
meet the following conditions:
|
|
93
|
+
|
|
94
|
+
(a) You must give any other recipients of the Work or
|
|
95
|
+
Derivative Works a copy of this License; and
|
|
96
|
+
|
|
97
|
+
(b) You must cause any modified files to carry prominent notices
|
|
98
|
+
stating that You changed the files; and
|
|
99
|
+
|
|
100
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
101
|
+
that You distribute, all copyright, patent, trademark, and
|
|
102
|
+
attribution notices from the Source form of the Work,
|
|
103
|
+
excluding those notices that do not pertain to any part of
|
|
104
|
+
the Derivative Works; and
|
|
105
|
+
|
|
106
|
+
(d) If the Work includes a "NOTICE" text file as part of its
|
|
107
|
+
distribution, then any Derivative Works that You distribute must
|
|
108
|
+
include a readable copy of the attribution notices contained
|
|
109
|
+
within such NOTICE file, excluding those notices that do not
|
|
110
|
+
pertain to any part of the Derivative Works, in at least one
|
|
111
|
+
of the following places: within a NOTICE text file distributed
|
|
112
|
+
as part of the Derivative Works; within the Source form or
|
|
113
|
+
documentation, if provided along with the Derivative Works; or,
|
|
114
|
+
within a display generated by the Derivative Works, if and
|
|
115
|
+
wherever such third-party notices normally appear. The contents
|
|
116
|
+
of the NOTICE file are for informational purposes only and
|
|
117
|
+
do not modify the License. You may add Your own attribution
|
|
118
|
+
notices within Derivative Works that You distribute, alongside
|
|
119
|
+
or as an addendum to the NOTICE text from the Work, provided
|
|
120
|
+
that such additional attribution notices cannot be construed
|
|
121
|
+
as modifying the License.
|
|
122
|
+
|
|
123
|
+
You may add Your own copyright statement to Your modifications and
|
|
124
|
+
may provide additional or different license terms and conditions
|
|
125
|
+
for use, reproduction, or distribution of Your modifications, or
|
|
126
|
+
for any such Derivative Works as a whole, provided Your use,
|
|
127
|
+
reproduction, and distribution of the Work otherwise complies with
|
|
128
|
+
the conditions stated in this License.
|
|
129
|
+
|
|
130
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
131
|
+
any Contribution intentionally submitted for inclusion in the Work
|
|
132
|
+
by You to the Licensor shall be under the terms and conditions of
|
|
133
|
+
this License, without any additional terms or conditions.
|
|
134
|
+
Notwithstanding the above, nothing herein shall supersede or modify
|
|
135
|
+
the terms of any separate license agreement you may have executed
|
|
136
|
+
with Licensor regarding such Contributions.
|
|
137
|
+
|
|
138
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
139
|
+
names, trademarks, service marks, or product names of the Licensor,
|
|
140
|
+
except as required for describing the origin of the Work and
|
|
141
|
+
reproducing the content of the NOTICE file.
|
|
142
|
+
|
|
143
|
+
7. Disclaimer of Warranty. Unless required by applicable law or
|
|
144
|
+
agreed to in writing, Licensor provides the Work (and each
|
|
145
|
+
Contributor provides its Contributions) on an "AS IS" BASIS,
|
|
146
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
|
|
147
|
+
implied, including, without limitation, any warranties or conditions
|
|
148
|
+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
|
|
149
|
+
PARTICULAR PURPOSE. You are solely responsible for determining the
|
|
150
|
+
appropriateness of using or redistributing the Work and assume any
|
|
151
|
+
risks associated with Your exercise of permissions under this License.
|
|
152
|
+
|
|
153
|
+
8. Limitation of Liability. In no event and under no legal theory,
|
|
154
|
+
whether in tort (including negligence), contract, or otherwise,
|
|
155
|
+
unless required by applicable law (such as deliberate and grossly
|
|
156
|
+
negligent acts) or agreed to in writing, shall any Contributor be
|
|
157
|
+
liable to You for damages, including any direct, indirect, special,
|
|
158
|
+
incidental, or consequential damages of any character arising as a
|
|
159
|
+
result of this License or out of the use or inability to use the
|
|
160
|
+
Work (including but not limited to damages for loss of goodwill,
|
|
161
|
+
work stoppage, computer failure or malfunction, or any and all
|
|
162
|
+
other commercial damages or losses), even if such Contributor
|
|
163
|
+
has been advised of the possibility of such damages.
|
|
164
|
+
|
|
165
|
+
9. Accepting Warranty or Additional Liability. While redistributing
|
|
166
|
+
the Work or Derivative Works thereof, You may accept warranty
|
|
167
|
+
or additional liability for, or impose any obligations consistent
|
|
168
|
+
with, this License. However, in accepting such obligations, You may
|
|
169
|
+
act only on Your own behalf and on Your sole responsibility, not on
|
|
170
|
+
behalf of any other Contributor, and only if You agree to indemnify,
|
|
171
|
+
defend, and hold each Contributor harmless for any liability
|
|
172
|
+
incurred by, or claims asserted against, such Contributor by reason
|
|
173
|
+
of your accepting any such warranty or additional liability.
|
|
174
|
+
|
|
175
|
+
END OF TERMS AND CONDITIONS
|
|
176
|
+
|
|
177
|
+
Copyright 2026 LedgerProof Foundation (in formation)
|
|
178
|
+
|
|
179
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
180
|
+
you may not use this file except in compliance with the License.
|
|
181
|
+
You may obtain a copy of the License at
|
|
182
|
+
|
|
183
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
184
|
+
|
|
185
|
+
Unless required by applicable law or agreed to in writing, software
|
|
186
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
187
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
188
|
+
See the License for the specific language governing permissions and
|
|
189
|
+
limitations under the License.
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: voyage-ledgerproof
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: LedgerProof adapter for the Voyage AI Python SDK. Side-channel transparency receipts for EU AI Act Article 50 RAG pipeline evidence.
|
|
5
|
+
Author: LedgerProof Foundation (in formation)
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://ledgerproof.org
|
|
8
|
+
Project-URL: Repository, https://github.com/ledgerproof/ledgerproof
|
|
9
|
+
Project-URL: Documentation, https://docs.ledgerproof.org
|
|
10
|
+
Keywords: ledgerproof,voyage,voyageai,embeddings,rerank,rag,eu-ai-act,article-50,transparency,receipts
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
18
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
19
|
+
Classifier: Topic :: Security :: Cryptography
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: voyageai>=0.3
|
|
24
|
+
Requires-Dist: cryptography>=41
|
|
25
|
+
Requires-Dist: cbor2>=5.0
|
|
26
|
+
Requires-Dist: pydantic>=2.0
|
|
27
|
+
Provides-Extra: test
|
|
28
|
+
Requires-Dist: pytest>=7.0; extra == "test"
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.21; extra == "test"
|
|
30
|
+
Dynamic: license-file
|
|
31
|
+
|
|
32
|
+
# ledgerproof-voyage
|
|
33
|
+
|
|
34
|
+
LedgerProof adapter for the [Voyage AI Python SDK](https://github.com/voyage-ai/voyageai-python) (embeddings + rerank).
|
|
35
|
+
|
|
36
|
+
Emits **side-channel cryptographic transparency receipts** that capture the upstream retrieval evidence
|
|
37
|
+
(embeddings, rerank scores) feeding a downstream chatbot, suitable as an evidence layer for the
|
|
38
|
+
**EU AI Act Article 50** RAG pipeline scenario.
|
|
39
|
+
|
|
40
|
+
This adapter is **not endorsed by Voyage AI** or MongoDB. It wraps the official `voyageai` SDK; it is not
|
|
41
|
+
affiliated with Voyage AI Inc. or MongoDB Inc.
|
|
42
|
+
|
|
43
|
+
## Why an embeddings adapter for an Article 50 protocol?
|
|
44
|
+
|
|
45
|
+
Voyage AI is **upstream RAG infrastructure**. Calls to `voyageai.Client.embed()` and
|
|
46
|
+
`voyageai.Client.rerank()` are **not themselves** direct Article 50(1) "AI system intended to interact
|
|
47
|
+
directly with natural persons" surfaces — they are retrieval-side substrate.
|
|
48
|
+
|
|
49
|
+
But they are the **evidence trail** for any RAG pipeline whose downstream chatbot output **is** under
|
|
50
|
+
Article 50. When a regulator (or an internal auditor, or a litigant) asks the load-bearing question
|
|
51
|
+
*"what sources did the model see, and how did they get there?"*, the answer is a chain:
|
|
52
|
+
|
|
53
|
+
```
|
|
54
|
+
user query
|
|
55
|
+
-> Voyage embed(query) [embedding_inference/v1]
|
|
56
|
+
-> Voyage embed(documents) [embedding_inference/v1]
|
|
57
|
+
-> ANN retrieval (vector DB) [out of scope]
|
|
58
|
+
-> Voyage rerank(query, candidates) [rerank_inference/v1]
|
|
59
|
+
-> downstream chatbot turn [bound by rag_pipeline_evidence/v1]
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
This adapter signs each link of that chain locally (constraint C4) so the full provenance is verifiable
|
|
63
|
+
offline from the stored signed receipts. The downstream chatbot turn is captured by the matching
|
|
64
|
+
LedgerProof chat adapter (`ledgerproof-openai`, `ledgerproof-anthropic`, `ledgerproof-cohere`, etc.) and
|
|
65
|
+
both are stitched together by a `rag_pipeline_evidence/v1` envelope.
|
|
66
|
+
|
|
67
|
+
This positioning matters because **most Tier-1 EU enterprise AI deployments under Article 50 enforcement
|
|
68
|
+
(2 August 2026) are RAG-shaped**, and the regulator's first incident-response question is invariably
|
|
69
|
+
"what evidence does the model have?". A signed Voyage embedding + rerank receipt is the cryptographic
|
|
70
|
+
answer.
|
|
71
|
+
|
|
72
|
+
## 5-minute quickstart
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
pip install voyage-ledgerproof
|
|
76
|
+
export VOYAGE_API_KEY=...
|
|
77
|
+
```
|
|
78
|
+
|
|
79
|
+
```python
|
|
80
|
+
from ledgerproof_voyage import LedgerProofVoyage, LogEmitter
|
|
81
|
+
|
|
82
|
+
client = LedgerProofVoyage(
|
|
83
|
+
deployer_id="acme-corp-eu",
|
|
84
|
+
emitter=LogEmitter(),
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
result = client.embed(
|
|
88
|
+
texts=["Article 50 transparency obligations apply from 2 August 2026."],
|
|
89
|
+
model="voyage-3-large",
|
|
90
|
+
input_type="document",
|
|
91
|
+
)
|
|
92
|
+
|
|
93
|
+
print(result.embeddings[0][:4], "...")
|
|
94
|
+
# Receipt has already been emitted to the side-channel.
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
The Voyage `EmbeddingsObject` is returned **unchanged**. The receipt is emitted to the side channel only
|
|
98
|
+
(C7).
|
|
99
|
+
|
|
100
|
+
## Three schemas
|
|
101
|
+
|
|
102
|
+
| Schema | Article 50 mapping | What it binds |
|
|
103
|
+
| --- | --- | --- |
|
|
104
|
+
| `embedding_inference/v1` | Supporting infrastructure | per-input SHA-256, per-vector SHA-256, vector dim, model_id, input_type |
|
|
105
|
+
| `rerank_inference/v1` | Supporting infrastructure | query SHA-256, per-candidate SHA-256, relevance scores, post-rerank order, model_id |
|
|
106
|
+
| `rag_pipeline_evidence/v1` | Article 50(1) variant | upstream embed + rerank receipt hashes -> downstream chat receipt hash + user query hash |
|
|
107
|
+
|
|
108
|
+
`embedding_inference/v1` and `rerank_inference/v1` carry
|
|
109
|
+
`regulatory_context.article_50_paragraph = "supporting"` — they are not Article 50(1) artefacts in
|
|
110
|
+
themselves. `rag_pipeline_evidence/v1` carries `"1"` because that is the regulated user-facing turn.
|
|
111
|
+
|
|
112
|
+
## Three integration patterns
|
|
113
|
+
|
|
114
|
+
1. **Client wrapper** (recommended) — `LedgerProofVoyage` wraps `voyageai.Client` and intercepts
|
|
115
|
+
`embed()` and `rerank()`. Sync and async (`LedgerProofAsyncVoyage`) supported.
|
|
116
|
+
2. **Decorator** — `@lpr_track_embed(deployer_id=..., model=...)` and
|
|
117
|
+
`@lpr_track_rerank(deployer_id=..., model=...)` for user-defined functions wrapping Voyage calls.
|
|
118
|
+
3. **Manual emission** — `emit_embedding_receipt(...)`, `emit_rerank_receipt(...)`, and
|
|
119
|
+
`emit_rag_pipeline_receipt(...)` for full control inside a custom RAG orchestration layer.
|
|
120
|
+
|
|
121
|
+
See `examples/`:
|
|
122
|
+
|
|
123
|
+
- `examples/01_voyage_embed_quickstart.py` — vanilla `embed()`
|
|
124
|
+
- `examples/02_voyage_rerank.py` — `rerank()` with relevance-score binding
|
|
125
|
+
- `examples/03_rag_pipeline_evidence.py` — full embed -> rerank -> (downstream chat stub) -> `rag_pipeline_evidence/v1`
|
|
126
|
+
|
|
127
|
+
## Architectural discipline (C1–C8)
|
|
128
|
+
|
|
129
|
+
This adapter is implemented under the LedgerProof protocol's load-bearing constraints (C1–C8). The ones that bear directly on this adapter are:
|
|
130
|
+
|
|
131
|
+
- **C1**: **No claim of regulator endorsement. No claim of Article 40 presumption of conformity. Not
|
|
132
|
+
endorsed by Voyage AI Inc. or MongoDB Inc.** This adapter wraps the official Voyage Python SDK; it is
|
|
133
|
+
not affiliated with either company.
|
|
134
|
+
- **C4**: Local verification only. The adapter does **not** phone home to LedgerProof servers during
|
|
135
|
+
normal operation. Signed receipts verify offline given the public key.
|
|
136
|
+
- **C6**: Stream-aware signing primitives are exposed for downstream chat adapters. Voyage's own surfaces
|
|
137
|
+
(`embed`, `rerank`) are non-streaming today.
|
|
138
|
+
- **C7**: Side-channel emission only. The adapter **cannot and does not modify** the Voyage response
|
|
139
|
+
payload. The `EmbeddingsObject` / `RerankingObject` is returned unchanged.
|
|
140
|
+
|
|
141
|
+
## GDPR posture
|
|
142
|
+
|
|
143
|
+
Receipts reference content by SHA-256 hash only. The adapter **does not store**:
|
|
144
|
+
|
|
145
|
+
- raw input texts to `embed()`
|
|
146
|
+
- raw document text fed to `rerank()`
|
|
147
|
+
- raw embedding vectors (the vector is canonicalized to big-endian float64 and hashed; the hash is
|
|
148
|
+
stored, not the vector)
|
|
149
|
+
- raw user queries fed into the downstream chatbot
|
|
150
|
+
|
|
151
|
+
Identifiers (`deployer_id`, `document_id`, `tool_call_id`) are length-bounded and character-set-
|
|
152
|
+
restricted (no free-form PII). The deployer is responsible for ensuring the inputs they feed in
|
|
153
|
+
(e.g. `document_id` values) do not themselves leak PII.
|
|
154
|
+
|
|
155
|
+
## Vector canonicalization
|
|
156
|
+
|
|
157
|
+
Embedding vectors are canonicalized to **IEEE-754 big-endian float64** byte strings before hashing. This
|
|
158
|
+
is portable across machines, Python versions, and operating systems. A verifier with the original vector
|
|
159
|
+
can recompute `SHA-256(canonicalize_vector(vector))` and compare to `vector_sha256_hex` in the receipt.
|
|
160
|
+
|
|
161
|
+
Voyage exposes `output_dtype=` for quantized, lower-precision dtypes (`int8`, `uint8`, `binary`, `ubinary`); the
|
|
162
|
+
adapter records the requested `output_dtype` on `VoyageModelRef.output_dtype` for transparency, but
|
|
163
|
+
always canonicalizes through float64 for one stable wire format.
|
|
164
|
+
|
|
165
|
+
## Scope disclaimer
|
|
166
|
+
|
|
167
|
+
LedgerProof provides an **evidence layer for Article 50 transparency obligations only**.
|
|
168
|
+
|
|
169
|
+
It does **not** cover:
|
|
170
|
+
|
|
171
|
+
- Article 9 (risk management system)
|
|
172
|
+
- Article 10 (data and data governance)
|
|
173
|
+
- Article 13 (transparency and information to deployers)
|
|
174
|
+
- Article 15 (accuracy, robustness, and cybersecurity)
|
|
175
|
+
- Article 72 (post-market monitoring)
|
|
176
|
+
|
|
177
|
+
LedgerProof does **not** confer presumption of conformity under Article 40. LedgerProof is not endorsed
|
|
178
|
+
by the European Commission, the AI Office, any national competent authority, **Voyage AI Inc., or
|
|
179
|
+
MongoDB Inc.**
|
|
180
|
+
|
|
181
|
+
This adapter wraps the official `voyageai` Python SDK; it is not affiliated with Voyage AI Inc. or
|
|
182
|
+
MongoDB Inc.
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
Apache 2.0. Copyright 2026 LedgerProof Foundation (in formation: US 501(c)(3) Delaware + Dutch Stichting
|
|
187
|
+
EU subsidiary).
|
|
@@ -0,0 +1,156 @@
|
|
|
1
|
+
# ledgerproof-voyage
|
|
2
|
+
|
|
3
|
+
LedgerProof adapter for the [Voyage AI Python SDK](https://github.com/voyage-ai/voyageai-python) (embeddings + rerank).
|
|
4
|
+
|
|
5
|
+
Emits **side-channel cryptographic transparency receipts** that capture the upstream retrieval evidence
|
|
6
|
+
(embeddings, rerank scores) feeding a downstream chatbot, suitable as an evidence layer for the
|
|
7
|
+
**EU AI Act Article 50** RAG pipeline scenario.
|
|
8
|
+
|
|
9
|
+
This adapter is **not endorsed by Voyage AI** or MongoDB. It wraps the official `voyageai` SDK; it is not
|
|
10
|
+
affiliated with Voyage AI Inc. or MongoDB Inc.
|
|
11
|
+
|
|
12
|
+
## Why an embeddings adapter for an Article 50 protocol?
|
|
13
|
+
|
|
14
|
+
Voyage AI is **upstream RAG infrastructure**. Calls to `voyageai.Client.embed()` and
|
|
15
|
+
`voyageai.Client.rerank()` are **not themselves** direct Article 50(1) "AI system intended to interact
|
|
16
|
+
directly with natural persons" surfaces — they are retrieval-side substrate.
|
|
17
|
+
|
|
18
|
+
But they are the **evidence trail** for any RAG pipeline whose downstream chatbot output **is** under
|
|
19
|
+
Article 50. When a regulator (or an internal auditor, or a litigant) asks the load-bearing question
|
|
20
|
+
*"what sources did the model see, and how did they get there?"*, the answer is a chain:
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
user query
|
|
24
|
+
-> Voyage embed(query) [embedding_inference/v1]
|
|
25
|
+
-> Voyage embed(documents) [embedding_inference/v1]
|
|
26
|
+
-> ANN retrieval (vector DB) [out of scope]
|
|
27
|
+
-> Voyage rerank(query, candidates) [rerank_inference/v1]
|
|
28
|
+
-> downstream chatbot turn [bound by rag_pipeline_evidence/v1]
|
|
29
|
+
```
|
|
30
|
+
|
|
31
|
+
This adapter signs each link of that chain locally (constraint C4) so the full provenance is verifiable
|
|
32
|
+
offline from the stored signed receipts. The downstream chatbot turn is captured by the matching
|
|
33
|
+
LedgerProof chat adapter (`ledgerproof-openai`, `ledgerproof-anthropic`, `ledgerproof-cohere`, etc.) and
|
|
34
|
+
both are stitched together by a `rag_pipeline_evidence/v1` envelope.
|
|
35
|
+
|
|
36
|
+
This positioning matters because **most Tier-1 EU enterprise AI deployments under Article 50 enforcement
|
|
37
|
+
(2 August 2026) are RAG-shaped**, and the regulator's first incident-response question is invariably
|
|
38
|
+
"what evidence does the model have?". A signed Voyage embedding + rerank receipt is the cryptographic
|
|
39
|
+
answer.
|
|
40
|
+
|
|
41
|
+
## 5-minute quickstart
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
pip install voyage-ledgerproof
|
|
45
|
+
export VOYAGE_API_KEY=...
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
from ledgerproof_voyage import LedgerProofVoyage, LogEmitter
|
|
50
|
+
|
|
51
|
+
client = LedgerProofVoyage(
|
|
52
|
+
deployer_id="acme-corp-eu",
|
|
53
|
+
emitter=LogEmitter(),
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
result = client.embed(
|
|
57
|
+
texts=["Article 50 transparency obligations apply from 2 August 2026."],
|
|
58
|
+
model="voyage-3-large",
|
|
59
|
+
input_type="document",
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
print(result.embeddings[0][:4], "...")
|
|
63
|
+
# Receipt has already been emitted to the side-channel.
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
The Voyage `EmbeddingsObject` is returned **unchanged**. The receipt is emitted to the side channel only
|
|
67
|
+
(C7).
|
|
68
|
+
|
|
69
|
+
## Three schemas
|
|
70
|
+
|
|
71
|
+
| Schema | Article 50 mapping | What it binds |
|
|
72
|
+
| --- | --- | --- |
|
|
73
|
+
| `embedding_inference/v1` | Supporting infrastructure | per-input SHA-256, per-vector SHA-256, vector dim, model_id, input_type |
|
|
74
|
+
| `rerank_inference/v1` | Supporting infrastructure | query SHA-256, per-candidate SHA-256, relevance scores, post-rerank order, model_id |
|
|
75
|
+
| `rag_pipeline_evidence/v1` | Article 50(1) variant | upstream embed + rerank receipt hashes -> downstream chat receipt hash + user query hash |
|
|
76
|
+
|
|
77
|
+
`embedding_inference/v1` and `rerank_inference/v1` carry
|
|
78
|
+
`regulatory_context.article_50_paragraph = "supporting"` — they are not Article 50(1) artefacts in
|
|
79
|
+
themselves. `rag_pipeline_evidence/v1` carries `"1"` because that is the regulated user-facing turn.
|
|
80
|
+
|
|
81
|
+
## Three integration patterns
|
|
82
|
+
|
|
83
|
+
1. **Client wrapper** (recommended) — `LedgerProofVoyage` wraps `voyageai.Client` and intercepts
|
|
84
|
+
`embed()` and `rerank()`. Sync and async (`LedgerProofAsyncVoyage`) supported.
|
|
85
|
+
2. **Decorator** — `@lpr_track_embed(deployer_id=..., model=...)` and
|
|
86
|
+
`@lpr_track_rerank(deployer_id=..., model=...)` for user-defined functions wrapping Voyage calls.
|
|
87
|
+
3. **Manual emission** — `emit_embedding_receipt(...)`, `emit_rerank_receipt(...)`, and
|
|
88
|
+
`emit_rag_pipeline_receipt(...)` for full control inside a custom RAG orchestration layer.
|
|
89
|
+
|
|
90
|
+
See `examples/`:
|
|
91
|
+
|
|
92
|
+
- `examples/01_voyage_embed_quickstart.py` — vanilla `embed()`
|
|
93
|
+
- `examples/02_voyage_rerank.py` — `rerank()` with relevance-score binding
|
|
94
|
+
- `examples/03_rag_pipeline_evidence.py` — full embed -> rerank -> (downstream chat stub) -> `rag_pipeline_evidence/v1`
|
|
95
|
+
|
|
96
|
+
## Architectural discipline (C1–C8)
|
|
97
|
+
|
|
98
|
+
This adapter is implemented under the LedgerProof protocol's load-bearing constraints (C1–C8). The ones that bear directly on this adapter are:
|
|
99
|
+
|
|
100
|
+
- **C1**: **No claim of regulator endorsement. No claim of Article 40 presumption of conformity. Not
|
|
101
|
+
endorsed by Voyage AI Inc. or MongoDB Inc.** This adapter wraps the official Voyage Python SDK; it is
|
|
102
|
+
not affiliated with either company.
|
|
103
|
+
- **C4**: Local verification only. The adapter does **not** phone home to LedgerProof servers during
|
|
104
|
+
normal operation. Signed receipts verify offline given the public key.
|
|
105
|
+
- **C6**: Stream-aware signing primitives are exposed for downstream chat adapters. Voyage's own surfaces
|
|
106
|
+
(`embed`, `rerank`) are non-streaming today.
|
|
107
|
+
- **C7**: Side-channel emission only. The adapter **cannot and does not modify** the Voyage response
|
|
108
|
+
payload. The `EmbeddingsObject` / `RerankingObject` is returned unchanged.
|
|
109
|
+
|
|
110
|
+
## GDPR posture
|
|
111
|
+
|
|
112
|
+
Receipts reference content by SHA-256 hash only. The adapter **does not store**:
|
|
113
|
+
|
|
114
|
+
- raw input texts to `embed()`
|
|
115
|
+
- raw document text fed to `rerank()`
|
|
116
|
+
- raw embedding vectors (the vector is canonicalized to big-endian float64 and hashed; the hash is
|
|
117
|
+
stored, not the vector)
|
|
118
|
+
- raw user queries fed into the downstream chatbot
|
|
119
|
+
|
|
120
|
+
Identifiers (`deployer_id`, `document_id`, `tool_call_id`) are length-bounded and character-set-
|
|
121
|
+
restricted (no free-form PII). The deployer is responsible for ensuring the inputs they feed in
|
|
122
|
+
(e.g. `document_id` values) do not themselves leak PII.
|
|
123
|
+
|
|
124
|
+
## Vector canonicalization
|
|
125
|
+
|
|
126
|
+
Embedding vectors are canonicalized to **IEEE-754 big-endian float64** byte strings before hashing. This
|
|
127
|
+
is portable across machines, Python versions, and operating systems. A verifier with the original vector
|
|
128
|
+
can recompute `SHA-256(canonicalize_vector(vector))` and compare to `vector_sha256_hex` in the receipt.
|
|
129
|
+
|
|
130
|
+
Voyage exposes `output_dtype=` for quantized, lower-precision dtypes (`int8`, `uint8`, `binary`, `ubinary`); the
|
|
131
|
+
adapter records the requested `output_dtype` on `VoyageModelRef.output_dtype` for transparency, but
|
|
132
|
+
always canonicalizes through float64 for one stable wire format.
|
|
133
|
+
|
|
134
|
+
## Scope disclaimer
|
|
135
|
+
|
|
136
|
+
LedgerProof provides an **evidence layer for Article 50 transparency obligations only**.
|
|
137
|
+
|
|
138
|
+
It does **not** cover:
|
|
139
|
+
|
|
140
|
+
- Article 9 (risk management system)
|
|
141
|
+
- Article 10 (data and data governance)
|
|
142
|
+
- Article 13 (transparency and information to deployers)
|
|
143
|
+
- Article 15 (accuracy, robustness, and cybersecurity)
|
|
144
|
+
- Article 72 (post-market monitoring)
|
|
145
|
+
|
|
146
|
+
LedgerProof does **not** confer presumption of conformity under Article 40. LedgerProof is not endorsed
|
|
147
|
+
by the European Commission, the AI Office, any national competent authority, **Voyage AI Inc., or
|
|
148
|
+
MongoDB Inc.**
|
|
149
|
+
|
|
150
|
+
This adapter wraps the official `voyageai` Python SDK; it is not affiliated with Voyage AI Inc. or
|
|
151
|
+
MongoDB Inc.
|
|
152
|
+
|
|
153
|
+
## License
|
|
154
|
+
|
|
155
|
+
Apache 2.0. Copyright 2026 LedgerProof Foundation (in formation: US 501(c)(3) Delaware + Dutch Stichting
|
|
156
|
+
EU subsidiary).
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
"""
|
|
2
|
+
LedgerProof adapter for the Voyage AI Python SDK (embeddings + rerank).
|
|
3
|
+
|
|
4
|
+
Side-channel cryptographic transparency receipts for EU AI Act Article 50 RAG
|
|
5
|
+
pipeline evidence. Voyage AI is upstream RAG infrastructure; this adapter binds
|
|
6
|
+
embedding + rerank evidence to a downstream chatbot turn so the full
|
|
7
|
+
retrieval -> generation chain is cryptographically verifiable.
|
|
8
|
+
|
|
9
|
+
Discipline:
|
|
10
|
+
- C1: No regulator endorsement. No Article 40 presumption of conformity.
|
|
11
|
+
Not endorsed by Voyage AI / MongoDB.
|
|
12
|
+
- C4: Offline verification only; no phone-home.
|
|
13
|
+
- C6: Stream-aware SHA-256 primitives available for downstream chat receipts.
|
|
14
|
+
- C7: Side-channel emission only; never modifies the Voyage response.
|
|
15
|
+
"""
|
|
16
|
+
|
|
17
|
+
from .async_client_wrapper import LedgerProofAsyncVoyage
|
|
18
|
+
from .client_wrapper import LedgerProofVoyage
|
|
19
|
+
from .decorator import lpr_track_embed, lpr_track_rerank
|
|
20
|
+
from .emitter import (
|
|
21
|
+
Emitter,
|
|
22
|
+
LogEmitter,
|
|
23
|
+
MultiEmitter,
|
|
24
|
+
QueueEmitter,
|
|
25
|
+
StderrEmitter,
|
|
26
|
+
WebhookEmitter,
|
|
27
|
+
)
|
|
28
|
+
from .manual import (
|
|
29
|
+
build_embedding_refs,
|
|
30
|
+
build_rerank_result_refs,
|
|
31
|
+
build_voyage_model_ref,
|
|
32
|
+
emit_embedding_receipt,
|
|
33
|
+
emit_rag_pipeline_receipt,
|
|
34
|
+
emit_rerank_receipt,
|
|
35
|
+
extract_embeddings,
|
|
36
|
+
extract_rerank_results,
|
|
37
|
+
extract_total_tokens,
|
|
38
|
+
)
|
|
39
|
+
from .schema import (
|
|
40
|
+
DownstreamChatRef,
|
|
41
|
+
EmbeddingRef,
|
|
42
|
+
ReceiptV1,
|
|
43
|
+
RegulatoryContext,
|
|
44
|
+
RerankResultRef,
|
|
45
|
+
UpstreamReceiptRef,
|
|
46
|
+
VoyageModelRef,
|
|
47
|
+
build_embedding_inference_receipt,
|
|
48
|
+
build_rag_pipeline_evidence_receipt,
|
|
49
|
+
build_rerank_inference_receipt,
|
|
50
|
+
)
|
|
51
|
+
from .signer import (
|
|
52
|
+
AwsKmsEd25519Signer,
|
|
53
|
+
Ed25519Signer,
|
|
54
|
+
GcpKmsEd25519Signer,
|
|
55
|
+
Signer,
|
|
56
|
+
verify,
|
|
57
|
+
)
|
|
58
|
+
from .version import __version__
|
|
59
|
+
|
|
60
|
+
__all__ = [
|
|
61
|
+
"__version__",
|
|
62
|
+
# client wrappers
|
|
63
|
+
"LedgerProofVoyage",
|
|
64
|
+
"LedgerProofAsyncVoyage",
|
|
65
|
+
# decorators
|
|
66
|
+
"lpr_track_embed",
|
|
67
|
+
"lpr_track_rerank",
|
|
68
|
+
# manual
|
|
69
|
+
"emit_embedding_receipt",
|
|
70
|
+
"emit_rerank_receipt",
|
|
71
|
+
"emit_rag_pipeline_receipt",
|
|
72
|
+
"build_embedding_refs",
|
|
73
|
+
"build_rerank_result_refs",
|
|
74
|
+
"build_voyage_model_ref",
|
|
75
|
+
"extract_embeddings",
|
|
76
|
+
"extract_rerank_results",
|
|
77
|
+
"extract_total_tokens",
|
|
78
|
+
# schemas
|
|
79
|
+
"EmbeddingRef",
|
|
80
|
+
"RerankResultRef",
|
|
81
|
+
"UpstreamReceiptRef",
|
|
82
|
+
"DownstreamChatRef",
|
|
83
|
+
"VoyageModelRef",
|
|
84
|
+
"ReceiptV1",
|
|
85
|
+
"RegulatoryContext",
|
|
86
|
+
"build_embedding_inference_receipt",
|
|
87
|
+
"build_rerank_inference_receipt",
|
|
88
|
+
"build_rag_pipeline_evidence_receipt",
|
|
89
|
+
# emitters
|
|
90
|
+
"Emitter",
|
|
91
|
+
"LogEmitter",
|
|
92
|
+
"StderrEmitter",
|
|
93
|
+
"WebhookEmitter",
|
|
94
|
+
"QueueEmitter",
|
|
95
|
+
"MultiEmitter",
|
|
96
|
+
# signers
|
|
97
|
+
"Signer",
|
|
98
|
+
"Ed25519Signer",
|
|
99
|
+
"AwsKmsEd25519Signer",
|
|
100
|
+
"GcpKmsEd25519Signer",
|
|
101
|
+
"verify",
|
|
102
|
+
]
|