voyage-ledgerproof 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. voyage_ledgerproof-0.1.0/LICENSE +189 -0
  2. voyage_ledgerproof-0.1.0/PKG-INFO +187 -0
  3. voyage_ledgerproof-0.1.0/README.md +156 -0
  4. voyage_ledgerproof-0.1.0/ledgerproof_voyage/__init__.py +102 -0
  5. voyage_ledgerproof-0.1.0/ledgerproof_voyage/async_client_wrapper.py +120 -0
  6. voyage_ledgerproof-0.1.0/ledgerproof_voyage/canonical.py +106 -0
  7. voyage_ledgerproof-0.1.0/ledgerproof_voyage/client_wrapper.py +168 -0
  8. voyage_ledgerproof-0.1.0/ledgerproof_voyage/decorator.py +164 -0
  9. voyage_ledgerproof-0.1.0/ledgerproof_voyage/emitter.py +102 -0
  10. voyage_ledgerproof-0.1.0/ledgerproof_voyage/manual.py +411 -0
  11. voyage_ledgerproof-0.1.0/ledgerproof_voyage/schema.py +242 -0
  12. voyage_ledgerproof-0.1.0/ledgerproof_voyage/signer.py +123 -0
  13. voyage_ledgerproof-0.1.0/ledgerproof_voyage/version.py +1 -0
  14. voyage_ledgerproof-0.1.0/pyproject.toml +51 -0
  15. voyage_ledgerproof-0.1.0/setup.cfg +4 -0
  16. voyage_ledgerproof-0.1.0/tests/test_canonical.py +71 -0
  17. voyage_ledgerproof-0.1.0/tests/test_client_wrapper.py +221 -0
  18. voyage_ledgerproof-0.1.0/tests/test_emitter.py +55 -0
  19. voyage_ledgerproof-0.1.0/tests/test_manual.py +236 -0
  20. voyage_ledgerproof-0.1.0/tests/test_rag_pipeline.py +142 -0
  21. voyage_ledgerproof-0.1.0/tests/test_schema.py +226 -0
  22. voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/PKG-INFO +187 -0
  23. voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/SOURCES.txt +24 -0
  24. voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/dependency_links.txt +1 -0
  25. voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/requires.txt +8 -0
  26. voyage_ledgerproof-0.1.0/voyage_ledgerproof.egg-info/top_level.txt +1 -0
@@ -0,0 +1,189 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for describing the origin of the Work and
141
+ reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may accept warranty
167
+ or additional liability for, or impose any obligations consistent
168
+ with, this License. However, in accepting such obligations, You may
169
+ act only on Your own behalf and on Your sole responsibility, not on
170
+ behalf of any other Contributor, and only if You agree to indemnify,
171
+ defend, and hold each Contributor harmless for any liability
172
+ incurred by, or claims asserted against, such Contributor by reason
173
+ of your accepting any such warranty or additional liability.
174
+
175
+ END OF TERMS AND CONDITIONS
176
+
177
+ Copyright 2026 LedgerProof Foundation (in formation)
178
+
179
+ Licensed under the Apache License, Version 2.0 (the "License");
180
+ you may not use this file except in compliance with the License.
181
+ You may obtain a copy of the License at
182
+
183
+ http://www.apache.org/licenses/LICENSE-2.0
184
+
185
+ Unless required by applicable law or agreed to in writing, software
186
+ distributed under the License is distributed on an "AS IS" BASIS,
187
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
188
+ See the License for the specific language governing permissions and
189
+ limitations under the License.
@@ -0,0 +1,187 @@
1
+ Metadata-Version: 2.4
2
+ Name: voyage-ledgerproof
3
+ Version: 0.1.0
4
+ Summary: LedgerProof adapter for the Voyage AI Python SDK. Side-channel transparency receipts for EU AI Act Article 50 RAG pipeline evidence.
5
+ Author: LedgerProof Foundation (in formation)
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://ledgerproof.org
8
+ Project-URL: Repository, https://github.com/ledgerproof/ledgerproof
9
+ Project-URL: Documentation, https://docs.ledgerproof.org
10
+ Keywords: ledgerproof,voyage,voyageai,embeddings,rerank,rag,eu-ai-act,article-50,transparency,receipts
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: Apache Software License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.10
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
19
+ Classifier: Topic :: Security :: Cryptography
20
+ Requires-Python: >=3.10
21
+ Description-Content-Type: text/markdown
22
+ License-File: LICENSE
23
+ Requires-Dist: voyageai>=0.3
24
+ Requires-Dist: cryptography>=41
25
+ Requires-Dist: cbor2>=5.0
26
+ Requires-Dist: pydantic>=2.0
27
+ Provides-Extra: test
28
+ Requires-Dist: pytest>=7.0; extra == "test"
29
+ Requires-Dist: pytest-asyncio>=0.21; extra == "test"
30
+ Dynamic: license-file
31
+
32
+ # ledgerproof-voyage
33
+
34
+ LedgerProof adapter for the [Voyage AI Python SDK](https://github.com/voyage-ai/voyageai-python) (embeddings + rerank).
35
+
36
+ Emits **side-channel cryptographic transparency receipts** that capture the upstream retrieval evidence
37
+ (embeddings, rerank scores) feeding a downstream chatbot, suitable as an evidence layer for the
38
+ **EU AI Act Article 50** RAG pipeline scenario.
39
+
40
+ This adapter is **not endorsed by Voyage AI** or MongoDB. It wraps the official `voyageai` SDK; it is not
41
+ affiliated with Voyage AI Inc. or MongoDB Inc.
42
+
43
+ ## Why an embeddings adapter for an Article 50 protocol?
44
+
45
+ Voyage AI is **upstream RAG infrastructure**. Calls to `voyageai.Client.embed()` and
46
+ `voyageai.Client.rerank()` are **not themselves** direct Article 50(1) "AI system intended to interact
47
+ directly with natural persons" surfaces — they are retrieval-side substrate.
48
+
49
+ But they are the **evidence trail** for any RAG pipeline whose downstream chatbot output **is** under
50
+ Article 50. When a regulator (or an internal auditor, or a litigant) asks the load-bearing question
51
+ *"what sources did the model see, and how did they get there?"*, the answer is a chain:
52
+
53
+ ```
54
+ user query
55
+ -> Voyage embed(query) [embedding_inference/v1]
56
+ -> Voyage embed(documents) [embedding_inference/v1]
57
+ -> ANN retrieval (vector DB) [out of scope]
58
+ -> Voyage rerank(query, candidates) [rerank_inference/v1]
59
+ -> downstream chatbot turn [bound by rag_pipeline_evidence/v1]
60
+ ```
61
+
62
+ This adapter signs each link of that chain locally (constraint C4) so the full provenance is verifiable
63
+ offline from the stored signed receipts. The downstream chatbot turn is captured by the matching
64
+ LedgerProof chat adapter (`ledgerproof-openai`, `ledgerproof-anthropic`, `ledgerproof-cohere`, etc.) and
65
+ both are stitched together by a `rag_pipeline_evidence/v1` envelope.
66
+
67
+ This positioning matters because **most Tier-1 EU enterprise AI deployments under Article 50 enforcement
68
+ (2 August 2026) are RAG-shaped**, and the regulator's first incident-response question is invariably
69
+ "what evidence does the model have?". A signed Voyage embedding + rerank receipt is the cryptographic
70
+ answer.
71
+
72
+ ## 5-minute quickstart
73
+
74
+ ```bash
75
+ pip install voyage-ledgerproof
76
+ export VOYAGE_API_KEY=...
77
+ ```
78
+
79
+ ```python
80
+ from ledgerproof_voyage import LedgerProofVoyage, LogEmitter
81
+
82
+ client = LedgerProofVoyage(
83
+ deployer_id="acme-corp-eu",
84
+ emitter=LogEmitter(),
85
+ )
86
+
87
+ result = client.embed(
88
+ texts=["Article 50 transparency obligations apply from 2 August 2026."],
89
+ model="voyage-3-large",
90
+ input_type="document",
91
+ )
92
+
93
+ print(result.embeddings[0][:4], "...")
94
+ # Receipt has already been emitted to the side-channel.
95
+ ```
96
+
97
+ The Voyage `EmbeddingsObject` is returned **unchanged**. The receipt is emitted to the side channel only
98
+ (C7).
99
+
100
+ ## Three schemas
101
+
102
+ | Schema | Article 50 mapping | What it binds |
103
+ | --- | --- | --- |
104
+ | `embedding_inference/v1` | Supporting infrastructure | per-input SHA-256, per-vector SHA-256, vector dim, model_id, input_type |
105
+ | `rerank_inference/v1` | Supporting infrastructure | query SHA-256, per-candidate SHA-256, relevance scores, post-rerank order, model_id |
106
+ | `rag_pipeline_evidence/v1` | Article 50(1) variant | upstream embed + rerank receipt hashes -> downstream chat receipt hash + user query hash |
107
+
108
+ `embedding_inference/v1` and `rerank_inference/v1` carry
109
+ `regulatory_context.article_50_paragraph = "supporting"` — they are not Article 50(1) artefacts in
110
+ themselves. `rag_pipeline_evidence/v1` carries `"1"` because that is the regulated user-facing turn.
111
+
112
+ ## Three integration patterns
113
+
114
+ 1. **Client wrapper** (recommended) — `LedgerProofVoyage` wraps `voyageai.Client` and intercepts
115
+ `embed()` and `rerank()`. Sync and async (`LedgerProofAsyncVoyage`) supported.
116
+ 2. **Decorator** — `@lpr_track_embed(deployer_id=..., model=...)` and
117
+ `@lpr_track_rerank(deployer_id=..., model=...)` for user-defined functions wrapping Voyage calls.
118
+ 3. **Manual emission** — `emit_embedding_receipt(...)`, `emit_rerank_receipt(...)`, and
119
+ `emit_rag_pipeline_receipt(...)` for full control inside a custom RAG orchestration layer.
120
+
121
+ See `examples/` (not bundled in the PyPI source distribution; look in the project repository):
122
+
123
+ - `examples/01_voyage_embed_quickstart.py` — vanilla `embed()`
124
+ - `examples/02_voyage_rerank.py` — `rerank()` with relevance-score binding
125
+ - `examples/03_rag_pipeline_evidence.py` — full embed -> rerank -> (downstream chat stub) -> `rag_pipeline_evidence/v1`
126
+
127
+ ## Architectural discipline (C1–C8)
128
+
129
+ This adapter is implemented under the LedgerProof protocol's load-bearing constraints; of C1–C8, the following are highlighted here:
130
+
131
+ - **C1**: **No claim of regulator endorsement. No claim of Article 40 presumption of conformity. Not
132
+ endorsed by Voyage AI Inc. or MongoDB Inc.** This adapter wraps the official Voyage Python SDK; it is
133
+ not affiliated with either company.
134
+ - **C4**: Local verification only. The adapter does **not** phone home to LedgerProof servers during
135
+ normal operation. Signed receipts verify offline given the public key.
136
+ - **C6**: Stream-aware signing primitives are exposed for downstream chat adapters. Voyage's own surfaces
137
+ (`embed`, `rerank`) are non-streaming today.
138
+ - **C7**: Side-channel emission only. The adapter **cannot and does not modify** the Voyage response
139
+ payload. The `EmbeddingsObject` / `RerankingObject` is returned unchanged.
140
+
141
+ ## GDPR posture
142
+
143
+ Receipts reference content by SHA-256 hash only. The adapter **does not store**:
144
+
145
+ - raw input texts to `embed()`
146
+ - raw document text fed to `rerank()`
147
+ - raw embedding vectors (the vector is canonicalized to big-endian float64 and hashed; the hash is
148
+ stored, not the vector)
149
+ - raw user queries fed into the downstream chatbot
150
+
151
+ Identifiers (`deployer_id`, `document_id`, `tool_call_id`) are length-bounded and
152
+ character-set-restricted (no free-form PII). The deployer is responsible for ensuring the inputs they feed in
153
+ (e.g. `document_id` values) do not themselves leak PII.
154
+
155
+ ## Vector canonicalization
156
+
157
+ Embedding vectors are canonicalized to **IEEE-754 big-endian float64** byte strings before hashing. This
158
+ is portable across machines, Python versions, and operating systems. A verifier with the original vector
159
+ can recompute `SHA-256(canonicalize_vector(vector))` and compare to `vector_sha256_hex` in the receipt.
160
+
161
+ Voyage exposes `output_dtype=` for lower-precision quantized dtypes (`int8`, `uint8`, `binary`, `ubinary`); the
162
+ adapter records the requested `output_dtype` on `VoyageModelRef.output_dtype` for transparency, but
163
+ always canonicalizes through float64 for one stable wire format.
164
+
165
+ ## Scope disclaimer
166
+
167
+ LedgerProof provides an **evidence layer for Article 50 transparency obligations only**.
168
+
169
+ It does **not** cover:
170
+
171
+ - Article 9 (risk management system)
172
+ - Article 10 (data and data governance)
173
+ - Article 13 (transparency and information to deployers)
174
+ - Article 15 (accuracy, robustness, and cybersecurity)
175
+ - Article 72 (post-market monitoring)
176
+
177
+ LedgerProof does **not** confer presumption of conformity under Article 40. LedgerProof is not endorsed
178
+ by the European Commission, the AI Office, any national competent authority, **Voyage AI Inc., or
179
+ MongoDB Inc.**
180
+
181
+ This adapter wraps the official `voyageai` Python SDK; it is not affiliated with Voyage AI Inc. or
182
+ MongoDB Inc.
183
+
184
+ ## License
185
+
186
+ Apache 2.0. Copyright 2026 LedgerProof Foundation (in formation: US 501(c)(3) Delaware + Dutch Stichting
187
+ EU subsidiary).
@@ -0,0 +1,156 @@
1
+ # ledgerproof-voyage
2
+
3
+ LedgerProof adapter for the [Voyage AI Python SDK](https://github.com/voyage-ai/voyageai-python) (embeddings + rerank).
4
+
5
+ Emits **side-channel cryptographic transparency receipts** that capture the upstream retrieval evidence
6
+ (embeddings, rerank scores) feeding a downstream chatbot, suitable as an evidence layer for the
7
+ **EU AI Act Article 50** RAG pipeline scenario.
8
+
9
+ This adapter is **not endorsed by Voyage AI** or MongoDB. It wraps the official `voyageai` SDK; it is not
10
+ affiliated with Voyage AI Inc. or MongoDB Inc.
11
+
12
+ ## Why an embeddings adapter for an Article 50 protocol?
13
+
14
+ Voyage AI is **upstream RAG infrastructure**. Calls to `voyageai.Client.embed()` and
15
+ `voyageai.Client.rerank()` are **not themselves** direct Article 50(1) "AI system intended to interact
16
+ directly with natural persons" surfaces — they are retrieval-side substrate.
17
+
18
+ But they are the **evidence trail** for any RAG pipeline whose downstream chatbot output **is** under
19
+ Article 50. When a regulator (or an internal auditor, or a litigant) asks the load-bearing question
20
+ *"what sources did the model see, and how did they get there?"*, the answer is a chain:
21
+
22
+ ```
23
+ user query
24
+ -> Voyage embed(query) [embedding_inference/v1]
25
+ -> Voyage embed(documents) [embedding_inference/v1]
26
+ -> ANN retrieval (vector DB) [out of scope]
27
+ -> Voyage rerank(query, candidates) [rerank_inference/v1]
28
+ -> downstream chatbot turn [bound by rag_pipeline_evidence/v1]
29
+ ```
30
+
31
+ This adapter signs each link of that chain locally (constraint C4) so the full provenance is verifiable
32
+ offline from the stored signed receipts. The downstream chatbot turn is captured by the matching
33
+ LedgerProof chat adapter (`ledgerproof-openai`, `ledgerproof-anthropic`, `ledgerproof-cohere`, etc.) and
34
+ both are stitched together by a `rag_pipeline_evidence/v1` envelope.
35
+
36
+ This positioning matters because **most Tier-1 EU enterprise AI deployments under Article 50 enforcement
37
+ (2 August 2026) are RAG-shaped**, and the regulator's first incident-response question is invariably
38
+ "what evidence does the model have?". A signed Voyage embedding + rerank receipt is the cryptographic
39
+ answer.
40
+
41
+ ## 5-minute quickstart
42
+
43
+ ```bash
44
+ pip install voyage-ledgerproof
45
+ export VOYAGE_API_KEY=...
46
+ ```
47
+
48
+ ```python
49
+ from ledgerproof_voyage import LedgerProofVoyage, LogEmitter
50
+
51
+ client = LedgerProofVoyage(
52
+ deployer_id="acme-corp-eu",
53
+ emitter=LogEmitter(),
54
+ )
55
+
56
+ result = client.embed(
57
+ texts=["Article 50 transparency obligations apply from 2 August 2026."],
58
+ model="voyage-3-large",
59
+ input_type="document",
60
+ )
61
+
62
+ print(result.embeddings[0][:4], "...")
63
+ # Receipt has already been emitted to the side-channel.
64
+ ```
65
+
66
+ The Voyage `EmbeddingsObject` is returned **unchanged**. The receipt is emitted to the side channel only
67
+ (C7).
68
+
69
+ ## Three schemas
70
+
71
+ | Schema | Article 50 mapping | What it binds |
72
+ | --- | --- | --- |
73
+ | `embedding_inference/v1` | Supporting infrastructure | per-input SHA-256, per-vector SHA-256, vector dim, model_id, input_type |
74
+ | `rerank_inference/v1` | Supporting infrastructure | query SHA-256, per-candidate SHA-256, relevance scores, post-rerank order, model_id |
75
+ | `rag_pipeline_evidence/v1` | Article 50(1) variant | upstream embed + rerank receipt hashes -> downstream chat receipt hash + user query hash |
76
+
77
+ `embedding_inference/v1` and `rerank_inference/v1` carry
78
+ `regulatory_context.article_50_paragraph = "supporting"` — they are not Article 50(1) artefacts in
79
+ themselves. `rag_pipeline_evidence/v1` carries `"1"` because that is the regulated user-facing turn.
80
+
81
+ ## Three integration patterns
82
+
83
+ 1. **Client wrapper** (recommended) — `LedgerProofVoyage` wraps `voyageai.Client` and intercepts
84
+ `embed()` and `rerank()`. Sync and async (`LedgerProofAsyncVoyage`) supported.
85
+ 2. **Decorator** — `@lpr_track_embed(deployer_id=..., model=...)` and
86
+ `@lpr_track_rerank(deployer_id=..., model=...)` for user-defined functions wrapping Voyage calls.
87
+ 3. **Manual emission** — `emit_embedding_receipt(...)`, `emit_rerank_receipt(...)`, and
88
+ `emit_rag_pipeline_receipt(...)` for full control inside a custom RAG orchestration layer.
89
+
90
+ See `examples/` (not bundled in the PyPI source distribution; look in the project repository):
91
+
92
+ - `examples/01_voyage_embed_quickstart.py` — vanilla `embed()`
93
+ - `examples/02_voyage_rerank.py` — `rerank()` with relevance-score binding
94
+ - `examples/03_rag_pipeline_evidence.py` — full embed -> rerank -> (downstream chat stub) -> `rag_pipeline_evidence/v1`
95
+
96
+ ## Architectural discipline (C1–C8)
97
+
98
+ This adapter is implemented under the LedgerProof protocol's load-bearing constraints; of C1–C8, the following are highlighted here:
99
+
100
+ - **C1**: **No claim of regulator endorsement. No claim of Article 40 presumption of conformity. Not
101
+ endorsed by Voyage AI Inc. or MongoDB Inc.** This adapter wraps the official Voyage Python SDK; it is
102
+ not affiliated with either company.
103
+ - **C4**: Local verification only. The adapter does **not** phone home to LedgerProof servers during
104
+ normal operation. Signed receipts verify offline given the public key.
105
+ - **C6**: Stream-aware signing primitives are exposed for downstream chat adapters. Voyage's own surfaces
106
+ (`embed`, `rerank`) are non-streaming today.
107
+ - **C7**: Side-channel emission only. The adapter **cannot and does not modify** the Voyage response
108
+ payload. The `EmbeddingsObject` / `RerankingObject` is returned unchanged.
109
+
110
+ ## GDPR posture
111
+
112
+ Receipts reference content by SHA-256 hash only. The adapter **does not store**:
113
+
114
+ - raw input texts to `embed()`
115
+ - raw document text fed to `rerank()`
116
+ - raw embedding vectors (the vector is canonicalized to big-endian float64 and hashed; the hash is
117
+ stored, not the vector)
118
+ - raw user queries fed into the downstream chatbot
119
+
120
+ Identifiers (`deployer_id`, `document_id`, `tool_call_id`) are length-bounded and
121
+ character-set-restricted (no free-form PII). The deployer is responsible for ensuring the inputs they feed in
122
+ (e.g. `document_id` values) do not themselves leak PII.
123
+
124
+ ## Vector canonicalization
125
+
126
+ Embedding vectors are canonicalized to **IEEE-754 big-endian float64** byte strings before hashing. This
127
+ is portable across machines, Python versions, and operating systems. A verifier with the original vector
128
+ can recompute `SHA-256(canonicalize_vector(vector))` and compare to `vector_sha256_hex` in the receipt.
129
+
130
+ Voyage exposes `output_dtype=` for lower-precision quantized dtypes (`int8`, `uint8`, `binary`, `ubinary`); the
131
+ adapter records the requested `output_dtype` on `VoyageModelRef.output_dtype` for transparency, but
132
+ always canonicalizes through float64 for one stable wire format.
133
+
134
+ ## Scope disclaimer
135
+
136
+ LedgerProof provides an **evidence layer for Article 50 transparency obligations only**.
137
+
138
+ It does **not** cover:
139
+
140
+ - Article 9 (risk management system)
141
+ - Article 10 (data and data governance)
142
+ - Article 13 (transparency and information to deployers)
143
+ - Article 15 (accuracy, robustness, and cybersecurity)
144
+ - Article 72 (post-market monitoring)
145
+
146
+ LedgerProof does **not** confer presumption of conformity under Article 40. LedgerProof is not endorsed
147
+ by the European Commission, the AI Office, any national competent authority, **Voyage AI Inc., or
148
+ MongoDB Inc.**
149
+
150
+ This adapter wraps the official `voyageai` Python SDK; it is not affiliated with Voyage AI Inc. or
151
+ MongoDB Inc.
152
+
153
+ ## License
154
+
155
+ Apache 2.0. Copyright 2026 LedgerProof Foundation (in formation: US 501(c)(3) Delaware + Dutch Stichting
156
+ EU subsidiary).
@@ -0,0 +1,102 @@
1
+ """
2
+ LedgerProof adapter for the Voyage AI Python SDK (embeddings + rerank).
3
+
4
+ Side-channel cryptographic transparency receipts for EU AI Act Article 50 RAG
5
+ pipeline evidence. Voyage AI is upstream RAG infrastructure; this adapter binds
6
+ embedding + rerank evidence to a downstream chatbot turn so the full
7
+ retrieval -> generation chain is cryptographically verifiable.
8
+
9
+ Discipline:
10
+ - C1: No regulator endorsement. No Article 40 presumption of conformity.
11
+ Not endorsed by Voyage AI / MongoDB.
12
+ - C4: Offline verification only; no phone-home.
13
+ - C6: Stream-aware SHA-256 primitives available for downstream chat receipts.
14
+ - C7: Side-channel emission only; never modifies the Voyage response.
15
+ """
16
+
17
+ from .async_client_wrapper import LedgerProofAsyncVoyage
18
+ from .client_wrapper import LedgerProofVoyage
19
+ from .decorator import lpr_track_embed, lpr_track_rerank
20
+ from .emitter import (
21
+ Emitter,
22
+ LogEmitter,
23
+ MultiEmitter,
24
+ QueueEmitter,
25
+ StderrEmitter,
26
+ WebhookEmitter,
27
+ )
28
+ from .manual import (
29
+ build_embedding_refs,
30
+ build_rerank_result_refs,
31
+ build_voyage_model_ref,
32
+ emit_embedding_receipt,
33
+ emit_rag_pipeline_receipt,
34
+ emit_rerank_receipt,
35
+ extract_embeddings,
36
+ extract_rerank_results,
37
+ extract_total_tokens,
38
+ )
39
+ from .schema import (
40
+ DownstreamChatRef,
41
+ EmbeddingRef,
42
+ ReceiptV1,
43
+ RegulatoryContext,
44
+ RerankResultRef,
45
+ UpstreamReceiptRef,
46
+ VoyageModelRef,
47
+ build_embedding_inference_receipt,
48
+ build_rag_pipeline_evidence_receipt,
49
+ build_rerank_inference_receipt,
50
+ )
51
+ from .signer import (
52
+ AwsKmsEd25519Signer,
53
+ Ed25519Signer,
54
+ GcpKmsEd25519Signer,
55
+ Signer,
56
+ verify,
57
+ )
58
+ from .version import __version__
59
+
60
+ __all__ = [
61
+ "__version__",
62
+ # client wrappers
63
+ "LedgerProofVoyage",
64
+ "LedgerProofAsyncVoyage",
65
+ # decorators
66
+ "lpr_track_embed",
67
+ "lpr_track_rerank",
68
+ # manual
69
+ "emit_embedding_receipt",
70
+ "emit_rerank_receipt",
71
+ "emit_rag_pipeline_receipt",
72
+ "build_embedding_refs",
73
+ "build_rerank_result_refs",
74
+ "build_voyage_model_ref",
75
+ "extract_embeddings",
76
+ "extract_rerank_results",
77
+ "extract_total_tokens",
78
+ # schemas
79
+ "EmbeddingRef",
80
+ "RerankResultRef",
81
+ "UpstreamReceiptRef",
82
+ "DownstreamChatRef",
83
+ "VoyageModelRef",
84
+ "ReceiptV1",
85
+ "RegulatoryContext",
86
+ "build_embedding_inference_receipt",
87
+ "build_rerank_inference_receipt",
88
+ "build_rag_pipeline_evidence_receipt",
89
+ # emitters
90
+ "Emitter",
91
+ "LogEmitter",
92
+ "StderrEmitter",
93
+ "WebhookEmitter",
94
+ "QueueEmitter",
95
+ "MultiEmitter",
96
+ # signers
97
+ "Signer",
98
+ "Ed25519Signer",
99
+ "AwsKmsEd25519Signer",
100
+ "GcpKmsEd25519Signer",
101
+ "verify",
102
+ ]