vek 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vek/__init__.py +28 -0
- vek/api.py +434 -0
- vek/cli.py +284 -0
- vek/core.py +41 -0
- vek/db.py +196 -0
- vek/graph.py +119 -0
- vek/hooks.py +153 -0
- vek/integrity.py +125 -0
- vek/py.typed +0 -0
- vek/repo.py +110 -0
- vek/session.py +84 -0
- vek/transfer.py +198 -0
- vek-0.2.0.dist-info/METADATA +167 -0
- vek-0.2.0.dist-info/RECORD +17 -0
- vek-0.2.0.dist-info/WHEEL +4 -0
- vek-0.2.0.dist-info/entry_points.txt +2 -0
- vek-0.2.0.dist-info/licenses/LICENSE +0 -0
vek/__init__.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""vek - Content-addressed execution store for AI agents."""
|
|
2
|
+
|
|
3
|
+
from vek.api import (
|
|
4
|
+
init, store, log, branch, fork, diff, replay,
|
|
5
|
+
show, cat_file, status, tag, fsck, gc,
|
|
6
|
+
merge, log_graph, export, import_data,
|
|
7
|
+
)
|
|
8
|
+
from vek.hooks import AsyncSession as _AsyncSession
|
|
9
|
+
from vek.hooks import hook, wrap
|
|
10
|
+
from vek.session import Session as _Session
|
|
11
|
+
|
|
12
|
+
# Package version string.
__version__ = "0.2.0"

# Public names of the package: the operations re-exported from ``vek.api``
# and ``vek.hooks``, plus the two session factories defined in this module.
__all__ = [
    "init",
    "store",
    "log",
    "branch",
    "fork",
    "diff",
    "replay",
    "show",
    "cat_file",
    "status",
    "tag",
    "fsck",
    "gc",
    "merge",
    "log_graph",
    "export",
    "import_data",
    "session",
    "async_session",
    "wrap",
    "hook",
]
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def session(**kwargs):
    """Create an auto-recording execution session.

    Every keyword argument is forwarded unchanged to the ``Session``
    constructor from ``vek.session``; the new instance is returned.
    """
    recorder = _Session(**kwargs)
    return recorder
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def async_session(**kwargs):
    """Create an async auto-recording execution session.

    Every keyword argument is forwarded unchanged to the ``AsyncSession``
    constructor from ``vek.hooks``; the new instance is returned.
    """
    recorder = _AsyncSession(**kwargs)
    return recorder
|
vek/api.py
ADDED
|
@@ -0,0 +1,434 @@
|
|
|
1
|
+
"""High-level public operations.
|
|
2
|
+
|
|
3
|
+
Every function either opens its own DB connection (top-level calls)
|
|
4
|
+
or accepts an injected one (for use inside a Session).
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import json
|
|
10
|
+
from datetime import datetime, timezone
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
|
|
13
|
+
from vek.core import canonical, hash_blob, hash_node
|
|
14
|
+
from vek.db import DB
|
|
15
|
+
from vek.graph import graph_log as _graph_log, json_diff
|
|
16
|
+
from vek.integrity import fsck as _fsck, gc as _gc
|
|
17
|
+
from vek.transfer import (
|
|
18
|
+
export_json as _export_json,
|
|
19
|
+
export_jsonl as _export_jsonl,
|
|
20
|
+
import_json as _import_json,
|
|
21
|
+
import_jsonl as _import_jsonl,
|
|
22
|
+
)
|
|
23
|
+
from vek.repo import (
|
|
24
|
+
DB_NAME,
|
|
25
|
+
find,
|
|
26
|
+
init as _repo_init,
|
|
27
|
+
read_head,
|
|
28
|
+
write_head,
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class VekError(Exception):
    """Raised for runtime failures that are specific to vek.

    The public operations in this module raise it for conditions such as a
    missing repository or an unknown object, node or branch.
    """

    pass
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
# ---------------------------------------------------------------------- helpers
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def _open(start: Path | None = None) -> tuple[Path, DB]:
    """Find the ``.vek`` directory and open its database.

    Args:
        start: Passed straight to ``find``; ``None`` leaves the choice of
            starting point to ``find``.

    Returns:
        ``(vek_dir, db)`` where ``db`` is a ``DB`` opened on
        ``vek_dir / DB_NAME``. The caller is responsible for closing it.

    Raises:
        VekError: If ``find`` reports no repository.
    """
    repo_dir = find(start)
    if repo_dir is not None:
        return repo_dir, DB(repo_dir / DB_NAME)
    raise VekError("not a vek repository (run `vek init`)")
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _resolve(db: DB, h: str) -> str:
    """Resolve a possibly abbreviated hash to its full form.

    Delegates to ``db.resolve_prefix`` and translates its failures into
    ``VekError`` so callers only have to handle one exception type.

    Args:
        db: Open database to look the prefix up in.
        h: Full hash or hash prefix.

    Returns:
        Whatever ``db.resolve_prefix(h)`` returns (the full hash).

    Raises:
        VekError: If ``resolve_prefix`` raises ``KeyError`` (reported as
            "object not found") or ``ValueError`` (its message is reused).
    """
    try:
        return db.resolve_prefix(h)
    except KeyError as exc:
        # Chain explicitly so the traceback shows a deliberate translation
        # rather than an error that occurred while handling another one.
        raise VekError(f"object not found: {h}") from exc
    except ValueError as exc:
        raise VekError(str(exc)) from exc
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
# ------------------------------------------------------------------- public API
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def init(path: str | Path | None = None) -> Path:
    """Initialise a ``.vek`` repository and return its directory.

    Described as idempotent by the original author; that property depends
    on ``vek.repo.init`` and ``DB``, which are not defined in this module.

    Args:
        path: Location handed to ``vek.repo.init``. Any falsy value
            (``None`` or an empty string) is passed on as ``None``.

    Returns:
        The repository directory reported by ``vek.repo.init``.
    """
    target = None if not path else Path(path)
    repo_dir = _repo_init(target)
    # Opening the database is relied on to create the SQLite schema; the
    # connection itself is not needed, so it is closed straight away.
    bootstrap_db = DB(repo_dir / DB_NAME)
    bootstrap_db.close()
    return repo_dir
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def store(
    tool: str,
    input: object,
    output: object,
    *,
    parent: str | None = ...,  # type: ignore[assignment]
    _vd: Path | None = None,
    _db: DB | None = None,
) -> str:
    """Record one tool call and return the hash of the new node.

    The input and output are serialised with ``canonical`` and written as
    content-addressed objects keyed by ``hash_blob``. A node that references
    both hashes, the parent and a UTC timestamp is then written, and the
    branch named by ``read_head`` is advanced to it (like ``git commit``).

    Args:
        tool: Name of the tool that was called.
        input: Tool input; must be something ``canonical`` accepts.
        output: Tool output; must be something ``canonical`` accepts.
        parent: Parent node hash. The default ``...`` sentinel means "use
            the tip of the current branch"; any explicit value, including
            ``None``, is recorded as given.
        _vd: Repository directory. Must be supplied together with ``_db``.
        _db: Already-open database (e.g. from a session). When omitted the
            function opens its own connection and closes it on the way out.

    Returns:
        The hash of the newly written node.

    Raises:
        VekError: If ``_db`` is omitted and no repository can be found.
    """
    own_db = _db is None
    if own_db:
        vd, db = _open()
    else:
        vd, db = _vd, _db  # type: ignore[assignment]

    try:
        # --- store input / output blobs (content-addressed) ---
        in_blob = canonical(input)
        out_blob = canonical(output)
        in_hash = hash_blob(in_blob)
        out_hash = hash_blob(out_blob)
        db.put_object(in_hash, in_blob)
        db.put_object(out_hash, out_blob)

        # --- resolve parent ---
        branch_name = read_head(vd)
        if parent is ...:
            parent = db.get_ref(branch_name)

        # --- build node ---
        # The timestamp is part of the hashed payload, so two otherwise
        # identical calls yield different node hashes.
        ts = datetime.now(timezone.utc).isoformat()
        node_payload = canonical(
            dict(
                tool=tool,
                input_hash=in_hash,
                output_hash=out_hash,
                parent_hash=parent,
                timestamp=ts,
            )
        )
        node_hash = hash_node(node_payload)
        db.put_node(node_hash, tool, in_hash, out_hash, parent, ts)

        # --- advance branch pointer ---
        db.set_ref(branch_name, node_hash)
        return node_hash
    finally:
        # Only close a connection this call opened; an injected one belongs
        # to the caller. ``finally`` ensures it is closed on errors too.
        if own_db:
            db.close()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def log(n: int = 20, *, branch_name: str | None = None) -> list[dict]:
    """Return up to *n* nodes from the history of a branch.

    Args:
        n: Maximum number of nodes; applied as a slice ``[:n]`` to the
            result of ``db.walk``.
        branch_name: Branch to inspect. A falsy value selects the branch
            named by ``read_head``.

    Returns:
        The first *n* entries of ``db.walk(tip)``, in the order ``walk``
        yields them (presumably tip-first, since ``replay`` reverses the
        same walk to get root-first order). Empty if the branch has no tip.

    Raises:
        VekError: If no repository can be found.
    """
    vd, db = _open()
    try:
        ref = branch_name or read_head(vd)
        tip = db.get_ref(ref)
        if tip is None:
            return []
        return db.walk(tip)[:n]
    finally:
        # Close the connection even if a DB call raises.
        db.close()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def branch(name: str | None = None) -> str | list[tuple[str, str]]:
    """List refs, or create/switch to a branch.

    - ``branch()`` returns ``db.list_refs()`` unchanged.
    - ``branch(name)`` for an existing ref only moves HEAD to it.
    - ``branch(name)`` for a new name copies the current branch tip to the
      new ref (when the current branch has one) and then moves HEAD.

    Args:
        name: Branch to create or switch to; ``None`` lists refs instead.

    Returns:
        The ref listing when *name* is ``None``, otherwise *name*.

    Raises:
        VekError: If no repository can be found.
    """
    vd, db = _open()
    try:
        if name is None:
            return db.list_refs()

        if db.get_ref(name) is None:
            # New branch: start it at the tip of the branch HEAD points to.
            # With no tip yet, no ref is written and only HEAD moves.
            tip = db.get_ref(read_head(vd))
            if tip:
                db.set_ref(name, tip)

        write_head(vd, name)
        return name
    finally:
        # Close the connection even if a DB or HEAD operation raises.
        db.close()
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
def fork(node_hash: str, branch_name: str | None = None) -> str:
    """Create a branch pointing at *node_hash* and switch HEAD to it.

    Args:
        node_hash: Full hash or prefix of the node to fork from.
        branch_name: Name for the new branch. A falsy value selects
            ``fork-<first 8 characters of the resolved hash>``.

    Returns:
        The name of the branch HEAD now points to.

    Raises:
        VekError: If no repository can be found, the hash cannot be
            resolved, or the resolved hash is not a node.
    """
    vd, db = _open()
    try:
        node_hash = _resolve(db, node_hash)
        # A resolved hash may not be a node, so check it explicitly.
        if db.get_node(node_hash) is None:
            raise VekError(f"node not found: {node_hash}")

        bname = branch_name or f"fork-{node_hash[:8]}"
        db.set_ref(bname, node_hash)
        write_head(vd, bname)
        return bname
    finally:
        # Covers the error paths too, including failures inside _resolve.
        db.close()
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def diff(hash1: str, hash2: str) -> dict:
    """Compare two nodes and, where they differ, their input/output blobs.

    Args:
        hash1: Full hash or prefix of the first node.
        hash2: Full hash or prefix of the second node.

    Returns:
        A dict with ``node1``, ``node2``, ``input_match`` and
        ``output_match``. ``input_diff`` / ``output_diff`` (the result of
        ``json_diff``) are added only when the corresponding hashes differ.

    Raises:
        VekError: If no repository can be found, a hash cannot be resolved,
            or either resolved hash is not a node.
    """
    _vd, db = _open()
    try:
        hash1 = _resolve(db, hash1)
        hash2 = _resolve(db, hash2)
        n1 = db.get_node(hash1)
        n2 = db.get_node(hash2)
        if n1 is None or n2 is None:
            raise VekError("one or both nodes not found")

        result: dict = {
            "node1": n1,
            "node2": n2,
            "input_match": n1["input_hash"] == n2["input_hash"],
            "output_match": n1["output_hash"] == n2["output_hash"],
        }

        # A missing blob is treated as JSON ``null`` instead of an error.
        if not result["input_match"]:
            i1 = json.loads(db.get_object(n1["input_hash"]) or b"null")
            i2 = json.loads(db.get_object(n2["input_hash"]) or b"null")
            result["input_diff"] = json_diff(i1, i2)

        if not result["output_match"]:
            o1 = json.loads(db.get_object(n1["output_hash"]) or b"null")
            o2 = json.loads(db.get_object(n2["output_hash"]) or b"null")
            result["output_diff"] = json_diff(o1, o2)

        return result
    finally:
        # Close the connection on every path, including resolve/parse errors.
        db.close()
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def replay(node_hash: str) -> list[dict]:
    """Return the execution chain ending at *node_hash*, root first.

    Each entry is a copy of the node with ``input`` and ``output`` keys
    added, holding the decoded JSON content of the referenced blobs.

    Args:
        node_hash: Full hash or prefix of the last node in the chain.

    Returns:
        The nodes from ``db.walk`` in reversed (root-first) order.

    Raises:
        VekError: If no repository can be found, the hash cannot be
            resolved, or the walk yields no nodes.
    """
    _vd, db = _open()
    try:
        node_hash = _resolve(db, node_hash)
        chain = db.walk(node_hash)
        if not chain:
            raise VekError(f"node not found: {node_hash}")

        enriched = []
        for node in reversed(chain):  # root-first order
            entry = dict(node)
            # A missing blob is materialised as JSON ``null``.
            entry["input"] = json.loads(db.get_object(node["input_hash"]) or b"null")
            entry["output"] = json.loads(db.get_object(node["output_hash"]) or b"null")
            enriched.append(entry)
        return enriched
    finally:
        # Close the connection on every path, including resolve/parse errors.
        db.close()
|
|
224
|
+
|
|
225
|
+
|
|
226
|
+
def show(node_hash: str) -> dict:
    """Return one node with its input and output content decoded inline.

    Args:
        node_hash: Full hash or prefix of the node.

    Returns:
        A copy of the node with ``input`` and ``output`` keys added,
        holding the decoded JSON content of the referenced blobs.

    Raises:
        VekError: If no repository can be found, the hash cannot be
            resolved, or the resolved hash is not a node.
    """
    _vd, db = _open()
    try:
        node_hash = _resolve(db, node_hash)
        node = db.get_node(node_hash)
        if node is None:
            raise VekError(f"node not found: {node_hash}")

        result = dict(node)
        # A missing blob is materialised as JSON ``null``.
        result["input"] = json.loads(db.get_object(node["input_hash"]) or b"null")
        result["output"] = json.loads(db.get_object(node["output_hash"]) or b"null")
        return result
    finally:
        # Close the connection on every path, including resolve/parse errors.
        db.close()
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
def cat_file(obj_hash: str) -> bytes:
    """Return the raw stored content of an object.

    Args:
        obj_hash: Full hash or prefix of the object.

    Returns:
        The blob returned by ``db.get_object`` for the resolved hash.

    Raises:
        VekError: If no repository can be found, the hash cannot be
            resolved, or no object is stored under the resolved hash.
    """
    _vd, db = _open()
    try:
        obj_hash = _resolve(db, obj_hash)
        blob = db.get_object(obj_hash)
        if blob is None:
            raise VekError(f"object not found: {obj_hash}")
        return blob
    finally:
        # Close the connection on every path, including resolve errors.
        db.close()
|
|
251
|
+
|
|
252
|
+
|
|
253
|
+
def status() -> dict:
    """Return a summary of the repository.

    Returns:
        A dict with ``branch`` (name from ``read_head``), ``tip`` (that
        branch's ref, possibly ``None``) and the ``nodes``, ``objects`` and
        ``refs`` counts reported by the database.

    Raises:
        VekError: If no repository can be found.
    """
    vd, db = _open()
    try:
        branch_name = read_head(vd)
        tip = db.get_ref(branch_name)
        return {
            "branch": branch_name,
            "tip": tip,
            "nodes": db.count_nodes(),
            "objects": db.count_objects(),
            "refs": db.count_refs(),
        }
    finally:
        # Close the connection even if one of the queries raises.
        db.close()
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
# ----------------------------------------------------------------- tags
|
|
270
|
+
|
|
271
|
+
# Tags share the refs table with branches; this prefix tells them apart.
TAG_PREFIX = "tag/"


def tag(name: str | None = None, node_hash: str | None = None) -> str | list[tuple[str, str]]:
    """Create or list lightweight tags.

    - ``tag()`` lists all tags.
    - ``tag("v1")`` tags the tip of the current branch.
    - ``tag("v1", hash)`` tags a specific node.

    Args:
        name: Tag name without the ``tag/`` prefix; ``None`` lists tags.
        node_hash: Full hash or prefix to tag. ``None`` selects the tip of
            the branch named by ``read_head``.

    Returns:
        When listing, ``(name, hash)`` pairs sorted by ref name with the
        prefix stripped; otherwise *name*.

    Raises:
        VekError: If no repository can be found, the hash cannot be
            resolved, the current branch has no tip, or the tag exists.
    """
    vd, db = _open()
    try:
        if name is None:
            # NOTE(review): this reaches into the private ``db._conn``; a
            # public DB query for refs by prefix would be preferable.
            rows = db._conn.execute(
                "SELECT name, hash FROM refs WHERE name LIKE ? ORDER BY name",
                (TAG_PREFIX + "%",),
            ).fetchall()
            return [(n.removeprefix(TAG_PREFIX), h) for n, h in rows]

        if node_hash is not None:
            node_hash = _resolve(db, node_hash)
        else:
            branch_name = read_head(vd)
            node_hash = db.get_ref(branch_name)
            if node_hash is None:
                raise VekError("nothing to tag (empty branch)")

        ref_name = TAG_PREFIX + name
        # Tags are never overwritten; an existing one is an error.
        if db.get_ref(ref_name) is not None:
            raise VekError(f"tag '{name}' already exists")

        db.set_ref(ref_name, node_hash)
        return name
    finally:
        # Close the connection on every path, including resolve errors.
        db.close()
|
|
308
|
+
|
|
309
|
+
|
|
310
|
+
# ----------------------------------------------------------------- merge
|
|
311
|
+
|
|
312
|
+
|
|
313
|
+
def merge(target_branch: str) -> str:
    """Merge *target_branch* into the current branch.

    Writes a merge node with two parents: the current tip (``parent_hash``)
    and the target branch tip (``merge_parent``). The node's tool is
    ``__merge__``, its input records both branch names and its output both
    tips. The current branch is then advanced to the merge node.

    Args:
        target_branch: Name of the ref to merge into the current branch.

    Returns:
        The hash of the merge node.

    Raises:
        VekError: If no repository can be found, either branch has no tip,
            both tips are identical, or the target tip is already in the
            current branch's history.
    """
    vd, db = _open()
    try:
        current = read_head(vd)
        our_tip = db.get_ref(current)
        their_tip = db.get_ref(target_branch)

        if our_tip is None:
            raise VekError(f"current branch '{current}' has no commits")
        if their_tip is None:
            raise VekError(f"branch '{target_branch}' not found or empty")
        if our_tip == their_tip:
            raise VekError("already up to date")

        # Refuse when the target tip is already in our history (merged).
        ancestor_hashes = {n["hash"] for n in db.walk(our_tip)}
        if their_tip in ancestor_hashes:
            raise VekError(f"'{target_branch}' is already an ancestor of '{current}'")

        merge_input = canonical({"merge": [current, target_branch]})
        merge_output = canonical({"merged_tips": [our_tip, their_tip]})
        in_hash = hash_blob(merge_input)
        out_hash = hash_blob(merge_output)
        db.put_object(in_hash, merge_input)
        db.put_object(out_hash, merge_output)

        ts = datetime.now(timezone.utc).isoformat()
        node_payload = canonical(
            dict(
                tool="__merge__",
                input_hash=in_hash,
                output_hash=out_hash,
                parent_hash=our_tip,
                merge_parent=their_tip,
                timestamp=ts,
            )
        )
        node_hash = hash_node(node_payload)
        db.put_node(node_hash, "__merge__", in_hash, out_hash, our_tip, ts, merge_parent=their_tip)
        db.set_ref(current, node_hash)
        return node_hash
    finally:
        # One close for every exit, including unexpected DB errors.
        db.close()
|
|
367
|
+
|
|
368
|
+
|
|
369
|
+
# --------------------------------------------------------------- graph log
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
def log_graph(*, branch_name: str | None = None, limit: int = 30) -> list[str]:
    """Return ASCII DAG lines (like ``git log --graph --oneline``).

    Args:
        branch_name: Forwarded to ``vek.graph.graph_log``.
        limit: Forwarded to ``vek.graph.graph_log``.

    Returns:
        The lines produced by ``graph_log``.

    Raises:
        VekError: If no repository can be found.
    """
    _vd, db = _open()
    try:
        return _graph_log(db, branch_name=branch_name, limit=limit)
    finally:
        # Close the connection even if rendering raises.
        db.close()
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
# -------------------------------------------------------------- integrity
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def fsck() -> list[dict]:
    """Verify repository integrity.

    Returns:
        The list of errors reported by ``vek.integrity.fsck``.

    Raises:
        VekError: If no repository can be found.
    """
    _vd, db = _open()
    try:
        return _fsck(db)
    finally:
        # Close the connection even if the check itself raises.
        db.close()
|
|
389
|
+
|
|
390
|
+
|
|
391
|
+
def gc(*, dry_run: bool = False) -> dict:
    """Remove unreachable nodes and orphaned objects.

    Args:
        dry_run: Forwarded to ``vek.integrity.gc``.

    Returns:
        The result dict produced by ``vek.integrity.gc``.

    Raises:
        VekError: If no repository can be found.
    """
    _vd, db = _open()
    try:
        return _gc(db, dry_run=dry_run)
    finally:
        # Close the connection even if collection raises part-way.
        db.close()
|
|
397
|
+
|
|
398
|
+
|
|
399
|
+
# --------------------------------------------------------------- transfer
|
|
400
|
+
|
|
401
|
+
|
|
402
|
+
def export(*, branch: str | None = None, format: str = "json") -> dict | str:
    """Export execution chains.

    - ``format="json"`` returns a dict.
    - ``format="jsonl"`` returns a newline-delimited JSON string.

    Args:
        branch: Forwarded to the exporter.
        format: ``"jsonl"`` selects the JSONL exporter; any other value
            selects the JSON (dict) exporter.

    Returns:
        The exporter's dict, or the JSONL text it wrote.

    Raises:
        VekError: If no repository can be found.
    """
    _vd, db = _open()
    try:
        if format == "jsonl":
            import io

            # The JSONL exporter writes to a stream; collect it in memory.
            buf = io.StringIO()
            _export_jsonl(db, buf, branch=branch)
            return buf.getvalue()
        return _export_json(db, branch=branch)
    finally:
        # Close the connection even if the exporter raises.
        db.close()
|
|
418
|
+
|
|
419
|
+
|
|
420
|
+
def import_data(data: dict | str, *, format: str = "json") -> dict:
    """Import execution chains.

    - ``format="json"``: *data* is a dict (from ``export``).
    - ``format="jsonl"``: *data* is a JSONL string.

    Args:
        data: Payload to import, shaped as described above.
        format: ``"jsonl"`` selects the JSONL importer; any other value
            selects the JSON (dict) importer.

    Returns:
        The result dict produced by the importer.

    Raises:
        VekError: If no repository can be found.
    """
    _vd, db = _open()
    try:
        if format == "jsonl":
            import io

            # The JSONL importer reads from a stream, so wrap the text.
            buf = io.StringIO(data)  # type: ignore[arg-type]
            return _import_jsonl(db, buf)
        return _import_json(db, data)  # type: ignore[arg-type]
    finally:
        # Close the connection even if the importer rejects the payload.
        db.close()
|