vek 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
vek/__init__.py ADDED
@@ -0,0 +1,28 @@
1
+ """vek - Content-addressed execution store for AI agents."""
2
+
3
+ from vek.api import (
4
+ init, store, log, branch, fork, diff, replay,
5
+ show, cat_file, status, tag, fsck, gc,
6
+ merge, log_graph, export, import_data,
7
+ )
8
+ from vek.hooks import AsyncSession as _AsyncSession
9
+ from vek.hooks import hook, wrap
10
+ from vek.session import Session as _Session
11
+
12
# Version string of this package release.
__version__ = "0.2.0"

# Public names re-exported by ``from vek import *``.
__all__ = [
    "init",
    "store",
    "log",
    "branch",
    "fork",
    "diff",
    "replay",
    "show",
    "cat_file",
    "status",
    "tag",
    "fsck",
    "gc",
    "merge",
    "log_graph",
    "export",
    "import_data",
    "session",
    "async_session",
    "wrap",
    "hook",
]
19
+
20
+
21
def session(**kwargs):
    """Open an auto-recording execution session.

    Every keyword argument is forwarded unchanged to the ``Session``
    constructor; the freshly built session object is returned.
    """
    recorder = _Session(**kwargs)
    return recorder
24
+
25
+
26
def async_session(**kwargs):
    """Open an async auto-recording execution session.

    Every keyword argument is forwarded unchanged to the ``AsyncSession``
    constructor; the freshly built session object is returned.
    """
    recorder = _AsyncSession(**kwargs)
    return recorder
vek/api.py ADDED
@@ -0,0 +1,434 @@
1
+ """High-level public operations.
2
+
3
+ Every function either opens its own DB connection (top-level calls)
4
+ or accepts an injected one (for use inside a Session).
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from datetime import datetime, timezone
11
+ from pathlib import Path
12
+
13
+ from vek.core import canonical, hash_blob, hash_node
14
+ from vek.db import DB
15
+ from vek.graph import graph_log as _graph_log, json_diff
16
+ from vek.integrity import fsck as _fsck, gc as _gc
17
+ from vek.transfer import (
18
+ export_json as _export_json,
19
+ export_jsonl as _export_jsonl,
20
+ import_json as _import_json,
21
+ import_jsonl as _import_jsonl,
22
+ )
23
+ from vek.repo import (
24
+ DB_NAME,
25
+ find,
26
+ init as _repo_init,
27
+ read_head,
28
+ write_head,
29
+ )
30
+
31
+
32
class VekError(Exception):
    """Base class for runtime errors raised by vek operations."""
34
+
35
+
36
+ # ---------------------------------------------------------------------- helpers
37
+
38
+
39
def _open(start: Path | None = None) -> tuple[Path, DB]:
    """Find the ``.vek`` directory and open its database.

    Returns a ``(vek_dir, db)`` pair. Raises ``VekError`` when ``find``
    reports no repository for *start*.
    """
    vek_dir = find(start)
    if vek_dir is not None:
        return vek_dir, DB(vek_dir / DB_NAME)
    raise VekError("not a vek repository (run `vek init`)")
45
+
46
+
47
def _resolve(db: DB, h: str) -> str:
    """Resolve a potentially short hash prefix to its full hash.

    Delegates to ``db.resolve_prefix``. A ``KeyError`` from the lookup is
    reported as ``VekError("object not found: ...")``; a ``ValueError`` is
    re-raised as a ``VekError`` carrying the same message.

    The original exception is attached as ``__cause__`` so the traceback
    shows an explicit cause instead of an implicit
    "during handling of the above exception" context.
    """
    try:
        return db.resolve_prefix(h)
    except KeyError as exc:
        raise VekError(f"object not found: {h}") from exc
    except ValueError as exc:
        raise VekError(str(exc)) from exc
55
+
56
+
57
+ # ------------------------------------------------------------------- public API
58
+
59
+
60
def init(path: str | Path | None = None) -> Path:
    """Initialise a .vek repository and return its directory. Idempotent.

    A falsy *path* is passed on as ``None`` to the repository initialiser.
    """
    target = Path(path) if path else None
    vek_dir = _repo_init(target)
    # Opening and immediately closing the DB ensures the SQLite schema exists.
    schema_db = DB(vek_dir / DB_NAME)
    schema_db.close()
    return vek_dir
66
+
67
+
68
def store(
    tool: str,
    input: object,
    output: object,
    *,
    parent: str | None = ...,  # type: ignore[assignment]
    _vd: Path | None = None,
    _db: DB | None = None,
) -> str:
    """Record one tool call. Returns the node hash.

    When called outside a session, the new node is automatically
    chained to the tip of the current branch (like ``git commit``).

    Args:
        tool: Name of the tool that was called.
        input: Tool input; canonicalised and stored as a blob.
        output: Tool output; canonicalised and stored as a blob.
        parent: Parent node hash. The default ``...`` sentinel means
            "use the current branch tip"; an explicit ``None`` is stored
            as-is.
        _vd: Repository directory, supplied together with ``_db`` when the
            caller injects its own connection.
        _db: Injected database. When ``None`` the function opens its own
            connection and closes it before returning, even on error.
    """
    own_db = _db is None
    if own_db:
        vd, db = _open()
    else:
        vd, db = _vd, _db  # type: ignore[assignment]

    try:
        # --- store input / output blobs (content-addressed, deduped) ---
        in_blob = canonical(input)
        out_blob = canonical(output)
        in_hash = hash_blob(in_blob)
        out_hash = hash_blob(out_blob)
        db.put_object(in_hash, in_blob)
        db.put_object(out_hash, out_blob)

        # --- resolve parent ---
        branch_name = read_head(vd)
        if parent is ...:
            parent = db.get_ref(branch_name)

        # --- build node ---
        ts = datetime.now(timezone.utc).isoformat()
        node_payload = canonical(
            dict(
                tool=tool,
                input_hash=in_hash,
                output_hash=out_hash,
                parent_hash=parent,
                timestamp=ts,
            )
        )
        node_hash = hash_node(node_payload)

        db.put_node(node_hash, tool, in_hash, out_hash, parent, ts)

        # --- advance branch pointer ---
        db.set_ref(branch_name, node_hash)
        return node_hash
    finally:
        # Only close a connection this call opened; an injected one
        # belongs to the caller.
        if own_db:
            db.close()
122
+
123
+
124
def log(n: int = 20, *, branch_name: str | None = None) -> list[dict]:
    """Return the last *n* nodes on the current (or given) branch.

    Returns an empty list when the branch ref has no tip. The database
    connection is closed on every exit path, including errors.
    """
    vd, db = _open()
    try:
        ref = branch_name or read_head(vd)
        tip = db.get_ref(ref)
        if tip is None:
            return []
        return db.walk(tip)[:n]
    finally:
        db.close()
135
+
136
+
137
def branch(name: str | None = None) -> str | list[tuple[str, str]]:
    """List branches (*name*=None) or create/switch to a branch.

    - If the branch already exists, just switch HEAD to it.
    - If the branch is new, copy the current tip (when there is one)
      and switch.

    Returns the result of ``db.list_refs()`` when listing, otherwise
    *name*. The database connection is closed on every exit path.
    """
    vd, db = _open()
    try:
        if name is None:
            return db.list_refs()
        if db.get_ref(name) is None:
            # New branch — copy current tip
            tip = db.get_ref(read_head(vd))
            if tip:
                db.set_ref(name, tip)
        write_head(vd, name)
        return name
    finally:
        db.close()
158
+
159
+
160
def fork(node_hash: str, branch_name: str | None = None) -> str:
    """Create a new branch rooted at *node_hash* and switch to it.

    *node_hash* may be a prefix; it is resolved first. When *branch_name*
    is omitted the branch is called ``fork-<first 8 hash chars>``.

    Returns the branch name. Raises ``VekError`` if the hash cannot be
    resolved or the node does not exist. The database connection is closed
    on every exit path, including a failed resolution.
    """
    vd, db = _open()
    try:
        node_hash = _resolve(db, node_hash)
        if db.get_node(node_hash) is None:
            raise VekError(f"node not found: {node_hash}")
        bname = branch_name or f"fork-{node_hash[:8]}"
        db.set_ref(bname, node_hash)
        write_head(vd, bname)
        return bname
    finally:
        db.close()
173
+
174
+
175
def diff(hash1: str, hash2: str) -> dict:
    """Compare two nodes and their input/output blobs.

    Both hashes may be prefixes. The result always has ``node1``,
    ``node2``, ``input_match`` and ``output_match``; ``input_diff`` /
    ``output_diff`` are added only for the side whose hashes differ.

    Raises ``VekError`` if a hash cannot be resolved or a node is missing.
    The database connection is closed on every exit path.
    """
    vd, db = _open()
    try:
        hash1 = _resolve(db, hash1)
        hash2 = _resolve(db, hash2)
        n1 = db.get_node(hash1)
        n2 = db.get_node(hash2)
        if n1 is None or n2 is None:
            raise VekError("one or both nodes not found")

        result: dict = {
            "node1": n1,
            "node2": n2,
            "input_match": n1["input_hash"] == n2["input_hash"],
            "output_match": n1["output_hash"] == n2["output_hash"],
        }

        # Only materialise and diff the blobs for the side that differs.
        for side in ("input", "output"):
            if result[f"{side}_match"]:
                continue
            key = f"{side}_hash"
            # A missing blob is treated as JSON null.
            left = json.loads(db.get_object(n1[key]) or b"null")
            right = json.loads(db.get_object(n2[key]) or b"null")
            result[f"{side}_diff"] = json_diff(left, right)

        return result
    finally:
        db.close()
205
+
206
+
207
def replay(node_hash: str) -> list[dict]:
    """Return the full execution chain from root to *node_hash*,
    with input/output content materialised inline.

    *node_hash* may be a prefix. Each entry is a copy of the node with
    ``input`` and ``output`` keys holding the decoded JSON blobs (a missing
    blob decodes to ``None``).

    Raises ``VekError`` if the hash cannot be resolved or the walk yields
    nothing. The database connection is closed on every exit path.
    """
    vd, db = _open()
    try:
        node_hash = _resolve(db, node_hash)
        chain = db.walk(node_hash)
        if not chain:
            raise VekError(f"node not found: {node_hash}")
        enriched = []
        for node in reversed(chain):  # root-first order
            entry = dict(node)
            entry["input"] = json.loads(db.get_object(node["input_hash"]) or b"null")
            entry["output"] = json.loads(db.get_object(node["output_hash"]) or b"null")
            enriched.append(entry)
        return enriched
    finally:
        db.close()
224
+
225
+
226
def show(node_hash: str) -> dict:
    """Return full node details with materialised input/output.

    *node_hash* may be a prefix. The result is a copy of the node with
    ``input`` and ``output`` keys holding the decoded JSON blobs (a missing
    blob decodes to ``None``).

    Raises ``VekError`` if the hash cannot be resolved or the node is
    missing. The database connection is closed on every exit path.
    """
    vd, db = _open()
    try:
        node_hash = _resolve(db, node_hash)
        node = db.get_node(node_hash)
        if node is None:
            raise VekError(f"node not found: {node_hash}")
        result = dict(node)
        result["input"] = json.loads(db.get_object(node["input_hash"]) or b"null")
        result["output"] = json.loads(db.get_object(node["output_hash"]) or b"null")
        return result
    finally:
        db.close()
239
+
240
+
241
def cat_file(obj_hash: str) -> bytes:
    """Return raw content of a content-addressed object.

    *obj_hash* may be a prefix. Raises ``VekError`` if the hash cannot be
    resolved or no object is stored under it. The database connection is
    closed on every exit path.
    """
    vd, db = _open()
    try:
        obj_hash = _resolve(db, obj_hash)
        blob = db.get_object(obj_hash)
        if blob is None:
            raise VekError(f"object not found: {obj_hash}")
        return blob
    finally:
        db.close()
251
+
252
+
253
def status() -> dict:
    """Return repository status summary.

    The dict holds the current ``branch`` name, its ``tip`` hash (``None``
    when the ref is unset) and the ``nodes`` / ``objects`` / ``refs``
    counts reported by the database. The connection is closed even if a
    query fails.
    """
    vd, db = _open()
    try:
        branch_name = read_head(vd)
        return {
            "branch": branch_name,
            "tip": db.get_ref(branch_name),
            "nodes": db.count_nodes(),
            "objects": db.count_objects(),
            "refs": db.count_refs(),
        }
    finally:
        db.close()
267
+
268
+
269
+ # ----------------------------------------------------------------- tags
270
+
271
# Tags are stored as refs whose name starts with this prefix.
TAG_PREFIX = "tag/"


def tag(name: str | None = None, node_hash: str | None = None) -> str | list[tuple[str, str]]:
    """Create or list lightweight tags.

    - ``tag()`` — list all tags
    - ``tag("v1")`` — tag current tip
    - ``tag("v1", hash)`` — tag a specific node

    Listing returns ``(tag_name, hash)`` pairs with the ref prefix
    stripped, ordered by name. Creating returns *name*.

    Raises ``VekError`` if the hash cannot be resolved, the current branch
    has no tip, or the tag already exists. The database connection is
    closed on every exit path.
    """
    vd, db = _open()
    try:
        if name is None:
            # NOTE: reads the refs table through the DB's private connection.
            rows = db._conn.execute(
                "SELECT name, hash FROM refs WHERE name LIKE ? ORDER BY name",
                (TAG_PREFIX + "%",),
            ).fetchall()
            return [(n.removeprefix(TAG_PREFIX), h) for n, h in rows]

        if node_hash is not None:
            node_hash = _resolve(db, node_hash)
        else:
            node_hash = db.get_ref(read_head(vd))
            if node_hash is None:
                raise VekError("nothing to tag (empty branch)")

        ref_name = TAG_PREFIX + name
        if db.get_ref(ref_name) is not None:
            raise VekError(f"tag '{name}' already exists")

        db.set_ref(ref_name, node_hash)
        return name
    finally:
        db.close()
308
+
309
+
310
+ # ----------------------------------------------------------------- merge
311
+
312
+
313
def merge(target_branch: str) -> str:
    """Merge *target_branch* into the current branch.

    Creates a merge node with two parents: the current tip (parent_hash)
    and the target branch tip (merge_parent). The merge node records
    the tool as ``__merge__`` with both branch names as input.

    Returns the merge node hash.

    Raises ``VekError`` when either branch has no tip, when both tips are
    identical, or when the target tip already appears in the walk from the
    current tip. The database connection is closed on every exit path.
    """
    vd, db = _open()
    try:
        current = read_head(vd)
        our_tip = db.get_ref(current)
        their_tip = db.get_ref(target_branch)

        if our_tip is None:
            raise VekError(f"current branch '{current}' has no commits")
        if their_tip is None:
            raise VekError(f"branch '{target_branch}' not found or empty")
        if our_tip == their_tip:
            raise VekError("already up to date")

        # Check that target isn't an ancestor of current (already merged)
        ancestor_hashes = {n["hash"] for n in db.walk(our_tip)}
        if their_tip in ancestor_hashes:
            raise VekError(f"'{target_branch}' is already an ancestor of '{current}'")

        merge_input = canonical({"merge": [current, target_branch]})
        merge_output = canonical({"merged_tips": [our_tip, their_tip]})
        in_hash = hash_blob(merge_input)
        out_hash = hash_blob(merge_output)
        db.put_object(in_hash, merge_input)
        db.put_object(out_hash, merge_output)

        ts = datetime.now(timezone.utc).isoformat()
        node_payload = canonical(
            dict(
                tool="__merge__",
                input_hash=in_hash,
                output_hash=out_hash,
                parent_hash=our_tip,
                merge_parent=their_tip,
                timestamp=ts,
            )
        )
        node_hash = hash_node(node_payload)
        db.put_node(node_hash, "__merge__", in_hash, out_hash, our_tip, ts, merge_parent=their_tip)
        # Advance the current branch to the new merge node.
        db.set_ref(current, node_hash)
        return node_hash
    finally:
        db.close()
367
+
368
+
369
+ # --------------------------------------------------------------- graph log
370
+
371
+
372
def log_graph(*, branch_name: str | None = None, limit: int = 30) -> list[str]:
    """Return ASCII DAG lines (like ``git log --graph --oneline``).

    *branch_name* and *limit* are forwarded to the graph renderer. The
    database connection is closed even if rendering fails.
    """
    _vd, db = _open()
    try:
        return _graph_log(db, branch_name=branch_name, limit=limit)
    finally:
        db.close()
378
+
379
+
380
+ # -------------------------------------------------------------- integrity
381
+
382
+
383
def fsck() -> list[dict]:
    """Verify repository integrity. Returns list of errors.

    The database connection is closed even if the check itself raises.
    """
    _vd, db = _open()
    try:
        return _fsck(db)
    finally:
        db.close()
389
+
390
+
391
def gc(*, dry_run: bool = False) -> dict:
    """Remove unreachable nodes and orphaned objects.

    *dry_run* is forwarded to the collector, whose result dict is
    returned. The database connection is closed even if collection fails.
    """
    _vd, db = _open()
    try:
        return _gc(db, dry_run=dry_run)
    finally:
        db.close()
397
+
398
+
399
+ # --------------------------------------------------------------- transfer
400
+
401
+
402
def export(*, branch: str | None = None, format: str = "json") -> dict | str:
    """Export execution chains.

    - ``format="json"`` returns a dict.
    - ``format="jsonl"`` returns a newline-delimited JSON string.

    Any *format* other than ``"jsonl"`` takes the JSON path. The database
    connection is closed even if the export fails.
    """
    _vd, db = _open()
    try:
        if format == "jsonl":
            import io

            # The JSONL exporter writes to a text stream; collect it in memory.
            buf = io.StringIO()
            _export_jsonl(db, buf, branch=branch)
            return buf.getvalue()
        return _export_json(db, branch=branch)
    finally:
        db.close()
418
+
419
+
420
def import_data(data: dict | str, *, format: str = "json") -> dict:
    """Import execution chains.

    - ``format="json"``: *data* is a dict (from ``export``).
    - ``format="jsonl"``: *data* is a JSONL string.

    Any *format* other than ``"jsonl"`` takes the JSON path. Returns the
    importer's result dict. The database connection is closed even if the
    import fails (for example on malformed data).
    """
    _vd, db = _open()
    try:
        if format == "jsonl":
            import io

            # The JSONL importer reads from a text stream.
            buf = io.StringIO(data)  # type: ignore[arg-type]
            return _import_jsonl(db, buf)
        return _import_json(db, data)  # type: ignore[arg-type]
    finally:
        db.close()