tidebase 0.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
tidebase/__init__.py ADDED
@@ -0,0 +1,771 @@
1
+ """Tidebase Python SDK.
2
+
3
+ Tidebase is an open-source checkpoint layer for AI agents: wrap your steps,
4
+ and failed runs resume from the last safe point — in your own Postgres,
5
+ without moving execution into a new runtime.
6
+
7
+ from tidebase import Tidebase
8
+
9
+ tide = Tidebase() # reads TIDEBASE_URL, default http://localhost:7373
10
+
11
+ def workflow(run, input):
12
+ plan = run.step("plan", lambda: make_plan(input))
13
+ run.state.set({"status": "writing", "progress": 0.7})
14
+ return run.step("write-report", lambda: write_report(plan))
15
+
16
+ tide.run("generate-report", workflow, run_id=run_id)
17
+
18
+ Zero dependencies (stdlib only). Mirrors @tidebase/sdk semantics: completed
19
+ steps replay from checkpoints, leases fence concurrent workers, gates resolve
20
+ exactly once, and unkeyed external writes classify as manual_review.
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import hashlib
26
+ import hmac
27
+ import json
28
+ import os
29
+ import time
30
+ import uuid
31
+ from concurrent.futures import ThreadPoolExecutor
32
+ from typing import Any, Callable, Iterator, Optional
33
+ from urllib import error as urlerror
34
+ from urllib import parse as urlparse
35
+ from urllib import request as urlrequest
36
+
37
+ __all__ = [
38
+ "Tidebase",
39
+ "RunContext",
40
+ "TidebaseError",
41
+ "RunCancelled",
42
+ "GateDecision",
43
+ "new_run_id",
44
+ "verify_webhook_signature",
45
+ ]
46
+
47
+
48
class TidebaseError(RuntimeError):
    """A Tidebase API request failed.

    Attributes:
        status: HTTP status code reported for the failed request.
        body: Response body text that accompanied the failure.
        path: Request path that was being called.
    """

    def __init__(self, status: int, body: str, path: str):
        message = f"Tidebase request failed: {status} {body} ({path})"
        super().__init__(message)
        self.status, self.body, self.path = status, body, path
56
+
57
+
58
class RunCancelled(RuntimeError):
    """Signals that the run was cancelled while this worker held it.

    Callers should let this exception propagate rather than handle it.

    Attributes:
        run_id: Identifier of the cancelled run.
    """

    def __init__(self, run_id: str):
        self.run_id = run_id
        super().__init__(f"Run {run_id} was cancelled")
64
+
65
+
66
class GateDecision:
    """Attribute-style view of a gate record (a dict as returned by the API).

    ``id``, ``name``, ``status`` and ``decision`` are required keys and raise
    ``KeyError`` when absent; ``actor`` and ``decisionPayload`` are optional
    and default to ``None``.
    """

    def __init__(self, gate: dict):
        # Required fields are looked up in a fixed order so the first missing
        # key is the one reported.
        gate_id, name, status, decision = (
            gate[key] for key in ("id", "name", "status", "decision")
        )
        self.gate_id: str = gate_id
        self.name: str = name
        self.status: str = status
        self.decision: str = decision
        self.actor: Optional[str] = gate.get("actor")
        self.payload: Any = gate.get("decisionPayload")

    @property
    def approved(self) -> bool:
        """True only when the recorded decision is exactly ``"approved"``."""
        return self.decision == "approved"
78
+
79
+
80
def new_run_id() -> str:
    """Return a new run identifier: ``run_`` followed by a random UUID4 in hex."""
    return f"run_{uuid.uuid4().hex}"
82
+
83
+
84
+ def _stable_stringify(value: Any) -> str:
85
+ """Deterministic JSON with sorted object keys.
86
+
87
+ Matches the TypeScript SDK's stableStringify for the common JSON types so
88
+ both SDKs compute the same input hash. (Caveat: floats that JavaScript
89
+ prints without a fractional part, e.g. 1.0, differ — avoid mixing SDKs on
90
+ a step whose input contains such floats.)
91
+ """
92
+ if value is None or not isinstance(value, (dict, list)):
93
+ return json.dumps(value, ensure_ascii=False, separators=(",", ":"))
94
+ if isinstance(value, list):
95
+ return "[" + ",".join(_stable_stringify(v) for v in value) + "]"
96
+ return (
97
+ "{"
98
+ + ",".join(
99
+ f"{json.dumps(str(k), ensure_ascii=False)}:{_stable_stringify(value[k])}"
100
+ for k in sorted(value)
101
+ )
102
+ + "}"
103
+ )
104
+
105
+
106
def _hash_stable(value: Any) -> str:
    """Return the hex SHA-256 digest of the UTF-8 encoded stable JSON form of ``value``."""
    canonical = _stable_stringify(value)
    digest = hashlib.sha256()
    digest.update(canonical.encode("utf-8"))
    return digest.hexdigest()
108
+
109
+
110
+ def _classify_resume_decision(options: dict) -> str:
111
+ """Mirror of the server's inferReplay and the TS SDK's classification."""
112
+ replay = options.get("replay")
113
+ if replay == "manual":
114
+ return "manual_review"
115
+ if replay == "never":
116
+ return "fail_hard"
117
+ if replay == "auto":
118
+ return "safe_replay"
119
+ side_effects = [e for e in (options.get("sideEffects") or []) if e]
120
+ reads_only = len(side_effects) > 0 and all(e == "read" for e in side_effects)
121
+ writes_externally = len(side_effects) > 0 and not reads_only
122
+ if writes_externally and not options.get("idempotencyKey"):
123
+ return "manual_review"
124
+ return "safe_replay"
125
+
126
+
127
def verify_webhook_signature(body: bytes, signature_header: Optional[str], secret: str) -> bool:
    """Verify an HMAC-SHA256 signed Tidebase webhook payload (timing-safe).

    Args:
        body: Raw request body. A ``str`` is also accepted and is encoded as
            UTF-8 before hashing.
        signature_header: Hex digest from the request, with or without a
            leading ``sha256=`` prefix. Missing or empty means "not verified".
        secret: Shared webhook secret.

    Returns:
        True only when the header matches the digest computed over ``body``.
        Malformed headers (including non-ASCII text) yield False rather than
        raising.
    """
    if not signature_header:
        return False
    if isinstance(body, str):
        body = body.encode("utf-8")

    prefix = "sha256="
    candidate = signature_header
    if candidate.startswith(prefix):
        candidate = candidate[len(prefix):]

    expected = hmac.new(secret.encode("utf-8"), body, hashlib.sha256).hexdigest()
    # compare_digest raises TypeError for str arguments containing non-ASCII
    # characters, so compare bytes: an attacker-controlled header must never
    # turn verification into an unhandled exception.
    return hmac.compare_digest(
        expected.encode("ascii"),
        candidate.encode("utf-8", "replace"),
    )
135
+
136
+
137
+ def _serialize_error(error: BaseException) -> dict:
138
+ return {"name": type(error).__name__, "message": str(error)}
139
+
140
+
141
class Tidebase:
    """Synchronous Tidebase client: HTTP transport, run execution and worker loop.

    Configuration falls back to the environment: ``TIDEBASE_URL`` (default
    ``http://localhost:7373``), ``TIDEBASE_API_KEY`` and
    ``TIDEBASE_WEBHOOK_SECRET``.
    """

    def __init__(
        self,
        url: Optional[str] = None,
        api_key: Optional[str] = None,
        webhook_secret: Optional[str] = None,
    ):
        env = os.environ
        # Trailing slashes are dropped so request paths can start with "/".
        self.url = (url or env.get("TIDEBASE_URL") or "http://localhost:7373").rstrip("/")
        self.api_key = api_key or env.get("TIDEBASE_API_KEY")
        self.webhook_secret = webhook_secret or env.get("TIDEBASE_WEBHOOK_SECRET")
        self.runs = RunsClient(self)
        self.queues = QueuesClient(self)
        self.schedules = SchedulesClient(self)
        self._workflows: dict = {}

    def workflow(self, name: str, fn: Optional[Callable] = None):
        """Register a workflow for the work loop / webhook handler.

        Usable directly (``tide.workflow("name", fn)``) or as a decorator
        (``@tide.workflow("name")``). The function is returned unchanged.
        """
        if fn is not None:
            self._workflows[name] = fn
            return fn

        def decorator(f):
            self._workflows[name] = f
            return f

        return decorator

    # ---- transport -------------------------------------------------------

    def request(self, method: str, path: str, body: Any = None) -> Any:
        """Send a JSON request to the API and return the decoded JSON response.

        Args:
            method: HTTP method.
            path: Path appended to the base URL (must start with ``/``).
            body: JSON-serializable payload, or None to send no body.

        Returns:
            The decoded response, or None when the response body is empty.

        Raises:
            RunCancelled: an error response carried ``code == "run_cancelled"``.
            TidebaseError: any other HTTP error response.
        """
        data = None if body is None else json.dumps(body).encode("utf-8")
        headers = {"content-type": "application/json"}
        if self.api_key:
            headers["authorization"] = f"Bearer {self.api_key}"
        req = urlrequest.Request(self.url + path, data=data, headers=headers, method=method)
        try:
            with urlrequest.urlopen(req) as response:
                raw = response.read()
        except urlerror.HTTPError as e:
            body_text = e.read().decode("utf-8", "replace")
            try:
                cancelled = json.loads(body_text).get("code") == "run_cancelled"
            except (ValueError, KeyError, AttributeError):
                # Non-JSON or non-object error bodies are ordinary failures.
                cancelled = False
            if cancelled:
                # The run id is the segment after "/runs/" when the path has one.
                parts = path.split("/")
                run_id = parts[2] if len(parts) > 2 and parts[1] == "runs" else "unknown"
                raise RunCancelled(run_id) from None
            raise TidebaseError(e.code, body_text, path) from None
        if not raw.strip():
            # A successful response without a body is not a JSON error.
            return None
        return json.loads(raw.decode("utf-8"))

    # ---- workflows -------------------------------------------------------

    def run(
        self,
        workflow_name: str,
        workflow: Callable[["RunContext", Any], Any],
        run_id: Optional[str] = None,
        input: Any = None,
        metadata: Optional[dict] = None,
        recovery_webhook: Optional[str] = None,
        channels: Optional[list] = None,
    ) -> Any:
        """Create (or resume, when run_id is given) a run and execute the workflow.

        When ``run_id`` is given the stored run is loaded and the creation
        arguments (``input``, ``metadata``, ...) are not used. A run that is
        already completed returns its stored result without executing.

        Raises:
            ValueError: the loaded run belongs to a different workflow.
        """
        if run_id is None:
            run = self.runs.create(
                workflow_name,
                input=input,
                metadata=metadata,
                recovery_webhook=recovery_webhook,
                channels=channels,
            )
        else:
            run = self.runs.get(run_id)["run"]

        if run["workflowName"] != workflow_name:
            raise ValueError(
                f"Run {run['id']} belongs to workflow {run['workflowName']}, not {workflow_name}"
            )
        if run["status"] == "completed":
            return run["result"]

        begin = self.request("POST", f"/runs/{run['id']}/begin")
        return self._execute(run["id"], run["input"], begin["leaseOwner"], workflow)

    def _execute(self, run_id: str, input: Any, lease_owner: str, workflow: Callable) -> Any:
        """Run ``workflow`` and report completion or failure for ``run_id``."""
        context = RunContext(self, run_id, lease_owner)
        try:
            result = workflow(context, input)
            self.request("POST", f"/runs/{run_id}/complete", {"result": result})
            return result
        except RunCancelled:
            # The run is already terminal — never report failure after cancel.
            raise
        except BaseException as error:
            # Best effort: the original error matters more than a failed report.
            try:
                self.request("POST", f"/runs/{run_id}/fail", {"error": _serialize_error(error)})
            except Exception:
                pass
            raise

    def enqueue(
        self,
        workflow_name: str,
        *,
        queue: str = "default",
        input: Any = None,
        metadata: Optional[dict] = None,
        dedupe_key: Optional[str] = None,
        delay_s: Optional[float] = None,
        run_at: Optional[str] = None,
        max_attempts: Optional[int] = None,
        priority: Optional[int] = None,
        deadline_s: Optional[float] = None,
        recovery_webhook: Optional[str] = None,
    ) -> dict:
        """Enqueue a workflow as a durable queued run.

        Returns ``{"run": ..., "deduplicated": bool}``. Only arguments that are
        not None are sent; ``delay_s`` and ``deadline_s`` are converted to
        whole milliseconds.
        """
        optional_fields = (
            ("input", input),
            ("metadata", metadata),
            ("dedupeKey", dedupe_key),
            ("delayMs", None if delay_s is None else int(delay_s * 1000)),
            ("runAt", run_at),
            ("maxAttempts", max_attempts),
            ("priority", priority),
            ("deadlineMs", None if deadline_s is None else int(deadline_s * 1000)),
            ("recoveryWebhook", recovery_webhook),
        )
        body: dict = {"workflowName": workflow_name}
        body.update((key, value) for key, value in optional_fields if value is not None)
        return self.request("POST", f"/queues/{urlparse.quote(queue, safe='')}/enqueue", body)

    def work(
        self,
        queues: Optional[list] = None,
        *,
        lease_owner: Optional[str] = None,
        poll_s: float = 1.0,
        limit: int = 1,
        stop: Optional[Callable[[], bool]] = None,
        on_error: Optional[Callable[[BaseException, dict], None]] = None,
    ) -> None:
        """Pull-mode worker loop: claim ready runs and execute their
        registered workflows (register with @tide.workflow).

        Runs until ``stop()`` returns True; ``stop`` is checked before each
        claim. Workflow errors are passed to ``on_error`` (or dropped when it
        is None) and the loop continues. KeyboardInterrupt, SystemExit and
        other non-``Exception`` errors propagate so the worker can be stopped.
        """
        queues = queues or ["default"]
        while not (stop and stop()):
            claim_body: dict = {"queues": queues, "limit": limit}
            if lease_owner:
                claim_body["leaseOwner"] = lease_owner
            claim = self.request("POST", "/queues/claim", claim_body)

            for run in claim["runs"]:
                workflow = self._workflows.get(run["workflowName"])
                if workflow is None:
                    # Fail the claimed run instead of leaving it leased.
                    try:
                        self.request(
                            "POST",
                            f"/runs/{run['id']}/fail",
                            {"error": {"message": f"no workflow registered for {run['workflowName']}"}},
                        )
                    except Exception:
                        pass
                    continue
                try:
                    self._execute(run["id"], run["input"], claim["leaseOwner"], workflow)
                except Exception as error:
                    # Only ordinary errors are contained; _execute has already
                    # reported them. Interrupts must escape the loop.
                    if on_error:
                        on_error(error, run)

            if not claim["runs"]:
                time.sleep(poll_s)
322
+
323
+
324
class RunsClient:
    """Run-level API calls, issued through the owning client's ``request``."""

    def __init__(self, client: "Tidebase"):
        self._client = client

    def create(
        self,
        workflow_name: str,
        input: Any = None,
        metadata: Optional[dict] = None,
        recovery_webhook: Optional[str] = None,
        channels: Optional[list] = None,
    ) -> dict:
        """Create a run of ``workflow_name`` and return its ``run`` record.

        Arguments left as None are omitted from the request body.
        """
        candidates = (
            ("input", input),
            ("metadata", metadata),
            ("recoveryWebhook", recovery_webhook),
            ("channels", channels),
        )
        payload: dict = {key: value for key, value in candidates if value is not None}
        path = "/runs/" + urlparse.quote(workflow_name, safe="")
        response = self._client.request("POST", path, payload)
        return response["run"]

    def list(self) -> list:
        """Return the ``runs`` list from ``GET /runs``."""
        response = self._client.request("GET", "/runs")
        return response["runs"]

    def get(self, run_id: str) -> dict:
        """Full run detail: run, steps, state, gates, events, usage, children."""
        path = f"/runs/{run_id}"
        return self._client.request("GET", path)

    def recover(self, run_id: str, reason: str = "manual") -> dict:
        """POST a recover request for the run with the given reason."""
        payload = {"reason": reason}
        return self._client.request("POST", f"/runs/{run_id}/recover", payload)

    def cancel(self, run_id: str, reason: Optional[str] = None, actor: Optional[str] = None) -> dict:
        """Cancel a run: authoritative and one-way. In-flight workers observe
        it at their next step/gate boundary. Returns the ``run`` record."""
        payload: dict = {
            key: value
            for key, value in (("reason", reason), ("actor", actor))
            if value is not None
        }
        response = self._client.request("POST", f"/runs/{run_id}/cancel", payload)
        return response["run"]

    def subscribe(self, run_id: str, after: int = 0) -> Iterator[dict]:
        """Yield run events from the SSE stream (blocking generator).

        Only ``data:`` lines are decoded; each is parsed as JSON. The API key,
        when set, is passed as a ``token`` query parameter.
        """
        token = ""
        if self._client.api_key:
            token = f"&token={urlparse.quote(self._client.api_key)}"
        url = f"{self._client.url}/runs/{run_id}/events?after={after}{token}"
        req = urlrequest.Request(url, headers={"accept": "text/event-stream"})
        marker = "data:"
        with urlrequest.urlopen(req) as response:
            for raw in response:
                line = raw.decode("utf-8").rstrip("\n")
                if not line.startswith(marker):
                    continue
                yield json.loads(line[len(marker):].strip())
378
+
379
+
380
class QueuesClient:
    """Queue configuration and listing calls."""

    def __init__(self, client: "Tidebase"):
        self._client = client

    def configure(
        self,
        name: str,
        *,
        concurrency: Optional[int] = None,
        rate_per_minute: Optional[int] = None,
        invoke_url: Optional[str] = None,
    ) -> dict:
        """PUT the queue's config; settings left as None are not sent."""
        settings = (
            ("concurrency", concurrency),
            ("ratePerMinute", rate_per_minute),
            ("invokeUrl", invoke_url),
        )
        payload: dict = {key: value for key, value in settings if value is not None}
        path = f"/queues/{urlparse.quote(name, safe='')}/config"
        return self._client.request("PUT", path, payload)

    def list(self) -> list:
        """Return the ``queues`` list from ``GET /queues``."""
        response = self._client.request("GET", "/queues")
        return response["queues"]
403
+
404
+
405
class SchedulesClient:
    """Create, list and delete named schedules."""

    def __init__(self, client: "Tidebase"):
        self._client = client

    def _path(self, name: str) -> str:
        # Schedule names are fully percent-encoded, including "/".
        return f"/schedules/{urlparse.quote(name, safe='')}"

    def set(
        self,
        name: str,
        *,
        cron: str,
        workflow_name: str,
        input: Any = None,
        queue: str = "default",
        max_attempts: Optional[int] = None,
        enabled: bool = True,
    ) -> dict:
        """PUT the schedule ``name`` and return the ``schedule`` record.

        ``input`` and ``max_attempts`` are only sent when not None.
        """
        payload: dict = {
            "cron": cron,
            "workflowName": workflow_name,
            "queue": queue,
            "enabled": enabled,
        }
        for key, value in (("input", input), ("maxAttempts", max_attempts)):
            if value is not None:
                payload[key] = value
        response = self._client.request("PUT", self._path(name), payload)
        return response["schedule"]

    def list(self) -> list:
        """Return the ``schedules`` list from ``GET /schedules``."""
        response = self._client.request("GET", "/schedules")
        return response["schedules"]

    def delete(self, name: str) -> dict:
        """DELETE the schedule ``name`` and return the raw response."""
        return self._client.request("DELETE", self._path(name))
432
+
433
+
434
class RunContext:
    """Handle passed to a workflow function for one run.

    Exposes checkpointed steps, gates, child runs and fanout, plus the
    ``state``, ``usage`` and ``snapshots`` helpers bound to the same run.
    """

    def __init__(self, client: "Tidebase", run_id: str, lease_owner: str):
        self._client = client
        self.run_id = run_id
        self._lease_owner = lease_owner
        self.state = RunState(client, run_id)
        self.usage = RunUsage(client, run_id)
        self.snapshots = RunSnapshots(client, run_id)

    # ---- steps -----------------------------------------------------------

    def step(
        self,
        name: str,
        fn: Callable[[], Any],
        *,
        input: Any = None,
        input_hash: Optional[str] = None,
        retries: int = 0,
        timeout_s: Optional[float] = None,
        side_effects: Optional[list] = None,
        idempotency_key: Optional[str] = None,
        replay: Optional[str] = None,
        checkpoint_invariant: Any = None,
        verified_by: Any = None,
        credentials: Optional[list] = None,
    ) -> Any:
        """Checkpoint a unit of work.

        On replay the stored output is returned without executing ``fn``.
        External writes should declare ``side_effects`` and an
        ``idempotency_key``, or final failures classify as manual_review.

        ``fn`` is attempted up to ``retries + 1`` times. Only ordinary
        ``Exception`` failures are retried: KeyboardInterrupt, SystemExit and
        similar are reported as a final failure and re-raised at once, and
        ``RunCancelled`` propagates without a failure report because the run
        is already terminal.

        Raises:
            RunCancelled: the run was cancelled.
            ValueError: the step's input hash differs from the recorded one.
            RuntimeError: the step is leased by another worker.
        """
        candidates = (
            ("input", input),
            ("retries", retries or None),
            ("timeoutMs", None if timeout_s is None else int(timeout_s * 1000)),
            ("sideEffects", side_effects),
            ("idempotencyKey", idempotency_key),
            ("replay", replay),
            ("checkpointInvariant", checkpoint_invariant),
            ("verifiedBy", verified_by),
            ("credentials", credentials),
        )
        options: dict = {key: value for key, value in candidates if value is not None}

        resolved_hash = input_hash or _hash_stable(input)

        def begin() -> dict:
            return self._client.request(
                "POST",
                f"/runs/{self.run_id}/steps/begin",
                {
                    "name": name,
                    "inputHash": resolved_hash,
                    "input": input,
                    "options": options,
                    "leaseOwner": self._lease_owner,
                },
            )

        attempts = max(1, retries + 1)
        for attempt in range(1, attempts + 1):
            # Every attempt begins the step again: a retryable failure released
            # the lease server-side, so a fresh lease is needed before results
            # can be reported.
            current = begin()
            action = current["action"]
            if action == "return":
                return current["output"]
            if action == "cancelled":
                raise RunCancelled(self.run_id)
            if action == "input_mismatch":
                if attempt == 1:
                    raise ValueError(
                        f"Step {name} input hash changed for this run. "
                        f"Expected {current['expectedInputHash']}, got {current['actualInputHash']}"
                    )
                raise ValueError(f"Step {name} input hash changed for this run")
            if action == "locked":
                raise RuntimeError(f"Step {name} is currently leased by another worker")

            try:
                result = fn()
                self._client.request(
                    "POST",
                    f"/runs/{self.run_id}/steps/{current['step']['id']}/complete",
                    {"leaseOwner": current["leaseOwner"], "output": result},
                )
                return result
            except RunCancelled:
                # The run is already terminal — never report failure after cancel.
                raise
            except BaseException as error:
                # Interrupts and exits are never retried, whatever the budget.
                retryable = attempt < attempts and isinstance(error, Exception)
                try:
                    self._client.request(
                        "POST",
                        f"/runs/{self.run_id}/steps/{current['step']['id']}/fail",
                        {
                            "leaseOwner": current["leaseOwner"],
                            "retryable": retryable,
                            "resumeDecision": "auto_retry"
                            if retryable
                            else _classify_resume_decision(options),
                            "error": _serialize_error(error),
                        },
                    )
                except Exception:
                    # Best effort: the step's own error is what the caller sees.
                    pass
                if not retryable:
                    raise
        raise RuntimeError(f"Step {name} failed")

    # ---- gates -----------------------------------------------------------

    def gate(
        self,
        name: str,
        prompt: str,
        *,
        data: Any = None,
        channels: Optional[list] = None,
        capability: Optional[dict] = None,
        timeout_s: Optional[float] = None,
        poll_s: float = 1.0,
    ) -> "GateDecision":
        """Pause the run on a durable gate until a human decides.

        The decision resolves exactly once and replays on resume. While the
        gate is pending it is polled every ``poll_s`` seconds.

        Raises:
            TimeoutError: ``timeout_s`` elapsed locally while still pending.
            RunCancelled: a poll reported the run as cancelled.
            RuntimeError: the gate resolved with an unsupported decision.
        """
        body: dict = {
            "name": name,
            "prompt": prompt,
            "data": data if data is not None else {},
            "channels": channels or [],
            "capability": capability,
        }
        if timeout_s is not None:
            body["timeoutMs"] = int(timeout_s * 1000)
        begun = self._client.request("POST", f"/runs/{self.run_id}/gates/begin", body)

        # A falsy timeout (None or 0) means no local deadline.
        deadline = time.monotonic() + timeout_s if timeout_s else None
        gate = begun["gate"]
        while gate["status"] == "pending":
            if deadline and time.monotonic() > deadline:
                raise TimeoutError(f"Gate {name} timed out")
            time.sleep(poll_s)
            polled = self._client.request("GET", f"/runs/{self.run_id}/gates/{gate['id']}")
            if polled.get("runStatus") == "cancelled":
                raise RunCancelled(self.run_id)
            gate = polled["gate"]

        if gate["decision"] not in ("approved", "rejected", "canceled"):
            raise RuntimeError(f"Gate {name} resolved with unsupported decision {gate['decision']}")
        return GateDecision(gate)

    # ---- children & fanout -------------------------------------------------

    def child(
        self,
        workflow_name: str,
        workflow: Callable[["RunContext", Any], Any],
        *,
        name: Optional[str] = None,
        input: Any = None,
        metadata: Optional[dict] = None,
        recovery_webhook: Optional[str] = None,
        channels: Optional[list] = None,
        edge_type: str = "child",
        edge_metadata: Optional[dict] = None,
    ) -> Any:
        """Run a child workflow and return its result.

        Child creation is idempotent by edge name, so a resumed parent reuses
        the existing child run. The edge name defaults to ``workflow_name``.
        """
        body = {
            "name": name or workflow_name,
            "workflowName": workflow_name,
            "input": input,
            "metadata": metadata,
            "recoveryWebhook": recovery_webhook,
            "channels": channels,
            "edgeType": edge_type,
            "edgeMetadata": edge_metadata,
        }
        response = self._client.request(
            "POST",
            f"/runs/{self.run_id}/children",
            {k: v for k, v in body.items() if v is not None},
        )
        # The child is executed through the normal run path against its own id.
        return self._client.run(workflow_name, workflow, run_id=response["run"]["id"])

    def fanout(self, name: str, children: list, *, checkpoint: Optional[str] = None) -> list:
        """Run children in parallel as child runs and join durably.

        Each child is a dict: {"name", "workflow", optional "workflow_name",
        "input", "metadata"}. One thread is used per child; results are
        collected in the order the children were given and then recorded by a
        ``join:<checkpoint or name>`` step.
        """
        with ThreadPoolExecutor(max_workers=max(1, len(children))) as pool:
            futures = [
                pool.submit(
                    self.child,
                    child.get("workflow_name") or child["name"],
                    child["workflow"],
                    name=child["name"],
                    input=child.get("input"),
                    metadata=child.get("metadata"),
                    edge_type="fanout",
                    edge_metadata={"fanout": name},
                )
                for child in children
            ]
            # The first failing child's exception surfaces here.
            results = [f.result() for f in futures]

        return self.step(
            f"join:{checkpoint or name}",
            lambda: results,
            input={
                "fanout": name,
                "join": "all",
                "children": [child["name"] for child in children],
            },
            replay="auto",
            checkpoint_invariant="all child run results were collected",
        )
660
+
661
+
662
class RunState:
    """Read and write the versioned state attached to one run."""

    def __init__(self, client: "Tidebase", run_id: str):
        self._client = client
        self._run_id = run_id

    def _write(self, method: str, value: Any, **options: Any) -> Any:
        """Send ``value`` plus the recognised state options with ``method``."""
        body = {"value": value}
        body.update(_state_options(options))
        return self._client.request(method, f"/runs/{self._run_id}/state", body)

    def set(self, value: Any, **options: Any) -> Any:
        """PUT ``value`` to the run's state endpoint."""
        return self._write("PUT", value, **options)

    def patch(self, value: dict, **options: Any) -> Any:
        """PATCH the run's state endpoint with ``value``."""
        return self._write("PATCH", value, **options)

    def save(self, label: str, **options: Any) -> dict:
        """POST a labelled save of the run's state."""
        body = {"label": label}
        body.update(_state_options(options))
        return self._client.request("POST", f"/runs/{self._run_id}/state/save", body)

    def versions(self, stream: Optional[str] = None, labeled: Optional[bool] = None) -> list:
        """Return ``stateVersions``, optionally filtered by stream and/or label flag."""
        filters = []
        if stream:
            filters.append("stream=" + urlparse.quote(stream))
        if labeled is not None:
            filters.append("labeled=" + ("true" if labeled else "false"))
        query = ("?" + "&".join(filters)) if filters else ""
        response = self._client.request("GET", f"/runs/{self._run_id}/state/versions{query}")
        return response["stateVersions"]
691
+
692
+
693
+ def _state_options(options: dict) -> dict:
694
+ mapping = {
695
+ "stream": "stream",
696
+ "label": "label",
697
+ "reason": "reason",
698
+ "importance": "importance",
699
+ "metadata": "metadata",
700
+ "created_by": "createdBy",
701
+ }
702
+ return {mapping[k]: v for k, v in options.items() if v is not None and k in mapping}
703
+
704
+
705
class RunUsage:
    """Record usage entries (tokens, cost, quantities) against one run."""

    def __init__(self, client: "Tidebase", run_id: str):
        self._client = client
        self._run_id = run_id

    def record(
        self,
        *,
        step_id: Optional[str] = None,
        kind: Optional[str] = None,
        provider: Optional[str] = None,
        model: Optional[str] = None,
        label: Optional[str] = None,
        quantity: Optional[float] = None,
        unit: Optional[str] = None,
        input_tokens: Optional[int] = None,
        output_tokens: Optional[int] = None,
        total_tokens: Optional[int] = None,
        cost_usd: Optional[float] = None,
        metadata: Optional[dict] = None,
    ) -> Any:
        """POST one usage record; fields left as None are not sent."""
        fields = (
            ("stepId", step_id),
            ("kind", kind),
            ("provider", provider),
            ("model", model),
            ("label", label),
            ("quantity", quantity),
            ("unit", unit),
            ("inputTokens", input_tokens),
            ("outputTokens", output_tokens),
            ("totalTokens", total_tokens),
            ("costUsd", cost_usd),
            ("metadata", metadata),
        )
        payload = {}
        for key, value in fields:
            if value is not None:
                payload[key] = value
        return self._client.request("POST", f"/runs/{self._run_id}/usage", payload)
742
+
743
+
744
class RunSnapshots:
    """Create and list snapshots for one run."""

    def __init__(self, client: "Tidebase", run_id: str):
        self._client = client
        self._run_id = run_id

    def create(
        self,
        label: str,
        state: Any,
        *,
        target: Optional[dict] = None,
        reason: Optional[str] = None,
        metadata: Optional[dict] = None,
        created_by: Optional[str] = None,
    ) -> dict:
        """POST a snapshot with ``label`` and ``state``.

        Optional fields are only included when they are not None.
        """
        extras = (
            ("target", target),
            ("reason", reason),
            ("metadata", metadata),
            ("createdBy", created_by),
        )
        payload: dict = {"label": label, "state": state}
        payload.update((key, value) for key, value in extras if value is not None)
        return self._client.request("POST", f"/runs/{self._run_id}/snapshots", payload)

    def list(self) -> list:
        """Return the ``snapshots`` list for this run."""
        response = self._client.request("GET", f"/runs/{self._run_id}/snapshots")
        return response["snapshots"]
tidebase/aio.py ADDED
@@ -0,0 +1,263 @@
1
+ """Async support for the Tidebase Python SDK.
2
+
3
+ `AsyncTidebase` mirrors `tidebase.Tidebase` for asyncio codebases: workflows
4
+ and steps may be `async def`, HTTP calls run off the event loop via
5
+ `asyncio.to_thread`, and cancellation (`tidebase.RunCancelled`) propagates
6
+ through awaits like any other exception — no cleanup branch can miss it.
7
+
8
+ from tidebase.aio import AsyncTidebase
9
+
10
+ tide = AsyncTidebase()
11
+
12
+ @tide.workflow("generate-report")
13
+ async def generate_report(run, input):
14
+ plan = await run.step("plan", lambda: make_plan(input)) # sync step
15
+ text = await run.step("draft", draft_async) # async step
16
+ decision = await run.gate("approve", "Send it?")
17
+ return await run.step("send", lambda: send(text))
18
+
19
+ await tide.run("generate-report", generate_report, input={...})
20
+ # or: await tide.work(["default"]) # async worker loop
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import asyncio
26
+ import inspect
27
+ from typing import Any, Callable, Optional
28
+
29
+ from . import (
30
+ GateDecision,
31
+ RunCancelled,
32
+ RunContext,
33
+ Tidebase,
34
+ _classify_resume_decision,
35
+ _hash_stable,
36
+ _serialize_error,
37
+ )
38
+
39
+ __all__ = ["AsyncTidebase", "AsyncRunContext"]
40
+
41
+
42
+ async def _maybe_await(value: Any) -> Any:
43
+ if inspect.isawaitable(value):
44
+ return await value
45
+ return value
46
+
47
+
48
+ class AsyncRunContext:
49
+ """Async run context: same checkpoint protocol as RunContext, but the
50
+ user's step function may be `async def` and is awaited on the event loop
51
+ (only the HTTP calls run in a thread)."""
52
+
53
+ def __init__(self, inner: RunContext):
54
+ self._inner = inner
55
+ self._client = inner._client # noqa: SLF001 — same package
56
+ self.run_id = inner.run_id
57
+
58
+ async def _request(self, method: str, path: str, body: Any = None) -> Any:
59
+ return await asyncio.to_thread(self._client.request, method, path, body)
60
+
61
+ async def step(
62
+ self,
63
+ name: str,
64
+ fn: Callable[[], Any],
65
+ *,
66
+ input: Any = None,
67
+ input_hash: Optional[str] = None,
68
+ retries: int = 0,
69
+ side_effects: Optional[list] = None,
70
+ idempotency_key: Optional[str] = None,
71
+ replay: Optional[str] = None,
72
+ checkpoint_invariant: Any = None,
73
+ ) -> Any:
74
+ options: dict = {}
75
+ if input is not None:
76
+ options["input"] = input
77
+ if retries:
78
+ options["retries"] = retries
79
+ if side_effects is not None:
80
+ options["sideEffects"] = side_effects
81
+ if idempotency_key is not None:
82
+ options["idempotencyKey"] = idempotency_key
83
+ if replay is not None:
84
+ options["replay"] = replay
85
+ if checkpoint_invariant is not None:
86
+ options["checkpointInvariant"] = checkpoint_invariant
87
+
88
+ resolved_hash = input_hash or _hash_stable(input if input is not None else None)
89
+
90
+ async def begin() -> dict:
91
+ return await self._request(
92
+ "POST",
93
+ f"/runs/{self.run_id}/steps/begin",
94
+ {
95
+ "name": name,
96
+ "inputHash": resolved_hash,
97
+ "input": input,
98
+ "options": options,
99
+ "leaseOwner": self._inner._lease_owner, # noqa: SLF001
100
+ },
101
+ )
102
+
103
+ current = await begin()
104
+ if current["action"] == "return":
105
+ return current["output"]
106
+ if current["action"] == "cancelled":
107
+ raise RunCancelled(self.run_id)
108
+ if current["action"] in ("locked", "input_mismatch"):
109
+ raise RuntimeError(f"Step {name}: {current['action']}")
110
+
111
+ attempts = max(1, retries + 1)
112
+ for attempt in range(1, attempts + 1):
113
+ if attempt > 1:
114
+ current = await begin()
115
+ if current["action"] == "return":
116
+ return current["output"]
117
+ if current["action"] == "cancelled":
118
+ raise RunCancelled(self.run_id)
119
+ if current["action"] in ("locked", "input_mismatch"):
120
+ raise RuntimeError(f"Step {name}: {current['action']}")
121
+ try:
122
+ result = await _maybe_await(fn())
123
+ await self._request(
124
+ "POST",
125
+ f"/runs/{self.run_id}/steps/{current['step']['id']}/complete",
126
+ {"leaseOwner": current["leaseOwner"], "output": result},
127
+ )
128
+ return result
129
+ except BaseException as error:
130
+ retryable = attempt < attempts
131
+ try:
132
+ await self._request(
133
+ "POST",
134
+ f"/runs/{self.run_id}/steps/{current['step']['id']}/fail",
135
+ {
136
+ "leaseOwner": current["leaseOwner"],
137
+ "retryable": retryable,
138
+ "resumeDecision": "auto_retry"
139
+ if retryable
140
+ else _classify_resume_decision(options),
141
+ "error": _serialize_error(error),
142
+ },
143
+ )
144
+ except Exception:
145
+ pass
146
+ if not retryable:
147
+ raise
148
+ raise RuntimeError(f"Step {name} failed")
149
+
150
async def gate(self, name: str, prompt: str, **options: Any) -> GateDecision:
    """Resolve a gate through the wrapped synchronous context.

    The synchronous ``gate`` call is run on a worker thread via
    ``asyncio.to_thread``; ``name``, ``prompt`` and any keyword options are
    forwarded unchanged and its return value is handed back as-is.
    """
    blocking_gate = self._inner.gate
    decision = await asyncio.to_thread(blocking_gate, name, prompt, **options)
    return decision
152
+
153
async def state_set(self, value: Any, **options: Any) -> Any:
    """Call the wrapped context's ``state.set`` on a worker thread.

    ``value`` and any keyword options are forwarded unchanged; the result of
    the synchronous call is returned as-is.
    """
    blocking_set = self._inner.state.set
    outcome = await asyncio.to_thread(blocking_set, value, **options)
    return outcome
155
+
156
async def state_patch(self, value: dict, **options: Any) -> Any:
    """Call the wrapped context's ``state.patch`` on a worker thread.

    ``value`` and any keyword options are forwarded unchanged; the result of
    the synchronous call is returned as-is.
    """
    blocking_patch = self._inner.state.patch
    outcome = await asyncio.to_thread(blocking_patch, value, **options)
    return outcome
158
+
159
async def state_save(self, label: str, **options: Any) -> Any:
    """Call the wrapped context's ``state.save`` on a worker thread.

    ``label`` and any keyword options are forwarded unchanged; the result of
    the synchronous call is returned as-is.
    """
    blocking_save = self._inner.state.save
    outcome = await asyncio.to_thread(blocking_save, label, **options)
    return outcome
161
+
162
async def usage_record(self, **options: Any) -> Any:
    """Call the wrapped context's ``usage.record`` on a worker thread.

    All keyword options are forwarded unchanged; the result of the
    synchronous call is returned as-is.
    """
    blocking_record = self._inner.usage.record
    outcome = await asyncio.to_thread(blocking_record, **options)
    return outcome
164
+
165
+
166
class AsyncTidebase:
    """Asyncio front end over the synchronous ``Tidebase`` client.

    Every call into the synchronous client made by this class goes through
    ``asyncio.to_thread``. Workflows can be handed to :meth:`run` directly or
    registered by name with :meth:`workflow`; :meth:`work` only executes
    registered workflows.
    """

    def __init__(self, url: Optional[str] = None, api_key: Optional[str] = None):
        """Build the underlying synchronous client.

        ``url`` and ``api_key`` are forwarded unchanged to ``Tidebase``.
        """
        self._sync = Tidebase(url=url, api_key=api_key)
        # Maps workflow name -> callable, filled in by ``workflow()``.
        self._workflows: dict = {}
        # These are the synchronous client's own objects, re-exported as-is.
        # NOTE(review): their methods are presumably blocking - confirm before
        # calling them directly from a coroutine.
        self.runs = self._sync.runs
        self.queues = self._sync.queues
        self.schedules = self._sync.schedules

    def workflow(self, name: str, fn: Optional[Callable] = None):
        """Register ``fn`` under ``name``; usable directly or as a decorator.

        With ``fn`` given, it is registered and returned. Without it, a
        decorator is returned that registers (and returns) the function it is
        applied to. Registering the same name again replaces the earlier entry.
        """
        if fn is None:
            def decorator(f):
                self._workflows[name] = f
                return f
            return decorator
        self._workflows[name] = fn
        return fn

    async def run(
        self,
        workflow_name: str,
        workflow: Optional[Callable] = None,
        *,
        run_id: Optional[str] = None,
        input: Any = None,
        **create_options: Any,
    ) -> Any:
        """Create a run, or pick up an existing one, and execute its workflow.

        Args:
            workflow_name: Name used to create the run and, when ``workflow``
                is omitted, to look up a registered workflow.
            workflow: Callable invoked as ``workflow(context, input)``.
            run_id: When given, the existing run is fetched instead of a new
                one being created.
            input: Input passed to ``runs.create`` for a new run.
            **create_options: Extra keyword arguments for ``runs.create``.

        Returns:
            The workflow's result, or the stored result when the fetched run
            already has status ``"completed"``.

        Raises:
            ValueError: If no workflow was passed and none is registered under
                ``workflow_name``.
        """
        workflow = workflow or self._workflows.get(workflow_name)
        if workflow is None:
            raise ValueError(f"no workflow registered for {workflow_name}")

        if run_id is None:
            run = await asyncio.to_thread(
                self._sync.runs.create, workflow_name, input=input, **create_options
            )
        else:
            detail = await asyncio.to_thread(self._sync.runs.get, run_id)
            run = detail["run"]
            # A finished run is never re-executed; hand back what it stored.
            if run["status"] == "completed":
                return run["result"]

        # The begin call returns the lease owner that the run context is
        # constructed with.
        begin = await asyncio.to_thread(self._sync.request, "POST", f"/runs/{run['id']}/begin")
        return await self._execute(run["id"], run["input"], begin["leaseOwner"], workflow)

    async def _execute(self, run_id: str, input: Any, lease_owner: str, workflow: Callable) -> Any:
        """Run ``workflow`` for one run and report the outcome to the server.

        On success the result is posted to ``/runs/{run_id}/complete`` and
        returned. ``RunCancelled`` is re-raised without reporting a failure.
        Any other exception is posted to ``/runs/{run_id}/fail`` on a
        best-effort basis and then re-raised.
        """
        context = AsyncRunContext(RunContext(self._sync, run_id, lease_owner))
        try:
            # NOTE(review): _maybe_await presumably lets the workflow be either
            # a plain function or a coroutine function - confirm in its
            # definition.
            result = await _maybe_await(workflow(context, input))
            await asyncio.to_thread(
                self._sync.request, "POST", f"/runs/{run_id}/complete", {"result": result}
            )
            return result
        except RunCancelled:
            raise
        except BaseException as error:
            # BaseException on purpose: the failure is recorded and the
            # original exception is always re-raised below.
            try:
                await asyncio.to_thread(
                    self._sync.request,
                    "POST",
                    f"/runs/{run_id}/fail",
                    {"error": _serialize_error(error)},
                )
            except Exception:
                # Best effort: a failed report must not mask the real error.
                pass
            raise

    async def enqueue(self, workflow_name: str, **options: Any) -> dict:
        """Call the synchronous client's ``enqueue`` on a worker thread."""
        return await asyncio.to_thread(self._sync.enqueue, workflow_name, **options)

    async def cancel(self, run_id: str, reason: Optional[str] = None, actor: Optional[str] = None) -> dict:
        """Call the synchronous client's ``runs.cancel`` on a worker thread."""
        return await asyncio.to_thread(self._sync.runs.cancel, run_id, reason, actor)

    async def work(
        self,
        queues: Optional[list] = None,
        *,
        poll_s: float = 1.0,
        limit: int = 1,
        on_error: Optional[Callable[[BaseException, dict], None]] = None,
    ) -> None:
        """Async worker loop. Cancel the surrounding task to stop.

        Repeatedly posts to ``/queues/claim`` and executes each returned run,
        one after another, with the workflow registered under its
        ``workflowName``. Sleeps ``poll_s`` seconds whenever a claim returns no
        runs.

        Args:
            queues: Queue names to claim from; defaults to ``["default"]``.
            poll_s: Seconds to sleep after an empty claim.
            limit: Value sent as ``limit`` in the claim request.
            on_error: Optional callback invoked as ``on_error(error, run)``
                when executing a run raises an ``Exception``; the loop then
                moves on to the next run.

        Only ``Exception`` subclasses are absorbed. Task cancellation,
        ``KeyboardInterrupt``, ``SystemExit`` and other ``BaseException``
        signals propagate out of the loop so the worker can actually be
        stopped.
        """
        queues = queues or ["default"]
        while True:
            claim = await asyncio.to_thread(
                self._sync.request, "POST", "/queues/claim", {"queues": queues, "limit": limit}
            )
            for run in claim["runs"]:
                workflow = self._workflows.get(run["workflowName"])
                if workflow is None:
                    # NOTE(review): the run was returned by the claim but is
                    # skipped here without being executed or reported.
                    continue
                try:
                    await self._execute(run["id"], run["input"], claim["leaseOwner"], workflow)
                except asyncio.CancelledError:
                    raise
                except Exception as error:
                    # One failing run must not stop the worker, but shutdown
                    # signals are deliberately not caught here.
                    if on_error:
                        on_error(error, run)
            if not claim["runs"]:
                await asyncio.sleep(poll_s)
@@ -0,0 +1,72 @@
1
+ Metadata-Version: 2.4
2
+ Name: tidebase
3
+ Version: 0.5.0
4
+ Summary: Python SDK for Tidebase — the open-source checkpoint layer for AI agents. Wrap your steps, and failed runs resume from the last safe point, in your own Postgres.
5
+ Project-URL: Homepage, https://tidebase.dev
6
+ Project-URL: Repository, https://github.com/BlueprintLabIO/tidebase
7
+ Project-URL: Documentation, https://tidebase.dev/docs/
8
+ Author: BlueprintLab
9
+ License-Expression: Apache-2.0
10
+ Keywords: ai-agents,checkpoint,durable,human-in-the-loop,postgres,resume,workflow
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3
14
+ Classifier: Topic :: Software Development :: Libraries
15
+ Requires-Python: >=3.9
16
+ Description-Content-Type: text/markdown
17
+
18
+ # tidebase (Python SDK)
19
+
20
+ Python SDK for [Tidebase](https://tidebase.dev) — the open-source checkpoint layer for AI agents: wrap your steps, and failed runs resume from the last safe point — in your own Postgres, without moving execution into a new runtime.
21
+
22
+ Zero dependencies (stdlib only), Python 3.9+.
23
+
24
+ ```python
25
+ from tidebase import Tidebase
26
+
27
+ tide = Tidebase() # reads TIDEBASE_URL (default http://localhost:7373) and TIDEBASE_API_KEY
28
+
29
+ def workflow(run, input):
30
+ plan = run.step("plan", lambda: make_plan(input))
31
+ sources = run.step("fetch-sources", lambda: fetch_sources(plan))
32
+
33
+ run.state.set({"status": "writing", "progress": 0.7})
34
+
35
+ decision = run.gate("approve-report", "Send the report to the customer?")
36
+ if not decision.approved:
37
+ raise RuntimeError("not approved")
38
+
39
+ return run.step("write-report", lambda: write_report(sources))
40
+
41
+ tide.run("generate-report", workflow, run_id=run_id)
42
+ ```
43
+
44
+ Re-invoke with the same `run_id` after a crash: completed steps return from their checkpoints instantly; only unfinished steps execute.
45
+
46
+ ## Surface
47
+
48
+ | Call | Does |
49
+ |---|---|
50
+ | `tide.run(name, workflow, run_id=…, input=…)` | Create or resume a run |
51
+ | `run.step(name, fn, side_effects=…, idempotency_key=…, retries=…)` | Checkpoint a unit of work; replays from storage on resume |
52
+ | `run.state.set / patch / save / versions` | Live state + versioned history (snapshot = labeled version) |
53
+ | `run.gate(name, prompt)` | Durable human approval; resolves exactly once |
54
+ | `run.child(...)` / `run.fanout(name, children)` | Subagents as child runs, idempotent by edge name, durable join |
55
+ | `run.usage.record(kind=…, input_tokens=…, cost_usd=…)` | Per-run token/cost ledger, no LLM proxy |
56
+ | `tide.runs.create / get / list / recover / subscribe` | Run API + SSE event stream |
57
+ | `tidebase.verify_webhook_signature(body, header, secret)` | Verify signed recovery/channel webhooks |
58
+
59
+ External writes should declare `side_effects` and an `idempotency_key`; otherwise a final failure is classified `manual_review` instead of silently retrying — that's the [replay contract](https://tidebase.dev/docs/replay-contract-is-it-safe-to-rerun/).
60
+
61
+ ## Tests
62
+
63
+ Integration tests assert the durability invariants against a real server:
64
+
65
+ ```bash
66
+ docker compose up -d postgres && pnpm server # in the repo root
67
+ python3 -m unittest discover sdk-python/tests -v
68
+ ```
69
+
70
+ ## Status
71
+
72
+ Alpha, like the rest of Tidebase. The step input hash matches the TypeScript SDK for common JSON types, so both SDKs can drive the same run (caveat: floats like `1.0` hash differently between the two — avoid mixing SDKs on steps whose input contains them).
@@ -0,0 +1,5 @@
1
+ tidebase/__init__.py,sha256=QxmRjJGZlXgpZL_06cLYIBsT_P1IM5c7YZH684tzPKk,28678
2
+ tidebase/aio.py,sha256=AnS5U-aunHYNOMqVyEaZsKtUjvnJKA-a9MkmqGdiDfM,9902
3
+ tidebase-0.5.0.dist-info/METADATA,sha256=0G9XlrCyzqXiR7N-JGtuDFuvEPWPE1Sb9nohoyxls_k,3443
4
+ tidebase-0.5.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
5
+ tidebase-0.5.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any