toro-queue 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
toro/worker.py ADDED
@@ -0,0 +1,525 @@
1
+ """Worker: the consumer side. Pulls jobs and runs a processor over them.
2
+
3
+ Reliability model (this is the core — see DESIGN.md):
4
+ * A blocking BZPOPMIN on the marker key wakes the worker; the atomic
5
+ `MOVE_TO_ACTIVE` script then moves a job into `active` and locks + loads it.
6
+ * Job acquisition (lock + load) funnels through ONE Lua routine, shared by the
7
+ blocking path and by fetch-next. That routine is the seed of a future
8
+ `moveToActive`: to add priorities/markers we change only which job it picks.
9
+ * Fetch-next: the finish scripts commit the current job AND acquire the next
10
+ one in the same round trip, so a busy worker loops without going back to the
11
+ blocking pop. It only re-blocks when the queue drains.
12
+ * On pickup the worker locks the job (`<id>:lock = <token> PX lockDuration`)
13
+ and a renewer extends it. If a worker dies, its lock expires and a background
14
+ mark-and-sweep recovers the job. Token-guarded finishes guarantee a result
15
+ is committed exactly once even though a handler may run more than once.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import asyncio
21
+ import contextlib
22
+ import json
23
+ import os
24
+ import socket
25
+ import time
26
+ import traceback
27
+ import uuid
28
+ from collections.abc import Awaitable, Callable
29
+ from typing import Any, cast
30
+
31
+ from redis.asyncio import Redis
32
+
33
+ from . import scripts
34
+ from .connection import connect
35
+ from .job import Job, JobContext, JobOptions
36
+ from .keys import Keys
37
+ from .scheduler import next_run
38
+
39
+ Processor = Callable[[Job], Awaitable[Any]]
40
+
41
+
42
+ def _now_ms() -> int:
43
+ return int(time.time() * 1000)
44
+
45
+
46
+ def _pairs(flat: list | None) -> dict:
47
+ """Turn a flat HGETALL array [k, v, k, v, ...] into a dict."""
48
+ if not flat:
49
+ return {}
50
+ it = iter(flat)
51
+ return dict(zip(it, it, strict=False))
52
+
53
+
54
def compute_backoff(backoff: Any, attempts_made: int) -> int:
    """Return the delay in milliseconds to wait before the next attempt.

    Pure function so it can be unit-tested without a Redis-bound Worker.

    Args:
        backoff: ``None``/``0`` (no delay), a number (fixed delay in ms), or a
            mapping ``{"type": "fixed" | "exponential", "delay": ms}``.
        attempts_made: Number of attempts already made. Exponential backoff
            doubles per attempt, starting at ``delay`` for the first one.

    Returns:
        A non-negative whole number of milliseconds.
    """
    if not backoff:
        return 0
    if isinstance(backoff, (int, float)):
        # A negative fixed delay has no meaning; treat it as "retry immediately".
        return max(int(backoff), 0)
    # A missing or None delay counts as zero instead of raising in int().
    delay = backoff.get("delay") or 0
    if backoff.get("type") == "exponential":
        # Clamp the exponent so attempts_made < 1 cannot produce a fractional
        # multiplier (2 ** -1 would halve the configured base delay).
        exponent = max(attempts_made - 1, 0)
        delay = delay * (2**exponent)
    return max(int(delay), 0)
67
+
68
+
69
class Worker:
    """The consumer side: claims jobs, runs the processor, and recovers stalls.

    A worker runs `concurrency` process loops plus background loops for
    promoting delayed jobs, sweeping stalled jobs and writing a heartbeat.
    Every state transition goes through a registered Lua script.

    Events (subscribe with `on`): "completed" (job, result), "failed"
    (job, exc) or (job_id, exc) when raised by the stalled sweep, "retrying"
    (job, exc), "stalled" (job_id), "lock-lost" (job_id), "rate-limited"
    (retry_ms) and "error" (exc) for unexpected failures inside a process loop.
    """

    def __init__(
        self,
        name: str,
        processor: Processor,
        *,
        connection: Redis | None = None,
        url: str = "redis://localhost:6379",
        prefix: str = "toro",
        concurrency: int = 1,
        rate_limit: dict | None = None,
        block_timeout: float = 5.0,
        lock_duration: int = 30000,
        lock_renew_time: int | None = None,
        renew_locks: bool = True,
        stalled_interval: int = 30000,
        max_stalled_count: int = 1,
        grace_period: float = 30.0,
        heartbeat_interval: int = 5000,
    ) -> None:
        """Configure the worker; nothing is sent to Redis until `run()`.

        Args:
            name: Queue name; together with `prefix` it builds the Redis keys.
            processor: Async callable invoked with each claimed `Job`.
            connection: Existing Redis client; when omitted one is created from `url`.
            url: Redis URL used only when `connection` is not given.
            prefix: Key prefix handed to `Keys`.
            concurrency: Number of process loops started by `run()`.
            rate_limit: Optional `{"max": N, "duration": ms}` queue-wide limit.
            block_timeout: Seconds to block on the marker before re-checking.
            lock_duration: Job lock lifetime in ms.
            lock_renew_time: Ms between lock renewals; defaults to half of `lock_duration`.
            renew_locks: Whether a renewer task extends the lock while a job runs.
            stalled_interval: Ms between stalled sweeps; `0` or less disables the loop.
            max_stalled_count: Passed to the stalled-sweep script.
            grace_period: Default seconds `stop()` waits for in-flight jobs.
            heartbeat_interval: Ms between heartbeats; `0` or less disables the loop.

        Raises:
            ValueError: If `rate_limit` is given without positive `max` and `duration`.
        """
        self.name = name
        self.processor = processor
        self.keys = Keys(name, prefix)
        self.redis = connection or connect(url)
        self.concurrency = concurrency
        # Queue-wide rate limit, shared by all workers via one token bucket in Redis.
        # `{"max": N, "duration": ms}` = at most N jobs per duration. All workers on a
        # queue should pass the SAME config so the shared bucket behaves consistently.
        if rate_limit is not None and (
            int(rate_limit.get("max", 0)) <= 0 or int(rate_limit.get("duration", 0)) <= 0
        ):
            raise ValueError("rate_limit needs {'max': positive, 'duration': positive ms}")
        self.rl_max = int(rate_limit["max"]) if rate_limit else 0
        self.rl_duration = int(rate_limit["duration"]) if rate_limit else 0
        self.block_timeout = block_timeout

        # Reliability knobs.
        self.token = uuid.uuid4().hex  # identifies this worker as the lock owner
        self.lock_duration = lock_duration
        self.lock_renew_time = lock_renew_time or lock_duration // 2
        self.renew_locks = renew_locks
        self.stalled_interval = stalled_interval
        self.max_stalled_count = max_stalled_count
        self.grace_period = grace_period
        self.heartbeat_interval = heartbeat_interval

        self._running = False
        self._tasks: list[asyncio.Task] = []
        self._process_tasks: list[asyncio.Task] = []

        # Presence + throughput for the "workers" view; flushed to Redis each heartbeat.
        self.started_at = 0
        self._processed = 0
        self._failed = 0
        self._current: set[str] = set()
        # "running" until a graceful stop flips it to "stopping" — the dashboard shows
        # a live "draining" state, and a worker that then vanishes was mid-shutdown,
        # not a crash. (The only honest way to know graceful; absence can't say why.)
        self._state = "running"

        self._move_to_active = self.redis.register_script(scripts.MOVE_TO_ACTIVE)
        self._extend_lock = self.redis.register_script(scripts.EXTEND_LOCK)
        self._move_to_completed = self.redis.register_script(scripts.MOVE_TO_COMPLETED)
        self._move_to_failed = self.redis.register_script(scripts.MOVE_TO_FAILED)
        self._move_stalled = self.redis.register_script(scripts.MOVE_STALLED)
        self._promote_delayed = self.redis.register_script(scripts.PROMOTE_DELAYED)
        self._add_scheduled = self.redis.register_script(scripts.ADD_SCHEDULED)

        # Simple event callbacks: worker.on("completed", fn)
        self._handlers: dict[str, list[Callable]] = {}

    def on(self, event: str, fn: Callable) -> None:
        """Register `fn` to be called synchronously whenever `event` is emitted."""
        self._handlers.setdefault(event, []).append(fn)

    def _emit(self, event: str, *args: Any) -> None:
        """Call every handler registered for `event`, in registration order."""
        for fn in self._handlers.get(event, []):
            fn(*args)

    async def run(self) -> None:
        """Start processing until stop() is called. Awaitable forever."""
        self._running = True
        self.started_at = _now_ms()
        await self._write_heartbeat()  # register at once so the worker shows up immediately
        self._process_tasks = [
            asyncio.create_task(self._process_loop()) for _ in range(self.concurrency)
        ]
        bg = [asyncio.create_task(self._promote_loop())]
        if self.stalled_interval > 0:
            bg.append(asyncio.create_task(self._stalled_loop()))
        if self.heartbeat_interval > 0:
            bg.append(asyncio.create_task(self._heartbeat_loop()))
        self._tasks = [*self._process_tasks, *bg]
        # stop() cancels these tasks; swallow the resulting cancellation here.
        with contextlib.suppress(asyncio.CancelledError):
            await asyncio.gather(*self._tasks)

    async def stop(self, grace_period: float | None = None) -> None:
        """Graceful shutdown: stop fetching new jobs, let in-flight jobs finish
        (up to `grace_period` seconds), then cancel the rest and disconnect.

        Args:
            grace_period: Seconds to wait for process loops to drain; falls back
                to the `grace_period` given to the constructor when `None`.
        """
        grace = self.grace_period if grace_period is None else grace_period
        self._running = False
        # Flip to "stopping" (shown as "draining" in the dashboard) and flush it now, so
        # this worker reads as shutting down in real time (a later vanish = graceful, not crash).
        self._state = "stopping"
        with contextlib.suppress(Exception):
            await self._write_heartbeat()
        # Wake an idle worker parked on BZPOPMIN so it notices the shutdown.
        with contextlib.suppress(Exception):  # pragma: no cover
            await self.redis.zadd(self.keys.marker, {"0": 0})
        # Let process loops drain their current job and exit on their own.
        if self._process_tasks:
            await asyncio.wait(self._process_tasks, timeout=grace)
        # Force-cancel anything left (jobs past the grace period + background loops).
        for t in self._tasks:
            t.cancel()
        await asyncio.gather(*self._tasks, return_exceptions=True)
        with contextlib.suppress(Exception):
            await self._deregister()  # drop our presence record so we vanish at once
        await self.redis.aclose()

    # ---- presence / heartbeat ---------------------------------------------

    async def _heartbeat_loop(self) -> None:
        """Write a heartbeat every `heartbeat_interval` ms; write errors are ignored."""
        while self._running:
            await asyncio.sleep(self.heartbeat_interval / 1000)
            with contextlib.suppress(Exception):
                await self._write_heartbeat()

    async def _write_heartbeat(self) -> None:
        """Flush this worker's presence record and register it as live."""
        now = _now_ms()
        await self.redis.hset(
            self.keys.worker(self.token),
            mapping={
                "id": self.token,
                "host": socket.gethostname(),
                "pid": os.getpid(),
                "queue": self.name,
                "concurrency": self.concurrency,
                "started": self.started_at,
                "heartbeat": now,
                "processed": self._processed,
                "failed": self._failed,
                "current": json.dumps(sorted(self._current)),
                "state": self._state,
            },
        )
        # The workers sorted set is scored by last heartbeat time.
        await self.redis.zadd(self.keys.workers, {self.token: now})

    async def _deregister(self) -> None:
        """Log a graceful departure, then remove this worker's presence record."""
        await self._record_departure("stopped")  # graceful shutdown
        await self.redis.zrem(self.keys.workers, self.token)
        await self.redis.delete(self.keys.worker(self.token))

    async def _record_departure(self, reason: str) -> None:
        """Append to the capped death-log so the dashboard can show what left and why."""
        now = _now_ms()
        rec = json.dumps(
            {
                "id": self.token,
                "host": socket.gethostname(),
                "pid": os.getpid(),
                "queue": self.name,
                "concurrency": self.concurrency,
                "processed": self._processed,
                "failed": self._failed,
                "started": self.started_at,
                "last_seen": now,
                "current": sorted(self._current),  # what it was running at the end
                "reason": reason,
                "at": now,
            }
        )
        await self.redis.lpush(self.keys.departed, rec)
        await self.redis.ltrim(self.keys.departed, 0, 49)  # keep the newest 50 records

    # ---- the hot path -----------------------------------------------------

    async def _process_loop(self) -> None:
        """Block on the marker, claim a job, and keep handling fetch-next jobs.

        Unexpected errors while acquiring or handling are reported through the
        "error" event and the loop carries on after a short pause.
        """
        while self._running:
            try:
                # The marker only wakes us; the real claim is the atomic
                # MOVE_TO_ACTIVE below. A timeout (None) is fine — we still try
                # to acquire, so a missed marker can never strand a job.
                await self.redis.bzpopmin(self.keys.marker, self.block_timeout)
            except asyncio.CancelledError:
                raise
            except Exception:
                await asyncio.sleep(0.1)
                continue
            if not self._running:
                break  # shutting down — don't claim a new job
            try:
                loaded = await self._acquire()
                # Keep processing as long as each finish hands us the next job. A job
                # that is already claimed is always run, even if stop() flipped
                # `_running` in the meantime: dropping it would leave it locked in
                # `active` until the stalled sweep. The chain still ends on shutdown
                # because `_fetch_flag()` stops asking for a next job.
                while loaded is not None:
                    loaded = await self._handle(loaded)
            except asyncio.CancelledError:
                raise
            except Exception as exc:
                # Same policy as the BZPOPMIN branch above: a Redis hiccup (or a
                # raising event handler) must not kill this loop for good. Any job
                # left claimed is recovered by the stalled sweep once its lock expires.
                with contextlib.suppress(Exception):
                    self._emit("error", exc)
                await asyncio.sleep(0.1)

    async def _acquire(self) -> tuple[str, dict] | None:
        """Pop the highest-priority job into `active`, lock + load it.

        Returns `(job_id, fields)`, or `None` when nothing was claimed or the
        queue is currently rate limited.
        """
        res = await self._move_to_active(
            keys=[
                self.keys.prioritized,
                self.keys.active,
                self.keys.marker,
                self.keys.stalled,
                self.keys.base,
                self.keys.pc,
                self.keys.meta_paused,
                self.keys.limiter,
            ],
            args=[self.token, self.lock_duration, _now_ms(), self.rl_max, self.rl_duration],
        )
        if res and res[0] == scripts.RL_SENTINEL:
            # The script answered with the rate-limit sentinel plus a wait in ms.
            await self._on_rate_limited(int(res[1]))
            return None
        return self._loaded(res)

    async def _on_rate_limited(self, retry_ms: int) -> None:
        """Rate limited: wait until a token frees up (the job stays queued, no
        attempt consumed), then re-arm the marker so we re-check immediately.
        Capped at block_timeout so shutdown stays responsive on long waits.
        """
        self._emit("rate-limited", retry_ms)
        await asyncio.sleep(min(retry_ms, self.block_timeout * 1000) / 1000)
        if self._running:
            with contextlib.suppress(Exception):  # pragma: no cover
                await self.redis.zadd(self.keys.marker, {"0": 0})

    def _loaded(self, res: list | None) -> tuple[str, dict] | None:
        """Turn a MOVE_TO_ACTIVE reply `[flat_fields, job_id]` into `(job_id, fields)`.

        Returns `None` for an empty reply or a reply without job fields.
        """
        if not res:
            return None
        fields = _pairs(res[0])
        if not fields:
            return None
        return (res[1], fields)

    async def _handle(self, loaded: tuple[str, dict]) -> tuple[str, dict] | None:
        """Run the processor on one claimed job and commit the outcome.

        Returns the next job handed back by the finish script, or `None`.
        """
        job_id, fields = loaded
        job = Job.from_hash(job_id, fields)
        # Give the handler the ability to report progress and append logs.
        job._ctx = JobContext(  # noqa: SLF001 — the worker injects the job's runtime context
            redis=self.redis,
            job_key=self.keys.job(job_id),
            events_key=self.keys.events,
            logs_key=self.keys.logs(job_id),
            job_id=job_id,
        )
        # A scheduler job mints its successor on first pickup, so the schedule
        # stays on time regardless of how long (or whether) this run succeeds.
        if fields.get("schedulerId") and job.attempts_made == 1:
            await self._schedule_next(fields["schedulerId"])
        renewer = asyncio.create_task(self._renew_loop(job_id)) if self.renew_locks else None
        self._current.add(job_id)  # so the heartbeat reports what we're running
        try:
            result = await self.processor(job)
        except Exception as exc:
            # Storing the stacktrace is diagnostic only: if that write fails we must
            # still record the failure below, otherwise the job would sit locked in
            # `active` until the stalled sweep picks it up.
            with contextlib.suppress(Exception):
                await self.redis.hset(self.keys.job(job_id), "stacktrace", traceback.format_exc())
            self._failed += 1
            nxt = await self._finish_failed(job, exc)
        else:
            self._processed += 1
            nxt = await self._finish_completed(job, result)
        finally:
            self._current.discard(job_id)
            # The renewer runs until the finish script has returned (or raised).
            if renewer is not None:
                renewer.cancel()
        return nxt

    async def _finish_completed(self, job: Job, result: Any) -> tuple[str, dict] | None:
        """Commit a successful result and, unless stopping, fetch the next job.

        Emits "lock-lost" and returns `None` when the script reports that this
        worker no longer owns the job; otherwise emits "completed".
        """
        res = await self._move_to_completed(
            keys=[
                self.keys.active,
                self.keys.completed,
                self.keys.job(job.id),
                self.keys.lock(job.id),
                self.keys.prioritized,
                self.keys.marker,
                self.keys.stalled,
                self.keys.base,
                self.keys.pc,
                self.keys.events,
                self.keys.meta_paused,
                self.keys.limiter,
            ],
            args=[
                job.id,
                json.dumps(result),
                _now_ms(),
                self.token,
                self._fetch_flag(),
                self.lock_duration,
                *JobOptions.keep_args(job.opts.remove_on_complete),
                self.rl_max,
                self.rl_duration,
            ],
        )
        if res in (scripts.LOCK_LOST, scripts.NOT_ACTIVE):  # finish script's int sentinel
            self._emit("lock-lost", job.id)
            return None
        job.returnvalue = result
        self._emit("completed", job, result)
        return self._next_from(res)

    async def _finish_failed(self, job: Job, exc: Exception) -> tuple[str, dict] | None:
        """Commit a failed attempt and, unless stopping, fetch the next job.

        Emits "lock-lost" and returns `None` when the script reports that this
        worker no longer owns the job. Otherwise emits "failed" when the script's
        outcome is `OUTCOME_FAILED`, and "retrying" for any other outcome.
        """
        res = await self._move_to_failed(
            keys=[
                self.keys.active,
                self.keys.prioritized,
                self.keys.delayed,
                self.keys.failed,
                self.keys.job(job.id),
                self.keys.lock(job.id),
                self.keys.marker,
                self.keys.stalled,
                self.keys.base,
                self.keys.pc,
                self.keys.events,
                self.keys.meta_paused,
                self.keys.limiter,
            ],
            args=[
                job.id,
                str(exc),
                _now_ms(),
                job.attempts_made,
                job.opts.attempts,
                self._backoff_delay(job),
                self.token,
                self._fetch_flag(),
                self.lock_duration,
                *JobOptions.keep_args(job.opts.remove_on_fail),
                self.rl_max,
                self.rl_duration,
            ],
        )
        if res in (scripts.LOCK_LOST, scripts.NOT_ACTIVE):  # finish script's int sentinel
            self._emit("lock-lost", job.id)
            return None
        job.failed_reason = str(exc)
        self._emit("failed" if res[0] == scripts.OUTCOME_FAILED else "retrying", job, exc)
        return self._next_from(res)

    async def _schedule_next(self, scheduler_id: str) -> None:
        """Enqueue the next occurrence of a scheduler (idempotent, stops if removed)."""
        template = await self.redis.hgetall(self.keys.scheduler(scheduler_id))
        if not template or await self.redis.zscore(self.keys.repeat, scheduler_id) is None:
            return  # scheduler was removed — stop the chain
        every = int(template["every"]) if template.get("every") else None
        cron = cast("str | None", template.get("cron") or None)
        now = _now_ms()
        when = next_run(now, every=every, cron=cron)
        await self.redis.zadd(self.keys.repeat, {scheduler_id: when})
        opts = json.loads(template["opts"])
        await self._add_scheduled(
            keys=[self.keys.delayed, self.keys.base],
            args=[
                # The job id is derived from scheduler + run time.
                f"repeat:{scheduler_id}:{when}",
                template["name"],
                template["data"],
                template["opts"],
                now,
                when,
                opts.get("priority", 0),
                scheduler_id,
            ],
        )

    def _fetch_flag(self) -> str:
        """Return "1" to ask a finish script for the next job, "0" while stopping."""
        # Don't fetch a next job while shutting down — let the queue drain cleanly.
        return "1" if self._running else "0"

    def _next_from(self, res: Any) -> tuple[str, dict] | None:
        """Extract `(job_id, fields)` from a finish reply that carries a next job.

        A reply carries a next job when it is a sequence of at least three items,
        with the flat field list at index 1 and the job id at index 2.
        """
        if isinstance(res, (list, tuple)) and len(res) >= 3:
            return (res[2], _pairs(res[1]))
        return None

    # ---- locks & recovery -------------------------------------------------

    async def _renew_loop(self, job_id: str) -> None:
        """Extend the job's lock every `lock_renew_time` ms until cancelled.

        Stops and emits "lock-lost" only when the script itself reports that the
        lock could not be extended.
        """
        interval = self.lock_renew_time / 1000
        while True:
            await asyncio.sleep(interval)
            try:
                ok = await self._extend_lock(
                    keys=[self.keys.lock(job_id), self.keys.stalled],
                    args=[self.token, self.lock_duration, job_id],
                )
            except asyncio.CancelledError:
                raise
            except Exception:  # pragma: no cover
                # A failed round trip says nothing about who owns the lock. Giving up
                # here would stop renewal for the rest of the job, so try again on the
                # next tick; a truly expired lock is reported by the script then.
                continue
            if not ok:
                self._emit("lock-lost", job_id)
                return

    async def _promote_loop(self) -> None:
        """Once a second, run the script that promotes due delayed jobs."""
        while self._running:
            try:
                await self._promote_delayed(
                    keys=[
                        self.keys.delayed,
                        self.keys.prioritized,
                        self.keys.marker,
                        self.keys.base,
                        self.keys.pc,
                    ],
                    args=[_now_ms()],
                )
            except asyncio.CancelledError:
                raise
            except Exception:  # pragma: no cover - best-effort background sweep
                pass
            await asyncio.sleep(1.0)

    async def _stalled_loop(self) -> None:
        """Run a stalled sweep every `stalled_interval` ms and emit its results.

        Note that "failed" is emitted here with a job id, not a `Job` object.
        """
        while self._running:
            await asyncio.sleep(self.stalled_interval / 1000)
            try:
                failed, recovered = await self.check_stalled()
            except asyncio.CancelledError:
                raise
            except Exception:  # pragma: no cover - best-effort background sweep
                continue
            for job_id in recovered:
                self._emit("stalled", job_id)
            for job_id in failed:
                self._emit("failed", job_id, RuntimeError("job stalled too many times"))

    async def check_stalled(self, throttle_ms: int | None = None) -> tuple[list[str], list[str]]:
        """Run one mark-and-sweep pass. Returns (failed_ids, recovered_ids).

        `throttle_ms=0` bypasses the cross-worker throttle (used by tests); by
        default the throttle is `stalled_interval` so concurrent workers don't
        all sweep at once.
        """
        throttle = self.stalled_interval if throttle_ms is None else throttle_ms
        res = await self._move_stalled(
            keys=[
                self.keys.stalled,
                self.keys.active,
                self.keys.prioritized,
                self.keys.failed,
                self.keys.stalled_check,
                self.keys.base,
                self.keys.marker,
                self.keys.pc,
            ],
            args=[self.max_stalled_count, _now_ms(), throttle],
        )
        # An empty reply means nothing was swept.
        failed = list(res[0]) if res else []
        recovered = list(res[1]) if res and len(res) > 1 else []
        return failed, recovered

    def _backoff_delay(self, job: Job) -> int:
        """Delay in ms before this job's next attempt, from its backoff option."""
        return compute_backoff(job.opts.backoff, job.attempts_made)
@@ -0,0 +1,127 @@
1
+ Metadata-Version: 2.4
2
+ Name: toro-queue
3
+ Version: 0.1.0
4
+ Summary: An async-first, Redis-backed job queue for Python.
5
+ Project-URL: Homepage, https://github.com/ilovepixelart/toro
6
+ Project-URL: Repository, https://github.com/ilovepixelart/toro
7
+ Project-URL: Issues, https://github.com/ilovepixelart/toro/issues
8
+ Author: ilovepixelart
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: async,asyncio,jobs,queue,redis,scheduler,task-queue,worker
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Framework :: AsyncIO
14
+ Classifier: Intended Audience :: Developers
15
+ Classifier: Programming Language :: Python :: 3 :: Only
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Topic :: Software Development :: Libraries
21
+ Classifier: Topic :: System :: Distributed Computing
22
+ Classifier: Typing :: Typed
23
+ Requires-Python: >=3.10
24
+ Requires-Dist: croniter>=2.0
25
+ Requires-Dist: redis>=5.0
26
+ Description-Content-Type: text/markdown
27
+
28
+ # toro 🐂
29
+
30
+ An **async-first**, Redis-backed job queue for Python. Every state transition is
31
+ an atomic Lua script; producing and processing are `asyncio` end to end.
32
+
33
+ ```bash
34
+ pip install toro-queue # the import name is `toro`
35
+ ```
36
+
37
+ > Installed as **`toro-queue`** on PyPI (the name `toro` was taken), but you
38
+ > `import toro`. See [DESIGN.md](https://github.com/ilovepixelart/toro/blob/main/DESIGN.md) for the architecture and the
39
+ > at-least-once reliability model.
40
+
41
+ ## Why toro
42
+
43
+ - **Async-native.** Enqueue and process with `async`/`await` — no thread pools,
44
+ no sync bridge. A natural fit for FastAPI, aiohttp, or any asyncio app.
45
+ - **Atomic by construction.** Claims, retries, promotions and finishes are Lua
46
+ scripts, so a job can't be lost or double-committed between two round trips.
47
+ - **At-least-once delivery.** Per-job locks + a background mark-and-sweep recover
48
+ jobs from workers that crashed — without the visibility-timeout double-delivery
49
+ trap of some other queues.
50
+ - **Typed.** Ships `py.typed`; the public API is fully annotated.
51
+
52
+ ## Features
53
+
54
+ | | |
55
+ |---|---|
56
+ | **Enqueue** | delayed jobs, global **priorities** (FIFO within a band) |
57
+ | **Retries** | fixed or exponential **backoff**, capped attempts |
58
+ | **Schedules** | repeatable **cron** and fixed-interval (`every`) jobs |
59
+ | **Rate limiting** | queue-wide token bucket shared across all workers |
60
+ | **Dedup** | custom (idempotent) job ids + a throttle window (`{id, ttl}`) |
61
+ | **Auto-removal** | keep the last N and/or finished-within-age completed/failed |
62
+ | **Reliability** | per-job locks, lock renewal, stalled-job recovery |
63
+ | **Observability** | progress, per-job logs, lifecycle events, `await result()` |
64
+ | **Lifecycle** | pause / resume, graceful shutdown that drains in-flight jobs |
65
+ | **Dashboard** | [matador](https://github.com/ilovepixelart/matador) — a live web UI |
66
+
67
+ ## Quick start
68
+
69
+ ```python
70
+ import asyncio
71
+ from toro import Queue, Worker
72
+
73
+ async def main():
74
+ queue = Queue("emails")
75
+ await queue.add("welcome", {"to": "ada@example.com"})
76
+
77
+ async def process(job):
78
+ print("sending", job.data)
79
+ return {"ok": True}
80
+
81
+ worker = Worker("emails", process, concurrency=8)
82
+ worker.on("completed", lambda job, result: print("done", job.id))
83
+ await worker.run()
84
+
85
+ asyncio.run(main())
86
+ ```
87
+
88
+ ## A taste of the options
89
+
90
+ ```python
91
+ # Priorities, delay, and retry-with-backoff
92
+ await queue.add("report", data, priority=10, delay=5000,
93
+ attempts=5, backoff={"type": "exponential", "delay": 1000})
94
+
95
+ # Idempotent custom id (a second add with the same id is ignored)
96
+ await queue.add("charge", data, job_id="order-1234")
97
+
98
+ # A repeatable schedule (cron or every-N-ms); "run now" with trigger_scheduler
99
+ await queue.add_scheduler("nightly-rollup", cron="0 0 * * *")
100
+
101
+ # Queue-wide rate limit: at most 100 jobs / second across every worker
102
+ worker = Worker("emails", process, rate_limit={"max": 100, "duration": 1000})
103
+
104
+ # Wait for a result from the producer side
105
+ job = await queue.add("resize", {"src": "a.png"})
106
+ print(await job.result(timeout=30))
107
+ ```
108
+
109
+ ## Develop
110
+
111
+ Managed with [uv](https://astral.sh/uv); the Astral toolchain throughout.
112
+
113
+ ```bash
114
+ uv sync # venv + deps + dev group
115
+ uv run ruff check . # lint (strict: select = ALL)
116
+ uv run ruff format . # format
117
+ uv run ty check # type check
118
+ uv run pytest -m "unit or integration" # tests (integration needs Redis on :6379)
119
+ uv run python examples/basic.py
120
+ ```
121
+
122
+ The suite is a pyramid — `-m unit` (fast, no Redis), `-m integration` (Redis),
123
+ and `-m load` (the open-loop benchmark harness in `tests/load/`).
124
+
125
+ ## License
126
+
127
+ [MIT](https://github.com/ilovepixelart/toro/blob/main/LICENSE)
@@ -0,0 +1,14 @@
1
+ toro/__init__.py,sha256=oR4z6jIOh3ZyIZg1tjV_RR1RWFucXiQJVnAv_TDci1Y,325
2
+ toro/connection.py,sha256=zpVYmcTd73Im8b518PoCNSDhkcVSpsWYwW5R8WZuD4o,1251
3
+ toro/errors.py,sha256=B1wAPzQtnIjz9ihHbhsmVur7zUcvB_lSnL2VnsMmCcs,370
4
+ toro/job.py,sha256=M1f6MqKcV1rnNxlRs_pbX1B9hBK_vLNNApu0NSXBqqk,6126
5
+ toro/keys.py,sha256=VhtYGdS6P1Q-1xbigptD7m_EgdYG6NfPFbagCdv_2SA,3333
6
+ toro/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
+ toro/queue.py,sha256=2UZsaPc4gxKlER_RP7cOJ15nnDNolKO07EXEvnmSJgE,23117
8
+ toro/scheduler.py,sha256=CQTNgQbd4SDhRL2seN-jpt8cpl3gGTrLX2k4h6-Wgc4,1619
9
+ toro/scripts.py,sha256=N3tIIg7GVyQ_WrpuVZpMGMYP_QDyyISZtSt-F6VMqRI,18473
10
+ toro/worker.py,sha256=khv7uFe3KmaFzpXfuf6aid3IaN1wyisKfiAQslTxOSs,21515
11
+ toro_queue-0.1.0.dist-info/METADATA,sha256=09M4Bg1W2ZYrPw54zw5rqDKcMS5eFrtZ752dPoPEm6k,4800
12
+ toro_queue-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
13
+ toro_queue-0.1.0.dist-info/licenses/LICENSE,sha256=CM5Sh4ZaqlQbqgsjTUvl-A7O0YipFw4V5aZt1K6sLC4,1070
14
+ toro_queue-0.1.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any