taskiq-redis 1.0.3__py3-none-any.whl → 1.0.5__py3-none-any.whl

taskiq_redis/__init__.py CHANGED
@@ -1,5 +1,6 @@
 """Package for redis integration."""
 
+from taskiq_redis.list_schedule_source import ListRedisScheduleSource
 from taskiq_redis.redis_backend import (
     RedisAsyncClusterResultBackend,
     RedisAsyncResultBackend,
@@ -25,6 +26,7 @@ __all__ = [
     "ListQueueBroker",
     "ListQueueClusterBroker",
     "ListQueueSentinelBroker",
+    "ListRedisScheduleSource",
     "PubSubBroker",
     "PubSubSentinelBroker",
     "RedisAsyncClusterResultBackend",
taskiq_redis/list_schedule_source.py ADDED
@@ -0,0 +1,229 @@
+import datetime
+from logging import getLogger
+from typing import Any, List, Optional
+
+from redis.asyncio import BlockingConnectionPool, Redis
+from taskiq import ScheduledTask, ScheduleSource
+from taskiq.abc.serializer import TaskiqSerializer
+from taskiq.compat import model_dump, model_validate
+from taskiq.serializers import PickleSerializer
+from typing_extensions import Self
+
+logger = getLogger("taskiq.redis_schedule_source")
+
+
+class ListRedisScheduleSource(ScheduleSource):
+    """Schedule source based on arrays."""
+
+    def __init__(
+        self,
+        url: str,
+        prefix: str = "schedule",
+        max_connection_pool_size: Optional[int] = None,
+        serializer: Optional[TaskiqSerializer] = None,
+        buffer_size: int = 50,
+        skip_past_schedules: bool = False,
+        **connection_kwargs: Any,
+    ) -> None:
+        super().__init__()
+        self._prefix = prefix
+        self._buffer_size = buffer_size
+        self._connection_pool = BlockingConnectionPool.from_url(
+            url=url,
+            max_connections=max_connection_pool_size,
+            **connection_kwargs,
+        )
+        if serializer is None:
+            serializer = PickleSerializer()
+        self._serializer = serializer
+        self._is_first_run = True
+        self._previous_schedule_source: Optional[ScheduleSource] = None
+        self._delete_schedules_after_migration: bool = True
+        self._skip_past_schedules = skip_past_schedules
+
+    async def startup(self) -> None:
+        """
+        Startup the schedule source.
+
+        By default this function does nothing.
+        But if the previous schedule source is set,
+        it will try to migrate schedules from it.
+        """
+        if self._previous_schedule_source is not None:
+            logger.info("Migrating schedules from previous source")
+            await self._previous_schedule_source.startup()
+            schedules = await self._previous_schedule_source.get_schedules()
+            logger.info(f"Found {len(schedules)}")
+            for schedule in schedules:
+                await self.add_schedule(schedule)
+                if self._delete_schedules_after_migration:
+                    await self._previous_schedule_source.delete_schedule(
+                        schedule.schedule_id,
+                    )
+            await self._previous_schedule_source.shutdown()
+            logger.info("Migration complete")
+
+    def _get_time_key(self, time: datetime.datetime) -> str:
+        """Get the key for a time-based schedule."""
+        if time.tzinfo is None:
+            time = time.replace(tzinfo=datetime.timezone.utc)
+        iso_time = time.astimezone(datetime.timezone.utc).strftime("%Y-%m-%dT%H:%M")
+        return f"{self._prefix}:time:{iso_time}"
+
+    def _get_cron_key(self) -> str:
+        """Get the key for a cron-based schedule."""
+        return f"{self._prefix}:cron"
+
+    def _get_data_key(self, schedule_id: str) -> str:
+        """Get the key for a schedule data."""
+        return f"{self._prefix}:data:{schedule_id}"
+
+    def _parse_time_key(self, key: str) -> Optional[datetime.datetime]:
+        """Get time value from the timed-key."""
+        try:
+            dt_str = key.split(":", 2)[2]
+            return datetime.datetime.strptime(dt_str, "%Y-%m-%dT%H:%M").replace(
+                tzinfo=datetime.timezone.utc,
+            )
+        except ValueError:
+            logger.debug("Failed to parse time key %s", key)
+            return None
+
+    async def _get_previous_time_schedules(self) -> list[bytes]:
+        """
+        Function that gets all timed schedules that are in the past.
+
+        Since this source doesn't retrieve all the schedules at once,
+        we need to get all the schedules that are in the past and haven't
+        been sent yet.
+
+        We do this by getting all the time keys and checking if the time
+        is less than the current time.
+
+        This function is called only during the first run to minimize
+        the number of requests to the Redis server.
+        """
+        logger.info("Getting previous time schedules")
+        minute_before = datetime.datetime.now(
+            datetime.timezone.utc,
+        ).replace(second=0, microsecond=0) - datetime.timedelta(
+            minutes=1,
+        )
+        schedules = []
+        async with Redis(connection_pool=self._connection_pool) as redis:
+            time_keys: list[str] = []
+            # We need to get all the time keys and check if the time is less than
+            # the current time.
+            async for key in redis.scan_iter(f"{self._prefix}:time:*"):
+                key_time = self._parse_time_key(key.decode())
+                if key_time and key_time <= minute_before:
+                    time_keys.append(key.decode())
+            for key in time_keys:
+                schedules.extend(await redis.lrange(key, 0, -1))  # type: ignore
+
+        return schedules
+
+    async def delete_schedule(self, schedule_id: str) -> None:
+        """Delete a schedule from the source."""
+        async with Redis(connection_pool=self._connection_pool) as redis:
+            schedule = await redis.getdel(self._get_data_key(schedule_id))
+            if schedule is not None:
+                logger.debug("Deleting schedule %s", schedule_id)
+                schedule = model_validate(
+                    ScheduledTask,
+                    self._serializer.loadb(schedule),
+                )
+                # We need to remove the schedule from the cron or time list.
+                if schedule.cron is not None:
+                    await redis.lrem(self._get_cron_key(), 0, schedule_id)  # type: ignore
+                elif schedule.time is not None:
+                    time_key = self._get_time_key(schedule.time)
+                    await redis.lrem(time_key, 0, schedule_id)  # type: ignore
+
+    async def add_schedule(self, schedule: "ScheduledTask") -> None:
+        """Add a schedule to the source."""
+        async with Redis(connection_pool=self._connection_pool) as redis:
+            # At first we set data key which contains the schedule data.
+            await redis.set(
+                f"{self._prefix}:data:{schedule.schedule_id}",
+                self._serializer.dumpb(model_dump(schedule)),
+            )
+            # Then we add the schedule to the cron or time list.
+            # This is an optimization, so we can get all the schedules
+            # for the current time much faster.
+            if schedule.cron is not None:
+                await redis.rpush(self._get_cron_key(), schedule.schedule_id)  # type: ignore
+            elif schedule.time is not None:
+                await redis.rpush(  # type: ignore
+                    self._get_time_key(schedule.time),
+                    schedule.schedule_id,
+                )
+
+    async def post_send(self, task: ScheduledTask) -> None:
+        """Delete a task after it's completed."""
+        if task.time is not None:
+            await self.delete_schedule(task.schedule_id)
+
+    async def get_schedules(self) -> List["ScheduledTask"]:
+        """
+        Get all schedules.
+
+        This function gets all the schedules from the schedule source.
+        What it does is get all the cron schedules and time schedules
+        for the current time and return them.
+
+        If it's the first run, it also gets all the time schedules
+        that are in the past and haven't been sent yet.
+        """
+        schedules = []
+        current_time = datetime.datetime.now(datetime.timezone.utc)
+        timed: list[bytes] = []
+        # Only during first run, we need to get previous time schedules
+        if self._is_first_run and not self._skip_past_schedules:
+            timed = await self._get_previous_time_schedules()
+            self._is_first_run = False
+        async with Redis(connection_pool=self._connection_pool) as redis:
+            buffer = []
+            crons = await redis.lrange(self._get_cron_key(), 0, -1)  # type: ignore
+            logger.debug("Got %d cron schedules", len(crons))
+            if crons:
+                buffer.extend(crons)
+            timed.extend(await redis.lrange(self._get_time_key(current_time), 0, -1))  # type: ignore
+            logger.debug("Got %d timed schedules", len(timed))
+            if timed:
+                buffer.extend(timed)
+            while buffer:
+                schedules.extend(
+                    await redis.mget(
+                        (
+                            self._get_data_key(x.decode())
+                            for x in buffer[: self._buffer_size]
+                        ),
+                    ),
+                )
+                buffer = buffer[self._buffer_size :]
+
+        return [
+            model_validate(ScheduledTask, self._serializer.loadb(schedule))
+            for schedule in schedules
+            if schedule
+        ]
+
+    def with_migrate_from(
+        self,
+        source: ScheduleSource,
+        delete_schedules: bool = True,
+    ) -> Self:
+        """
+        Enable migration from previous schedule source.
+
+        If this function is called during declaration,
+        the source will try to migrate schedules from the previous source.
+
+        :param source: previous schedule source
+        :param delete_schedules: delete schedules during migration process
+            from the previous source.
+        """
+        self._previous_schedule_source = source
+        self._delete_schedules_after_migration = delete_schedules
+        return self
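
For orientation, a minimal usage sketch of the new source (not part of the diff; the URL, prefix, task name, and schedule id are illustrative, and the `ScheduledTask` fields follow the taskiq model imported above):

```python
import asyncio
import datetime

from taskiq import ScheduledTask

from taskiq_redis import ListRedisScheduleSource


async def main() -> None:
    source = ListRedisScheduleSource("redis://localhost:6379", prefix="schedule")
    now = datetime.datetime.now(datetime.timezone.utc)
    # A one-shot schedule: the payload is stored under
    # "schedule:data:demo-schedule" and the id is pushed onto the
    # per-minute bucket "schedule:time:YYYY-MM-DDTHH:MM" (UTC).
    await source.add_schedule(
        ScheduledTask(
            task_name="my_task",  # illustrative task name
            labels={},
            args=[],
            kwargs={},
            schedule_id="demo-schedule",
            time=now,
        ),
    )
    # get_schedules() reads the cron list plus the current minute bucket;
    # after dispatch, post_send() deletes one-shot entries.
    print(await source.get_schedules())


asyncio.run(main())
```

The minute-resolution key layout is why `get_schedules` only has to touch two keys per tick: the cron list and the bucket for the current minute.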
taskiq_redis/redis_broker.py CHANGED
@@ -165,6 +165,7 @@ class RedisStreamBroker(BaseRedisBroker):
         consumer_id: str = "$",
         mkstream: bool = True,
         xread_block: int = 10000,
+        maxlen: Optional[int] = None,
         additional_streams: Optional[Dict[str, str]] = None,
         **connection_kwargs: Any,
     ) -> None:
@@ -184,6 +185,8 @@ class RedisStreamBroker(BaseRedisBroker):
         :param mkstream: create stream if it does not exist.
         :param xread_block: block time in ms for xreadgroup.
             Better to set it to a bigger value, to avoid unnecessary calls.
+        :param maxlen: sets the maximum length of the stream;
+            trims (the old values of) the stream each time a new element is added.
         :param additional_streams: additional streams to read from.
             Each key is a stream name, value is a consumer id.
         """
@@ -200,6 +203,7 @@ class RedisStreamBroker(BaseRedisBroker):
         self.consumer_id = consumer_id
         self.mkstream = mkstream
         self.block = xread_block
+        self.maxlen = maxlen
         self.additional_streams = additional_streams or {}
 
     async def _declare_consumer_group(self) -> None:
@@ -235,7 +239,11 @@ class RedisStreamBroker(BaseRedisBroker):
         :param message: message to append.
         """
         async with Redis(connection_pool=self.connection_pool) as redis_conn:
-            await redis_conn.xadd(self.queue_name, {b"data": message.message})
+            await redis_conn.xadd(
+                self.queue_name,
+                {b"data": message.message},
+                maxlen=self.maxlen,
+            )
 
     def _ack_generator(self, id: str) -> Callable[[], Awaitable[None]]:
         async def _ack() -> None:
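
In short, the broker now forwards `maxlen` to `XADD`, capping the stream at publish time. A minimal sketch of the new parameter in use (the URL and limit are illustrative; note that redis-py applies `MAXLEN` approximately by default, the `~` form, so the stream may briefly exceed the limit):

```python
from taskiq_redis import RedisStreamBroker

# Cap the task stream at roughly 10000 entries: every publish (XADD)
# also trims the oldest entries beyond this length, so a slow or idle
# consumer group cannot grow the stream without bound.
broker = RedisStreamBroker(
    url="redis://localhost:6379",
    maxlen=10_000,
)
```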
taskiq_redis-1.0.5.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: taskiq-redis
-Version: 1.0.3
+Version: 1.0.5
 Summary: Redis integration for taskiq
 Keywords: taskiq,tasks,distributed,async,redis,result_backend
 Author: taskiq-team
@@ -149,3 +149,67 @@ RedisAsyncResultBackend parameters:
 > )
 > ```
 
+
+## Schedule sources
+
+
+You can use this package to add dynamic schedule sources. They are used to store
+schedules for the taskiq scheduler.
+
+The advantage of using the schedule sources from this package over the default `LabelBased` source is that you can
+dynamically add schedules to them.
+
+We have two types of schedule sources:
+
+* `RedisScheduleSource`
+* `ListRedisScheduleSource`
+
+
+### RedisScheduleSource
+
+This source is super simple. It stores all schedules under the key `{prefix}:{schedule_id}`. When the scheduler requests
+schedules, it retrieves all values from redis whose keys start with the given `prefix`.
+
+This is very inefficient and should not be used for high-volume schedules: if you have `1000` schedules, the scheduler will make at least `20` requests to retrieve them (we use `scan` and `mget` to minimize the number of calls).
+
+### ListRedisScheduleSource
+
+This source holds values in lists.
+
+* For cron tasks it uses the key `{prefix}:cron`.
+* For timed schedules it uses the key `{prefix}:time:{time}`, where `{time}` is the time at which the schedules should run.
+
+The main advantage of this approach is that we only fetch the tasks that need to run at a given time and do not perform any excessive calls to redis.
+
+
+### Migration from one source to another
+
+To migrate from `RedisScheduleSource` to `ListRedisScheduleSource`, you can define the latter like this:
+
+```python
+# broker.py
+import asyncio
+import datetime
+
+from taskiq import TaskiqScheduler
+
+from taskiq_redis import ListRedisScheduleSource, RedisStreamBroker
+from taskiq_redis.schedule_source import RedisScheduleSource
+
+broker = RedisStreamBroker(url="redis://localhost:6379")
+
+old_source = RedisScheduleSource("redis://localhost/1", prefix="prefix1")
+array_source = ListRedisScheduleSource(
+    "redis://localhost/1",
+    prefix="prefix2",
+    # To migrate schedules from an old source.
+).with_migrate_from(
+    old_source,
+    # To delete schedules from an old source.
+    delete_schedules=True,
+)
+scheduler = TaskiqScheduler(broker, [array_source])
+```
+
+During startup the scheduler will try to migrate schedules from the old source to the new one. Please be sure to specify different prefixes, just to avoid any kind of collision between the two.
+
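
The README's example above covers only migration; as a sketch of the "dynamically add schedules" claim, something like the following should work (hypothetical task name, schedule id, and cron string; it assumes the `array_source` declared in the `broker.py` example):

```python
# dynamic_add.py -- illustrative sketch, not part of the package.
import asyncio

from taskiq import ScheduledTask

from broker import array_source  # the source from the README example


async def main() -> None:
    await array_source.startup()
    # Schedules can be added (and deleted) at runtime, which is the
    # advantage over the label-based source described above. A cron
    # schedule is queued in "prefix2:cron"; its payload is stored
    # under "prefix2:data:nightly-report".
    await array_source.add_schedule(
        ScheduledTask(
            task_name="my_project.tasks:nightly_report",  # hypothetical task
            labels={},
            args=[],
            kwargs={},
            schedule_id="nightly-report",
            cron="0 3 * * *",  # every day at 03:00 UTC
        ),
    )


asyncio.run(main())
```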
taskiq_redis-1.0.5.dist-info/RECORD CHANGED
@@ -1,12 +1,13 @@
-taskiq_redis/__init__.py,sha256=nb2Lx4lVj9m20duzRFQk3nNVswQGylmwLsQN6Qc1lGI,1091
+taskiq_redis/__init__.py,sha256=Sl4m9rKxweU1t0m289Qtf0qm4xSSkkFHoOfKq6qaz6g,1192
 taskiq_redis/exceptions.py,sha256=7buBJ7CRVWd5WqVqSjtHO8cVL7QzZg-DOM3nB87t-Sk,738
+taskiq_redis/list_schedule_source.py,sha256=w7lSJ1-n889PXOwXiA_Jp6Wj4RsZpuXwFCVhOIc5KAw,9462
 taskiq_redis/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 taskiq_redis/redis_backend.py,sha256=MLBaUN3Zx-DLvm1r-lgPU82_WZq9kc6oTxYI8LQjd6k,19882
-taskiq_redis/redis_broker.py,sha256=ZLn7LAHj8Sh_oyW5hMgD7PZPQfUdXNPKdqhBcr9Okmg,9775
+taskiq_redis/redis_broker.py,sha256=ep31kUxXx4XhGKjrzVjdNBmA6wLbTQoy7-DlKuwtLz4,10068
 taskiq_redis/redis_cluster_broker.py,sha256=FuWl5fP7Fwr9FbytErmhcUGjRCdPexDK2Co2u6kpDlo,6591
 taskiq_redis/redis_sentinel_broker.py,sha256=wHnbG3xuD_ruhhwp4AXo91NNjq8v2iufUZ0i_HbBRVQ,9073
 taskiq_redis/schedule_source.py,sha256=hqpcs2D8W90KUDHREKblisnhGCE9dbVOtKtuJcOTGZw,9915
-taskiq_redis-1.0.3.dist-info/LICENSE,sha256=lEHEEE-ZxmuItxYgUMPiFWdRcAITxE8DFMNyAg4eOYE,1075
-taskiq_redis-1.0.3.dist-info/METADATA,sha256=whl7_U6GIIcNzVZqrLQkktES6Y91awk8-HtgaX5IQ8s,4391
-taskiq_redis-1.0.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
-taskiq_redis-1.0.3.dist-info/RECORD,,
+taskiq_redis-1.0.5.dist-info/LICENSE,sha256=lEHEEE-ZxmuItxYgUMPiFWdRcAITxE8DFMNyAg4eOYE,1075
+taskiq_redis-1.0.5.dist-info/METADATA,sha256=uV2AJOyyE-f1_5A1LsvGpbf74zSueiJaNNRJ6kIqpvw,6573
+taskiq_redis-1.0.5.dist-info/WHEEL,sha256=fGIA9gx4Qxk2KDKeNJCbOEwSrmLtjWCwzBz351GyrPQ,88
+taskiq_redis-1.0.5.dist-info/RECORD,,
taskiq_redis-1.0.5.dist-info/WHEEL CHANGED
@@ -1,4 +1,4 @@
 Wheel-Version: 1.0
-Generator: poetry-core 2.1.1
+Generator: poetry-core 2.1.2
 Root-Is-Purelib: true
 Tag: py3-none-any