typepipe 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ Metadata-Version: 2.3
2
+ Name: typepipe
3
+ Version: 0.2.0
4
+ Summary: Create an asynchronous data processing pipeline that keeps track of the types.
5
+ Author: Daniel Tschertkow
6
+ Author-email: Daniel Tschertkow <daniel.tschertkow@posteo.de>
7
+ Requires-Python: >=3.13
8
+ Description-Content-Type: text/markdown
9
+
File without changes
@@ -0,0 +1,23 @@
1
+ [project]
2
+ name = "typepipe"
3
+ version = "0.2.0"
4
+ description = "Create an asynchronous data processing pipeline that keeps track of the types."
5
+ readme = "README.md"
6
+ authors = [
7
+ { name = "Daniel Tschertkow", email = "daniel.tschertkow@posteo.de" }
8
+ ]
9
+ requires-python = ">=3.13"
10
+ dependencies = []
11
+
12
+ [build-system]
13
+ requires = ["uv_build>=0.10.2,<0.11.0"]
14
+ build-backend = "uv_build"
15
+
16
+ [dependency-groups]
17
+ dev = [
18
+ "pytest>=9.0.2",
19
+ "pytest-asyncio>=1.3.0",
20
+ ]
21
+
22
+ [tool.pytest.ini_options]
23
+ asyncio_mode = "auto"
@@ -0,0 +1,2 @@
1
def hello() -> str:
    """Return the package greeting string."""
    # Fix: the message referenced the project's old name "pypline";
    # the distribution is named "typepipe" (see the project metadata).
    return "Hello from typepipe!"
@@ -0,0 +1,250 @@
1
+ import asyncio
2
+ from asyncio import Queue
3
+ from collections.abc import Awaitable, Iterable
4
+ from typing import Callable, Tuple
5
+
6
# Type aliases for the callables accepted by the stage factories below.
type Consumer[T] = Callable[[T], None]  # consumes an item for side effects
type Function[T, R] = Callable[[T], R]  # plain one-to-one transformation
type Functor[T, R] = Callable[[T], Iterable[R]]  # one-to-many transformation
type Producer[T] = Callable[[], T]  # produces a single item
type Observer[T] = Callable[[T], None]  # is supposed to not mutate the argument
type Operator[T] = Callable[[T, T], T]  # binary reduction operator
type Predicate[T] = Callable[[T], bool]  # keep/drop decision for filtering
type IterableProducer[T] = Callable[[], Iterable[T]]  # produces many items at once
14
+
15
+
16
+ def mapper[T, R](
17
+ map: Function[T, R], inqueue: Queue[T], /, qsize: int = 1
18
+ ) -> Tuple[Queue[R], Awaitable[None]]:
19
+ """A mapper is a task executed in the background (asynchronously) that
20
+ receives an input item from the inqueue, applies map to it and puts the
21
+ result on the outqueue. qsize controls the outquesize. It corresponds to
22
+ the maxsize parameter of asyncio.Queue, but defaults to 1.
23
+
24
+ Returns a tuple of (outqueue, worker). The outqueue contains the results
25
+ while the worker must be awaited to start processing.
26
+ """
27
+
28
+ outqueue = Queue[R](maxsize=qsize)
29
+
30
+ async def worker() -> None:
31
+ await asyncio.sleep(2)
32
+ try:
33
+ while True:
34
+ item = await inqueue.get()
35
+ inqueue.task_done()
36
+ await outqueue.put(map(item))
37
+ except asyncio.QueueShutDown:
38
+ outqueue.shutdown()
39
+ return outqueue, worker()
40
+
41
+
42
+ def flatmapper[T, R](
43
+ map: Functor[T, R], inqueue: Queue[T], /, qsize: int = 1
44
+ ) -> Tuple[Queue[R], Awaitable[None]]:
45
+ """A flatmapper is a task executed in the background (asynchronously) that
46
+ receives an input item from the inqueue and applies map to it. The result is
47
+ expected to conform to collections.abc.Sequence where each element of it
48
+ gets put on the ouqueue individually. qsize controls the outquesize. It
49
+ corresponds to the maxsize parameter of asyncio.Queue, but defaults to 1.
50
+
51
+ Returns a tuple of (outqueue, worker). The outqueue contains the results
52
+ while the worker must be awaited to start processing.
53
+ """
54
+
55
+ outqueue = Queue[R](maxsize=qsize)
56
+
57
+ async def worker() -> None:
58
+ try:
59
+ while True:
60
+ item = await inqueue.get()
61
+ inqueue.task_done()
62
+ item_seq: Iterable[R] = map(item)
63
+ for i in item_seq:
64
+ await outqueue.put(i)
65
+ except asyncio.QueueShutDown:
66
+ outqueue.shutdown()
67
+
68
+ return outqueue, worker()
69
+
70
+
71
+ def producer[R](
72
+ produce: Producer[R], /, qsize: int = 1
73
+ ) -> Tuple[Queue[R], Awaitable[None]]:
74
+ """A producer is a task executed in the background (asynchronously) that
75
+ generates an input by calling generate() and puts the result on the
76
+ outqueue. qsize controls the outquesize. It corresponds to the maxsize
77
+ parameter of asyncio.Queue, but defaults to 1.
78
+
79
+ Returns a tuple of (outqueue, worker). The outqueue contains the results
80
+ while the worker must be awaited to start processing.
81
+ """
82
+ outqueue = Queue[R](maxsize=qsize)
83
+
84
+ async def worker() -> None:
85
+ item: R = produce()
86
+ await outqueue.put(item)
87
+ outqueue.shutdown()
88
+
89
+ return outqueue, worker()
90
+
91
+
92
+ def flatproducer[R](
93
+ produce: IterableProducer[R], /, qsize: int = 1
94
+ ) -> Tuple[Queue[R], Awaitable[None]]:
95
+ """A flatproducer is a task executed in the background (asynchronously) that
96
+ generates input items by calling generate() and puts the results on the
97
+ outqueue individually. The result is expected to conform to
98
+ collections.abc.Sequence. qsize controls the outquesize. It corresponds to
99
+ the maxsize parameter of asyncio.Queue, but defaults to 1.
100
+
101
+ Returns a tuple of (outqueue, worker). The outqueue contains the results
102
+ while the worker must be awaited to start processing.
103
+ """
104
+ outqueue = Queue[R](maxsize=qsize)
105
+
106
+ async def worker() -> None:
107
+ item_seq: Iterable[R] = produce()
108
+ for item in item_seq:
109
+ await outqueue.put(item)
110
+ outqueue.shutdown()
111
+
112
+ return outqueue, worker()
113
+
114
+
115
+ def iterator[T](
116
+ iterable: Iterable[T], /, qsize: int = 1
117
+ ) -> Tuple[Queue[T], Awaitable[None]]:
118
+ """A generator is a task executed in the background (asynchronously) that
119
+ generates an input by iterating through the generate generator and putting
120
+ the result on the outqueue. qsize controls the outquesize. It corresponds
121
+ to the maxsize parameter of asyncio.Queue, but defaults to 1.
122
+
123
+ Returns a tuple of (outqueue, worker). The outqueue contains the results
124
+ while the worker must be awaited to start processing.
125
+ """
126
+ outqueue = Queue[T](maxsize=qsize)
127
+
128
+ async def worker() -> None:
129
+ item: T
130
+ for item in iterable:
131
+ await outqueue.put(item)
132
+ outqueue.shutdown()
133
+
134
+ return outqueue, worker()
135
+
136
+
137
+ def reducer[T](
138
+ reduce: Operator[T], inqueue: Queue[T], /, initial_value: T | None = None
139
+ ) -> Awaitable[T | None]:
140
+ """A reducer is a task executed in the background (asynchronously) that
141
+ generates an input by calling generate() and puts the result on the
142
+ outqueue. qsize controls the outquesize. It corresponds to the maxsize
143
+ parameter of asyncio.Queue, but defaults to 1.
144
+
145
+ Returns the result of the reduction as an Awaitable.
146
+ """
147
+
148
+ async def worker() -> T | None:
149
+ if initial_value is None:
150
+ try:
151
+ first: T = await inqueue.get()
152
+ inqueue.task_done()
153
+ except asyncio.QueueShutDown:
154
+ return None
155
+ else:
156
+ first = initial_value
157
+
158
+ try:
159
+ second: T = await inqueue.get()
160
+ inqueue.task_done()
161
+ except asyncio.QueueShutDown:
162
+ return first
163
+
164
+ result: T = reduce(first, second)
165
+ try:
166
+ while True:
167
+ next = await inqueue.get()
168
+ inqueue.task_done()
169
+ result = reduce(result, next)
170
+ except asyncio.QueueShutDown:
171
+ return result
172
+
173
+ return worker()
174
+
175
+
176
+ def filterer[T](
177
+ predicate: Predicate[T], inqueue: Queue[T], /, qsize: int = 1
178
+ ) -> Tuple[Queue[T], Awaitable[None]]:
179
+ """A filterer is a task executed in the background (asynchronously) that
180
+ filters items of the inqueue and puts the items for which the predicate is
181
+ true on the outqueue. qsize controls the outqueue size. It corresponds to
182
+ the maxsize parameter of asyncio.Queue, but defaults to 1.
183
+
184
+ Returns a tuple of (outqueue, worker). The outqueue has the remaining items
185
+ and the worker is an Awaitable that starts the filtering process.
186
+ """
187
+
188
+ outqueue = Queue[T](maxsize=qsize)
189
+
190
+ async def worker() -> None:
191
+ try:
192
+ while True:
193
+ item = await inqueue.get()
194
+ inqueue.task_done()
195
+ if predicate(item):
196
+ await outqueue.put(item)
197
+ except asyncio.QueueShutDown:
198
+ outqueue.shutdown()
199
+
200
+ return outqueue, worker()
201
+
202
+
203
+ def consumer[T](
204
+ consume: Consumer[T],
205
+ inqueue: Queue[T],
206
+ /,
207
+ ) -> Awaitable[None]:
208
+ """A consumer is a task executed in the background (asynchronously) that
209
+ consumes items from the inqueue (for side effects).
210
+
211
+ Returns a worker. The worker is an Awaitable that starts the filtering
212
+ process.
213
+ """
214
+
215
+ async def worker() -> None:
216
+ try:
217
+ while True:
218
+ item = await inqueue.get()
219
+ inqueue.task_done()
220
+ consume(item)
221
+ except asyncio.QueueShutDown:
222
+ return
223
+
224
+ return worker()
225
+
226
+
227
+ def observer[T](
228
+ observe: Observer[T], inqueue: Queue[T], /, qsize: int = 1
229
+ ) -> Tuple[Queue[T], Awaitable[None]]:
230
+ """An observer is a task executed in the background (asynchronously) that
231
+ calls observer on the items from the inqueue (for side effects). The item
232
+ should not be modified by a call to observe. The item is put back on the
233
+ outqueue with the assumption that it was not modified.
234
+
235
+ Returns a tuple of (outqueue, worker). The outqueue has the remaining items
236
+ and the worker is an Awaitable that starts the filtering process.
237
+ """
238
+ outqueue = Queue[T](maxsize=qsize)
239
+
240
+ async def worker() -> None:
241
+ try:
242
+ while True:
243
+ item = await inqueue.get()
244
+ inqueue.task_done()
245
+ observe(item)
246
+ await outqueue.put(item)
247
+ except asyncio.QueueShutDown:
248
+ outqueue.shutdown()
249
+
250
+ return outqueue, worker()
@@ -0,0 +1,356 @@
1
+ """Defines the Pipeline class that builds an asynchronous processing pipeline in
2
+ a functional style.
3
+ """
4
+
5
+ import asyncio
6
+ from asyncio import Queue
7
+ from collections.abc import Awaitable, Iterable, MutableSequence
8
+ from typing import List, Self, Tuple
9
+
10
+ from . import links
11
+
12
+
13
class PipelineClosedException(Exception):
    """Raised when a stage is added to a pipeline that has already been
    closed. A pipeline becomes closed once a terminal operation such as
    reduce or consume has been performed on it."""
17
+
18
+
19
class Pipeline[T]:
    """Asynchronous processing pipeline.

    `Pipeline` orchestrates a series of async stages chained together via queues
    of type `asyncio.Queue`. Users create a pipeline from an existing queue, a
    producer function, a sequence, or a generator and then chain transformations
    via `map`, `flatmap`, `filter`, and finally terminate the pipeline with
    `reduce`, `consume`, or `collect`.

    The pipeline tracks internal worker coroutines and ensures they are
    awaited together. Once a terminating operation is invoked, the pipeline is
    marked as *closed* and further stage additions raise
    `PipelineClosedException`.

    Every step that exposes a downstream queue has a `qsize` keyword parameter
    that sets the size of the queue and defaults to `1` (since most queues are
    either always empty or always full). Use queues of bigger sizes if your
    operation is bursty.
    """

    # Head queue of the pipeline: the output of the most recently added stage.
    queue: Queue[T]
    # All stage coroutines created so far; a terminal op gathers them together.
    workers: List[Awaitable[None]]
    # Set to True by a closing operation (reduce/consume/collect/expose).
    closed: bool

    def __init__(self, queue: Queue[T], workers: List[Awaitable[None]]):
        """Create a new `Pipeline` from an existing `asyncio.Queue`. Make sure
        the queue is fed and properly shut down, otherwise the pipeline will not
        terminate but wait for further input.

        Args:
            queue: The input queue that provides items to the pipeline.
            workers: Worker awaitables already associated with the queue
                (empty for a fresh pipeline; the list is shared, not copied).

        The pipeline starts in an *open* state; stages can be added until a
        terminal operation (reduce/consume/collect) closes it.
        """
        self.closed = False
        self.workers = workers
        self.queue = queue

    @classmethod
    def from_queue(cls, inqueue: Queue[T]) -> Self:
        """Construct a `Pipeline` directly from an existing queue.

        Args:
            inqueue: The source `asyncio.Queue`.

        Returns:
            A new `Pipeline` instance.
        """
        pipeline = cls(inqueue, list())
        return pipeline

    @classmethod
    def from_producer(cls, func: links.Producer[T], /, qsize: int = 1) -> Self:
        """Create a pipeline from a producer function.

        The producer closure supplies the initial, single item for the pipeline.

        Args:
            func: A zero-argument callable that returns a single item.
            qsize: Maximum size of the internal queue (default `1`).

        Returns:
            A new `Pipeline` instance.
        """
        queue, worker = links.producer(func, qsize=qsize)
        pipeline = cls(queue, list())
        pipeline.workers.append(worker)
        return pipeline

    @classmethod
    def from_producer_flatten(
        cls, func: links.IterableProducer[T], /, qsize: int = 1
    ) -> Self:
        """Create a pipeline from a producer function.

        The producer closure supplies the initial sequence for the pipeline
        where each item is put into the pipeline individually.

        Args:
            func: A zero-argument callable that returns a sequence.
            qsize: Maximum size of the internal queue (default `1`).

        Returns:
            A new `Pipeline` instance.

        """
        queue, worker = links.flatproducer(func, qsize=qsize)
        pipeline = cls(queue, list())
        pipeline.workers.append(worker)
        return pipeline

    @classmethod
    def from_iterable(cls, seq: Iterable[T], /, qsize: int = 1) -> Self:
        """Create a pipeline from an existing in-memory iterable.

        Each item from the iterable is put into the pipeline individually.

        Args:
            seq: Any `collections.abc.Iterable` of items.
            qsize: Queue size (default `1`).

        Returns:
            A new `Pipeline` instance.
        """
        queue, worker = links.iterator(seq, qsize=qsize)
        pipeline = cls(queue, list())
        pipeline.workers.append(worker)
        return pipeline

    def map[U](self, func: links.Function[T, U], /, qsize: int = 1) -> "Pipeline[U]":
        """Add a mapping stage to the pipeline.

        Each item from the previous stage is transformed by `func` and the
        result is the input of the next stage in the pipeline.

        Args:
            func: Callable that maps an input item to an output item.
            qsize: Queue size for the downstream queue (default `1`).

        Returns:
            The pipeline instance to allow method chaining.

        Raises:
            PipelineClosedException: If `map` is added to a pipeline that already
                has a closing stage.

        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        queue, worker = links.mapper(func, self.queue, qsize=qsize)

        # A new Pipeline[U] is returned because the element type changes;
        # the workers list is shared with the current pipeline, not copied.
        self.workers.append(worker)
        return Pipeline[U](queue, self.workers)

    def flatmap[U](self, func: links.Functor[T, U], /, qsize: int = 1) -> "Pipeline[U]":
        """Add a mapping stage to the pipeline.

        `func` is expected to return an iterable for each input item. Each
        element of that iterable is put individually to the downstream
        queue.

        Args:
            func: Callable that maps an input item to a sequence of output items.
            qsize: Queue size for the downstream queue (default `1`).

        Returns:
            The pipeline instance for method chaining.

        Raises:
            PipelineClosedException: If `flatmap` is added to a pipeline that
                already has a closing stage.

        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        queue, worker = links.flatmapper(func, self.queue, qsize=qsize)

        # As in `map`: new element type, shared workers list.
        self.workers.append(worker)
        return Pipeline[U](queue, self.workers)

    def filter(self, pred: links.Predicate[T], /, qsize: int = 1) -> Self:
        """Add a filter stage to the pipeline.

        Only items for which `pred` returns `True` are forwarded downstream.

        Args:
            pred: Predicate callable returning `True` for items to keep.
            qsize: Queue size for the downstream queue (default `1`).

        Returns:
            The pipeline instance for chaining.

        Raises:
            PipelineClosedException: If `filter` is added to a pipeline that
                already has a closing stage.
        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        queue, worker = links.filterer(pred, self.queue, qsize=qsize)

        # Element type is unchanged, so this pipeline object is reused.
        self.workers.append(worker)
        self.queue = queue
        return self

    async def reduce(
        self, reduce: links.Operator[T], /, initial_value: T | None = None
    ) -> T | None:
        """Reduce the pipeline to a single accumulated value.

        Consumes items from the pipeline using `reduce` as a binary operator.
        After reduction the pipeline is marked as closed.

        `reduce` is a closing operation!

        Args:
            reduce: Binary function combining two items into one.
            initial_value: Optional initial accumulator. If omitted the first
                item from the queue is used as the start value.

        Returns:
            The final reduced value, or `None` if the queue was empty.

        Raises:
            PipelineClosedException: If `reduce` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        self.closed = True

        rworker: Awaitable[T | None] = links.reducer(
            reduce, self.queue, initial_value=initial_value
        )

        results: List = await asyncio.gather(*self.workers, rworker)
        return results[-1]  # rworker holds the awaitable result of the
        # reduction and comes last

    async def consume(
        self,
        consume: links.Consumer[T],
        /,
    ) -> None:
        """Consume items from the pipeline for side effects.

        The `consume` callable is applied to each item. This is a terminal
        operation; after calling it the pipeline is closed.

        `consume` is a closing operation!

        Args:
            consume: Callable that processes each item (e.g., printing or
                storing).

        Raises:
            PipelineClosedException: If `consume` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        self.closed = True
        cworker: Awaitable[None] = links.consumer(consume, self.queue)

        self.workers.append(cworker)
        await asyncio.gather(*self.workers)

    def isClosed(self) -> bool:
        """Return `True` if a closing operation has closed the pipeline."""
        return self.closed

    def expose(self) -> Tuple[Queue[T], Awaitable[None]]:
        """Expose the internal queue and a combined worker.

        Allows external code to await the combined workers while receiving the
        output queue directly.

        `expose` is a closing operation!

        Raises:
            PipelineClosedException: If `expose` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        self.closed = True

        # bundle workers into one
        async def worker() -> None:
            await asyncio.gather(*self.workers)

        return self.queue, worker()

    async def collect(self, container: MutableSequence[T]) -> MutableSequence[T]:
        """Collect all items from the pipeline into `container`.

        The pipeline runs to completion, appending each yielded item to the
        provided mutable sequence. After collection the pipeline is closed.

        `collect` is a closing operation!

        Args:
            container: A mutable sequence (e.g., list) to which items are added.

        Raises:
            PipelineClosedException: If `collect` is added to a pipeline that
                already has a closing stage.
        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")
        self.closed = True

        async def collector() -> MutableSequence[T]:
            # Drain the head queue until upstream shuts it down.
            try:
                while True:
                    item: T = await self.queue.get()
                    self.queue.task_done()
                    container.append(item)
            except asyncio.QueueShutDown:
                return container

        *worker_results, seq = await asyncio.gather(*self.workers, collector())
        return seq  # seq can't be None

    def inspect(self, observe: links.Observer[T], /, qsize: int = 1) -> Self:
        """Insert an observer stage that runs `observe` on each item.

        Useful for side-effects like logging without modifying the
        stream. `observe` is expected to not modify its item.

        Args:
            observe: Callable invoked with each item.
            qsize: Queue size for the observer's downstream queue.

        Raises:
            PipelineClosedException: If `inspect` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        outqueue, worker = links.observer(observe, self.queue, qsize=qsize)

        self.workers.append(worker)
        self.queue = outqueue

        return self
File without changes