typepipe 0.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
typepipe-0.2.0/PKG-INFO
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: typepipe
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Create an asynchronous data processing pipeline that keeps track of the types.
|
|
5
|
+
Author: Daniel Tschertkow
|
|
6
|
+
Author-email: Daniel Tschertkow <daniel.tschertkow@posteo.de>
|
|
7
|
+
Requires-Python: >=3.13
|
|
8
|
+
Description-Content-Type: text/markdown
|
|
9
|
+
|
typepipe-0.2.0/README.md
ADDED
|
File without changes
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "typepipe"
|
|
3
|
+
version = "0.2.0"
|
|
4
|
+
description = "Create an asynchronous data processing pipeline that keeps track of the types."
|
|
5
|
+
readme = "README.md"
|
|
6
|
+
authors = [
|
|
7
|
+
{ name = "Daniel Tschertkow", email = "daniel.tschertkow@posteo.de" }
|
|
8
|
+
]
|
|
9
|
+
requires-python = ">=3.13"
|
|
10
|
+
dependencies = []
|
|
11
|
+
|
|
12
|
+
[build-system]
|
|
13
|
+
requires = ["uv_build>=0.10.2,<0.11.0"]
|
|
14
|
+
build-backend = "uv_build"
|
|
15
|
+
|
|
16
|
+
[dependency-groups]
|
|
17
|
+
dev = [
|
|
18
|
+
"pytest>=9.0.2",
|
|
19
|
+
"pytest-asyncio>=1.3.0",
|
|
20
|
+
]
|
|
21
|
+
|
|
22
|
+
[tool.pytest.ini_options]
|
|
23
|
+
asyncio_mode = "auto"
|
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
from asyncio import Queue
|
|
3
|
+
from collections.abc import Awaitable, Iterable
|
|
4
|
+
from typing import Callable, Tuple
|
|
5
|
+
|
|
6
|
+
type Consumer[T] = Callable[[T], None]
|
|
7
|
+
type Function[T, R] = Callable[[T], R]
|
|
8
|
+
type Functor[T, R] = Callable[[T], Iterable[R]]
|
|
9
|
+
type Producer[T] = Callable[[], T]
|
|
10
|
+
type Observer[T] = Callable[[T], None] # is supposed to not mutate the argument
|
|
11
|
+
type Operator[T] = Callable[[T, T], T]
|
|
12
|
+
type Predicate[T] = Callable[[T], bool]
|
|
13
|
+
type IterableProducer[T] = Callable[[], Iterable[T]]
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def mapper[T, R](
|
|
17
|
+
map: Function[T, R], inqueue: Queue[T], /, qsize: int = 1
|
|
18
|
+
) -> Tuple[Queue[R], Awaitable[None]]:
|
|
19
|
+
"""A mapper is a task executed in the background (asynchronously) that
|
|
20
|
+
receives an input item from the inqueue, applies map to it and puts the
|
|
21
|
+
result on the outqueue. qsize controls the outquesize. It corresponds to
|
|
22
|
+
the maxsize parameter of asyncio.Queue, but defaults to 1.
|
|
23
|
+
|
|
24
|
+
Returns a tuple of (outqueue, worker). The outqueue contains the results
|
|
25
|
+
while the worker must be awaited to start processing.
|
|
26
|
+
"""
|
|
27
|
+
|
|
28
|
+
outqueue = Queue[R](maxsize=qsize)
|
|
29
|
+
|
|
30
|
+
async def worker() -> None:
|
|
31
|
+
await asyncio.sleep(2)
|
|
32
|
+
try:
|
|
33
|
+
while True:
|
|
34
|
+
item = await inqueue.get()
|
|
35
|
+
inqueue.task_done()
|
|
36
|
+
await outqueue.put(map(item))
|
|
37
|
+
except asyncio.QueueShutDown:
|
|
38
|
+
outqueue.shutdown()
|
|
39
|
+
return outqueue, worker()
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def flatmapper[T, R](
|
|
43
|
+
map: Functor[T, R], inqueue: Queue[T], /, qsize: int = 1
|
|
44
|
+
) -> Tuple[Queue[R], Awaitable[None]]:
|
|
45
|
+
"""A flatmapper is a task executed in the background (asynchronously) that
|
|
46
|
+
receives an input item from the inqueue and applies map to it. The result is
|
|
47
|
+
expected to conform to collections.abc.Sequence where each element of it
|
|
48
|
+
gets put on the ouqueue individually. qsize controls the outquesize. It
|
|
49
|
+
corresponds to the maxsize parameter of asyncio.Queue, but defaults to 1.
|
|
50
|
+
|
|
51
|
+
Returns a tuple of (outqueue, worker). The outqueue contains the results
|
|
52
|
+
while the worker must be awaited to start processing.
|
|
53
|
+
"""
|
|
54
|
+
|
|
55
|
+
outqueue = Queue[R](maxsize=qsize)
|
|
56
|
+
|
|
57
|
+
async def worker() -> None:
|
|
58
|
+
try:
|
|
59
|
+
while True:
|
|
60
|
+
item = await inqueue.get()
|
|
61
|
+
inqueue.task_done()
|
|
62
|
+
item_seq: Iterable[R] = map(item)
|
|
63
|
+
for i in item_seq:
|
|
64
|
+
await outqueue.put(i)
|
|
65
|
+
except asyncio.QueueShutDown:
|
|
66
|
+
outqueue.shutdown()
|
|
67
|
+
|
|
68
|
+
return outqueue, worker()
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def producer[R](
|
|
72
|
+
produce: Producer[R], /, qsize: int = 1
|
|
73
|
+
) -> Tuple[Queue[R], Awaitable[None]]:
|
|
74
|
+
"""A producer is a task executed in the background (asynchronously) that
|
|
75
|
+
generates an input by calling generate() and puts the result on the
|
|
76
|
+
outqueue. qsize controls the outquesize. It corresponds to the maxsize
|
|
77
|
+
parameter of asyncio.Queue, but defaults to 1.
|
|
78
|
+
|
|
79
|
+
Returns a tuple of (outqueue, worker). The outqueue contains the results
|
|
80
|
+
while the worker must be awaited to start processing.
|
|
81
|
+
"""
|
|
82
|
+
outqueue = Queue[R](maxsize=qsize)
|
|
83
|
+
|
|
84
|
+
async def worker() -> None:
|
|
85
|
+
item: R = produce()
|
|
86
|
+
await outqueue.put(item)
|
|
87
|
+
outqueue.shutdown()
|
|
88
|
+
|
|
89
|
+
return outqueue, worker()
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def flatproducer[R](
|
|
93
|
+
produce: IterableProducer[R], /, qsize: int = 1
|
|
94
|
+
) -> Tuple[Queue[R], Awaitable[None]]:
|
|
95
|
+
"""A flatproducer is a task executed in the background (asynchronously) that
|
|
96
|
+
generates input items by calling generate() and puts the results on the
|
|
97
|
+
outqueue individually. The result is expected to conform to
|
|
98
|
+
collections.abc.Sequence. qsize controls the outquesize. It corresponds to
|
|
99
|
+
the maxsize parameter of asyncio.Queue, but defaults to 1.
|
|
100
|
+
|
|
101
|
+
Returns a tuple of (outqueue, worker). The outqueue contains the results
|
|
102
|
+
while the worker must be awaited to start processing.
|
|
103
|
+
"""
|
|
104
|
+
outqueue = Queue[R](maxsize=qsize)
|
|
105
|
+
|
|
106
|
+
async def worker() -> None:
|
|
107
|
+
item_seq: Iterable[R] = produce()
|
|
108
|
+
for item in item_seq:
|
|
109
|
+
await outqueue.put(item)
|
|
110
|
+
outqueue.shutdown()
|
|
111
|
+
|
|
112
|
+
return outqueue, worker()
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def iterator[T](
|
|
116
|
+
iterable: Iterable[T], /, qsize: int = 1
|
|
117
|
+
) -> Tuple[Queue[T], Awaitable[None]]:
|
|
118
|
+
"""A generator is a task executed in the background (asynchronously) that
|
|
119
|
+
generates an input by iterating through the generate generator and putting
|
|
120
|
+
the result on the outqueue. qsize controls the outquesize. It corresponds
|
|
121
|
+
to the maxsize parameter of asyncio.Queue, but defaults to 1.
|
|
122
|
+
|
|
123
|
+
Returns a tuple of (outqueue, worker). The outqueue contains the results
|
|
124
|
+
while the worker must be awaited to start processing.
|
|
125
|
+
"""
|
|
126
|
+
outqueue = Queue[T](maxsize=qsize)
|
|
127
|
+
|
|
128
|
+
async def worker() -> None:
|
|
129
|
+
item: T
|
|
130
|
+
for item in iterable:
|
|
131
|
+
await outqueue.put(item)
|
|
132
|
+
outqueue.shutdown()
|
|
133
|
+
|
|
134
|
+
return outqueue, worker()
|
|
135
|
+
|
|
136
|
+
|
|
137
|
+
def reducer[T](
|
|
138
|
+
reduce: Operator[T], inqueue: Queue[T], /, initial_value: T | None = None
|
|
139
|
+
) -> Awaitable[T | None]:
|
|
140
|
+
"""A reducer is a task executed in the background (asynchronously) that
|
|
141
|
+
generates an input by calling generate() and puts the result on the
|
|
142
|
+
outqueue. qsize controls the outquesize. It corresponds to the maxsize
|
|
143
|
+
parameter of asyncio.Queue, but defaults to 1.
|
|
144
|
+
|
|
145
|
+
Returns the result of the reduction as an Awaitable.
|
|
146
|
+
"""
|
|
147
|
+
|
|
148
|
+
async def worker() -> T | None:
|
|
149
|
+
if initial_value is None:
|
|
150
|
+
try:
|
|
151
|
+
first: T = await inqueue.get()
|
|
152
|
+
inqueue.task_done()
|
|
153
|
+
except asyncio.QueueShutDown:
|
|
154
|
+
return None
|
|
155
|
+
else:
|
|
156
|
+
first = initial_value
|
|
157
|
+
|
|
158
|
+
try:
|
|
159
|
+
second: T = await inqueue.get()
|
|
160
|
+
inqueue.task_done()
|
|
161
|
+
except asyncio.QueueShutDown:
|
|
162
|
+
return first
|
|
163
|
+
|
|
164
|
+
result: T = reduce(first, second)
|
|
165
|
+
try:
|
|
166
|
+
while True:
|
|
167
|
+
next = await inqueue.get()
|
|
168
|
+
inqueue.task_done()
|
|
169
|
+
result = reduce(result, next)
|
|
170
|
+
except asyncio.QueueShutDown:
|
|
171
|
+
return result
|
|
172
|
+
|
|
173
|
+
return worker()
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def filterer[T](
|
|
177
|
+
predicate: Predicate[T], inqueue: Queue[T], /, qsize: int = 1
|
|
178
|
+
) -> Tuple[Queue[T], Awaitable[None]]:
|
|
179
|
+
"""A filterer is a task executed in the background (asynchronously) that
|
|
180
|
+
filters items of the inqueue and puts the items for which the predicate is
|
|
181
|
+
true on the outqueue. qsize controls the outqueue size. It corresponds to
|
|
182
|
+
the maxsize parameter of asyncio.Queue, but defaults to 1.
|
|
183
|
+
|
|
184
|
+
Returns a tuple of (outqueue, worker). The outqueue has the remaining items
|
|
185
|
+
and the worker is an Awaitable that starts the filtering process.
|
|
186
|
+
"""
|
|
187
|
+
|
|
188
|
+
outqueue = Queue[T](maxsize=qsize)
|
|
189
|
+
|
|
190
|
+
async def worker() -> None:
|
|
191
|
+
try:
|
|
192
|
+
while True:
|
|
193
|
+
item = await inqueue.get()
|
|
194
|
+
inqueue.task_done()
|
|
195
|
+
if predicate(item):
|
|
196
|
+
await outqueue.put(item)
|
|
197
|
+
except asyncio.QueueShutDown:
|
|
198
|
+
outqueue.shutdown()
|
|
199
|
+
|
|
200
|
+
return outqueue, worker()
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def consumer[T](
|
|
204
|
+
consume: Consumer[T],
|
|
205
|
+
inqueue: Queue[T],
|
|
206
|
+
/,
|
|
207
|
+
) -> Awaitable[None]:
|
|
208
|
+
"""A consumer is a task executed in the background (asynchronously) that
|
|
209
|
+
consumes items from the inqueue (for side effects).
|
|
210
|
+
|
|
211
|
+
Returns a worker. The worker is an Awaitable that starts the filtering
|
|
212
|
+
process.
|
|
213
|
+
"""
|
|
214
|
+
|
|
215
|
+
async def worker() -> None:
|
|
216
|
+
try:
|
|
217
|
+
while True:
|
|
218
|
+
item = await inqueue.get()
|
|
219
|
+
inqueue.task_done()
|
|
220
|
+
consume(item)
|
|
221
|
+
except asyncio.QueueShutDown:
|
|
222
|
+
return
|
|
223
|
+
|
|
224
|
+
return worker()
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def observer[T](
|
|
228
|
+
observe: Observer[T], inqueue: Queue[T], /, qsize: int = 1
|
|
229
|
+
) -> Tuple[Queue[T], Awaitable[None]]:
|
|
230
|
+
"""An observer is a task executed in the background (asynchronously) that
|
|
231
|
+
calls observer on the items from the inqueue (for side effects). The item
|
|
232
|
+
should not be modified by a call to observe. The item is put back on the
|
|
233
|
+
outqueue with the assumption that it was not modified.
|
|
234
|
+
|
|
235
|
+
Returns a tuple of (outqueue, worker). The outqueue has the remaining items
|
|
236
|
+
and the worker is an Awaitable that starts the filtering process.
|
|
237
|
+
"""
|
|
238
|
+
outqueue = Queue[T](maxsize=qsize)
|
|
239
|
+
|
|
240
|
+
async def worker() -> None:
|
|
241
|
+
try:
|
|
242
|
+
while True:
|
|
243
|
+
item = await inqueue.get()
|
|
244
|
+
inqueue.task_done()
|
|
245
|
+
observe(item)
|
|
246
|
+
await outqueue.put(item)
|
|
247
|
+
except asyncio.QueueShutDown:
|
|
248
|
+
outqueue.shutdown()
|
|
249
|
+
|
|
250
|
+
return outqueue, worker()
|
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
"""Defines the Pipeline class that builds an asynchronous processing pipeline in
|
|
2
|
+
a functional style.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from asyncio import Queue
|
|
7
|
+
from collections.abc import Awaitable, Iterable, MutableSequence
|
|
8
|
+
from typing import List, Self, Tuple
|
|
9
|
+
|
|
10
|
+
from . import links
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class PipelineClosedException(Exception):
    """Raised when a stage is added to a pipeline that is already closed.

    A pipeline becomes closed once a terminal (closing) operation such as
    reduce or consume has been performed on it.
    """
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class Pipeline[T]:
    """Asynchronous processing pipeline.

    `Pipeline` orchestrates a series of async stages chained together via queues
    of type `asyncio.Queue`. Users create a pipeline from an existing queue, a
    producer function, a sequence, or a generator and then chain transformations
    via `map`, `flatmap`, `filter`, and finally terminate the pipeline with
    `reduce`, `consume`, or `collect`.

    The pipeline tracks its internal worker coroutines and ensures they are
    awaited together. Once a terminating operation is invoked, the pipeline is
    marked as *closed* and further stage additions raise
    `PipelineClosedException`.

    Every step that exposes a downstream queue has a `qsize` keyword parameter
    that sets the size of the queue and defaults to `1` (since most queues are
    either always empty or always full). Use queues of bigger sizes if your
    operation is bursty.
    """

    # Current tail queue of the chain; each stage replaces or wraps it.
    queue: Queue[T]
    # Unawaited worker coroutines for every stage added so far; a terminal
    # operation gathers them all. Note: the list object is shared with
    # pipelines returned by `map`/`flatmap`.
    workers: List[Awaitable[None]]
    # True once a closing operation (reduce/consume/expose/collect) ran.
    closed: bool

    def __init__(self, queue: Queue[T], workers: List[Awaitable[None]]):
        """Create a new `Pipeline` from an existing `asyncio.Queue`. Make sure
        the queue is fed and properly shut down, otherwise the pipeline will not
        terminate but wait for further input.

        Args:
            queue: The input queue that provides items to the pipeline.
            workers: Worker coroutines already associated with the queue.

        The pipeline starts in an *open* state; stages can be added until a
        terminal operation (reduce/consume/collect) closes it.
        """
        self.closed = False
        self.workers = workers
        self.queue = queue

    @classmethod
    def from_queue(cls, inqueue: Queue[T]) -> Self:
        """Construct a `Pipeline` directly from an existing queue.

        Args:
            inqueue: The source `asyncio.Queue`.

        Returns:
            A new `Pipeline` instance.
        """
        pipeline = cls(inqueue, list())
        return pipeline

    @classmethod
    def from_producer(cls, func: links.Producer[T], /, qsize: int = 1) -> Self:
        """Create a pipeline from a producer function.

        The producer closure supplies the initial, single item for the pipeline.

        Args:
            func: A zero-argument callable that returns a single item.
            qsize: Maximum size of the internal queue (default `1`).

        Returns:
            A new `Pipeline` instance.
        """
        queue, worker = links.producer(func, qsize=qsize)
        pipeline = cls(queue, list())
        pipeline.workers.append(worker)
        return pipeline

    @classmethod
    def from_producer_flatten(
        cls, func: links.IterableProducer[T], /, qsize: int = 1
    ) -> Self:
        """Create a pipeline from a producer function.

        The producer closure supplies the initial sequence for the pipeline
        where each item is put into the pipeline individually.

        Args:
            func: A zero-argument callable that returns a sequence.
            qsize: Maximum size of the internal queue (default `1`).

        Returns:
            A new `Pipeline` instance.

        """
        queue, worker = links.flatproducer(func, qsize=qsize)
        pipeline = cls(queue, list())
        pipeline.workers.append(worker)
        return pipeline

    @classmethod
    def from_iterable(cls, seq: Iterable[T], /, qsize: int = 1) -> Self:
        """Create a pipeline from an existing in-memory iterable.

        Each item from the iterable is put into the pipeline individually.

        Args:
            seq: Any `collections.abc.Iterable` of items.
            qsize: Queue size (default `1`).

        Returns:
            A new `Pipeline` instance.
        """
        queue, worker = links.iterator(seq, qsize=qsize)
        pipeline = cls(queue, list())
        pipeline.workers.append(worker)
        return pipeline

    def map[U](self, func: links.Function[T, U], /, qsize: int = 1) -> "Pipeline[U]":
        """Add a mapping stage to the pipeline.

        Each item from the previous stage is transformed by `func` and the
        result is the input of the next stage in the pipeline.

        Args:
            func: Callable that maps an input item to an output item.
            qsize: Queue size for the downstream queue (default `1`).

        Returns:
            A new `Pipeline[U]` (the element type changes, so a new object
            is created) sharing this pipeline's worker list; continue
            chaining on the returned pipeline.

        Raises:
            PipelineClosedException: If `map` is added to a pipeline that already
                has a closing stage.

        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        queue, worker = links.mapper(func, self.queue, qsize=qsize)

        self.workers.append(worker)
        return Pipeline[U](queue, self.workers)

    def flatmap[U](self, func: links.Functor[T, U], /, qsize: int = 1) -> "Pipeline[U]":
        """Add a flat-mapping stage to the pipeline.

        `func` is expected to return an iterable for each input item. Each
        element of that iterable is put individually to the downstream
        queue.

        Args:
            func: Callable that maps an input item to a sequence of output items.
            qsize: Queue size for the downstream queue (default `1`).

        Returns:
            A new `Pipeline[U]` (the element type changes, so a new object
            is created) sharing this pipeline's worker list; continue
            chaining on the returned pipeline.

        Raises:
            PipelineClosedException: If `flatmap` is added to a pipeline that
                already has a closing stage.

        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        queue, worker = links.flatmapper(func, self.queue, qsize=qsize)

        self.workers.append(worker)
        return Pipeline[U](queue, self.workers)

    def filter(self, pred: links.Predicate[T], /, qsize: int = 1) -> Self:
        """Add a filter stage to the pipeline.

        Only items for which `pred` returns `True` are forwarded downstream.

        Args:
            pred: Predicate callable returning `True` for items to keep.
            qsize: Queue size for the downstream queue (default `1`).

        Returns:
            The pipeline instance for chaining.

        Raises:
            PipelineClosedException: If `filter` is added to a pipeline that
                already has a closing stage.
        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        queue, worker = links.filterer(pred, self.queue, qsize=qsize)

        self.workers.append(worker)
        self.queue = queue
        return self

    async def reduce(
        self, reduce: links.Operator[T], /, initial_value: T | None = None
    ) -> T | None:
        """Reduce the pipeline to a single accumulated value.

        Consumes items from the pipeline using `reduce` as a binary operator.
        After reduction the pipeline is marked as closed.

        `reduce` is a closing operation!

        Args:
            reduce: Binary function combining two items into one.
            initial_value: Optional initial accumulator. If omitted the first
                item from the queue is used as the start value.

        Returns:
            The final reduced value, or `None` if the queue was empty.

        Raises:
            PipelineClosedException: If `reduce` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        self.closed = True

        rworker: Awaitable[T | None] = links.reducer(
            reduce, self.queue, initial_value=initial_value
        )

        # Run all stage workers and the reducer concurrently to completion.
        results: List = await asyncio.gather(*self.workers, rworker)
        return results[-1]  # rworker holds the awaitable result of the
        # reduction and comes last

    async def consume(
        self,
        consume: links.Consumer[T],
        /,
    ) -> None:
        """Consume items from the pipeline for side effects.

        The `consume` callable is applied to each item. This is a terminal
        operation; after calling it the pipeline is closed.

        `consume` is a closing operation!

        Args:
            consume: Callable that processes each item (e.g., printing or
                storing).

        Raises:
            PipelineClosedException: If `consume` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        self.closed = True
        cworker: Awaitable[None] = links.consumer(consume, self.queue)

        self.workers.append(cworker)
        await asyncio.gather(*self.workers)

    def isClosed(self) -> bool:
        """Return `True` if a closing operation has closed the pipeline."""
        return self.closed

    def expose(self) -> Tuple[Queue[T], Awaitable[None]]:
        """Expose the internal queue and a combined worker.

        Allows external code to await the combined workers while receiving the
        output queue directly.

        `expose` is a closing operation!

        Returns:
            A tuple of (queue, worker): the pipeline's current output queue
            and a single Awaitable that runs all stage workers.

        Raises:
            PipelineClosedException: If `expose` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        self.closed = True

        # bundle workers into one
        async def worker() -> None:
            await asyncio.gather(*self.workers)

        return self.queue, worker()

    async def collect(self, container: MutableSequence[T]) -> MutableSequence[T]:
        """Collect all items from the pipeline into `container`.

        The pipeline runs to completion, appending each yielded item to the
        provided mutable sequence. After collection the pipeline is closed.

        `collect` is a closing operation!

        Args:
            container: A mutable sequence (e.g., list) to which items are added.

        Returns:
            The same `container`, after all pipeline items were appended.

        Raises:
            PipelineClosedException: If `collect` is added to a pipeline that
                already has a closing stage.
        """

        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")
        self.closed = True

        async def collector() -> MutableSequence[T]:
            # Drain the tail queue until upstream shuts it down.
            try:
                while True:
                    item: T = await self.queue.get()
                    self.queue.task_done()
                    container.append(item)
            except asyncio.QueueShutDown:
                return container

        *worker_results, seq = await asyncio.gather(*self.workers, collector())
        return seq  # seq can't be None


    def inspect(self, observe: links.Observer[T], /, qsize: int = 1) -> Self:
        """Insert an observer stage that runs `observe` on each item.

        Useful for side-effects like logging without modifying the
        stream. `observe` is expected to not modify its item.

        Args:
            observe: Callable invoked with each item.
            qsize: Queue size for the observer's downstream queue.

        Returns:
            The pipeline instance for chaining.

        Raises:
            PipelineClosedException: If `inspect` is added to a pipeline that
                already has a closing stage.
        """
        if self.closed:
            raise PipelineClosedException("The pipeline is already closed")

        outqueue, worker = links.observer(observe, self.queue, qsize=qsize)

        self.workers.append(worker)
        self.queue = outqueue

        return self
|
|
File without changes
|