wool 0.1rc20__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- wool/__init__.py +122 -0
- wool/_context.py +29 -0
- wool/_protobuf/worker.py +26 -0
- wool/_resource_pool.py +376 -0
- wool/_typing.py +7 -0
- wool/_undefined.py +11 -0
- wool/_work.py +554 -0
- wool/core/__init__.py +0 -0
- wool/core/discovery/__init__.py +0 -0
- wool/core/discovery/base.py +249 -0
- wool/core/discovery/lan.py +534 -0
- wool/core/discovery/local.py +822 -0
- wool/core/loadbalancer/__init__.py +0 -0
- wool/core/loadbalancer/base.py +125 -0
- wool/core/loadbalancer/roundrobin.py +101 -0
- wool/core/protobuf/__init__.py +18 -0
- wool/core/protobuf/exception.py +3 -0
- wool/core/protobuf/task.py +11 -0
- wool/core/protobuf/task_pb2.py +42 -0
- wool/core/protobuf/task_pb2.pyi +43 -0
- wool/core/protobuf/task_pb2_grpc.py +24 -0
- wool/core/protobuf/worker.py +26 -0
- wool/core/protobuf/worker_pb2.py +53 -0
- wool/core/protobuf/worker_pb2.pyi +65 -0
- wool/core/protobuf/worker_pb2_grpc.py +141 -0
- wool/core/typing.py +22 -0
- wool/core/worker/__init__.py +0 -0
- wool/core/worker/base.py +300 -0
- wool/core/worker/connection.py +250 -0
- wool/core/worker/local.py +148 -0
- wool/core/worker/pool.py +386 -0
- wool/core/worker/process.py +249 -0
- wool/core/worker/proxy.py +427 -0
- wool/core/worker/service.py +231 -0
- wool-0.1rc20.dist-info/METADATA +463 -0
- wool-0.1rc20.dist-info/RECORD +38 -0
- wool-0.1rc20.dist-info/WHEEL +4 -0
- wool-0.1rc20.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,822 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import atexit
|
|
5
|
+
import hashlib
|
|
6
|
+
import struct
|
|
7
|
+
import tempfile
|
|
8
|
+
from contextlib import asynccontextmanager
|
|
9
|
+
from contextlib import contextmanager
|
|
10
|
+
from multiprocessing.shared_memory import SharedMemory
|
|
11
|
+
from pathlib import Path
|
|
12
|
+
from typing import AsyncIterator
|
|
13
|
+
from typing import Callable
|
|
14
|
+
from typing import Final
|
|
15
|
+
from typing import Self
|
|
16
|
+
from uuid import UUID
|
|
17
|
+
from uuid import uuid4
|
|
18
|
+
|
|
19
|
+
import portalocker
|
|
20
|
+
from watchdog.events import FileSystemEvent
|
|
21
|
+
from watchdog.events import FileSystemEventHandler
|
|
22
|
+
from watchdog.observers import Observer
|
|
23
|
+
|
|
24
|
+
from wool._resource_pool import ResourcePool
|
|
25
|
+
from wool.core.discovery.base import Discovery
|
|
26
|
+
from wool.core.discovery.base import DiscoveryEvent
|
|
27
|
+
from wool.core.discovery.base import DiscoveryEventType
|
|
28
|
+
from wool.core.discovery.base import DiscoveryPublisherLike
|
|
29
|
+
from wool.core.discovery.base import DiscoverySubscriberLike
|
|
30
|
+
from wool.core.discovery.base import PredicateFunction
|
|
31
|
+
from wool.core.discovery.base import WorkerInfo
|
|
32
|
+
from wool.core.protobuf.worker import WorkerInfo as WorkerInfoProtobuf
|
|
33
|
+
|
|
34
|
+
REF_WIDTH: Final = 16
|
|
35
|
+
NULL_REF: Final = b"\x00" * REF_WIDTH
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
class _Watchdog(FileSystemEventHandler):
    """Filesystem event handler for worker discovery notifications.

    Reacts to modifications of a single notification file by setting an
    asyncio event on the subscriber's event loop. ``on_modified`` is
    called from watchdog's observer thread, so the work is handed over to
    the loop with ``call_soon_threadsafe``.

    The event is only set while holding the supplied lock, so a
    notification that arrives while the lock holder is busy is delivered
    once the lock is released.

    :param notification:
        asyncio.Event to set when the notification file is modified.
    :param watchdog:
        Path to the notification file to monitor.
    :param lock:
        asyncio.Lock to acquire before setting the notification event.
    :param loop:
        Event loop where the notification lives.
    """

    def __init__(
        self,
        notification: asyncio.Event,
        watchdog: Path,
        lock: asyncio.Lock,
        loop: asyncio.AbstractEventLoop,
    ):
        self._notification = notification
        self._watchdog = watchdog
        self._lock = lock
        self._loop = loop
        # Strong references to in-flight tasks. The event loop only keeps
        # weak references, so an unreferenced task may be garbage-collected
        # before it gets to set the event.
        self._pending: set[asyncio.Task] = set()

    def on_modified(self, event: FileSystemEvent):
        """Handle file modification events.

        Events for any path other than the notification file are ignored.

        :param event:
            The filesystem event containing the modified file path.
        """
        event_path = Path(str(event.src_path))
        if event_path == self._watchdog:
            # Called on the observer thread: hop onto the event loop
            # before touching any asyncio primitive.
            self._loop.call_soon_threadsafe(self._set_event_with_lock)

    def _set_event_with_lock(self):
        """Schedule the task that sets the event under the lock.

        Must be called from the event loop thread.
        """
        task = asyncio.create_task(self._async_set_event())
        self._pending.add(task)
        # Drop the reference as soon as the task finishes.
        task.add_done_callback(self._pending.discard)

    async def _async_set_event(self):
        """Acquire the lock, then set the notification event."""
        async with self._lock:
            self._notification.set()
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
class _WorkerReference:
|
|
100
|
+
"""Reference to a worker using its UUID.
|
|
101
|
+
|
|
102
|
+
Provides both byte and unicode string representations of a worker's UUID.
|
|
103
|
+
|
|
104
|
+
:param uid:
|
|
105
|
+
The worker's UID to reference.
|
|
106
|
+
"""
|
|
107
|
+
|
|
108
|
+
__slots__ = ("_uuid",)
|
|
109
|
+
|
|
110
|
+
def __init__(self, uid: UUID):
|
|
111
|
+
self._uuid = uid
|
|
112
|
+
|
|
113
|
+
def __str__(self) -> str:
|
|
114
|
+
"""String representation (32-char hex) for :class:`SharedMemory` names.
|
|
115
|
+
|
|
116
|
+
:returns:
|
|
117
|
+
The UUID as a hex string without dashes.
|
|
118
|
+
"""
|
|
119
|
+
return _short_hash(self._uuid.hex)
|
|
120
|
+
|
|
121
|
+
def __bytes__(self) -> bytes:
|
|
122
|
+
"""Bytes representation for address space storage.
|
|
123
|
+
|
|
124
|
+
:returns:
|
|
125
|
+
The UUID as 16 bytes.
|
|
126
|
+
"""
|
|
127
|
+
return self._uuid.bytes
|
|
128
|
+
|
|
129
|
+
def __hash__(self) -> int:
|
|
130
|
+
return hash(self._uuid)
|
|
131
|
+
|
|
132
|
+
def __eq__(self, other: object) -> bool:
|
|
133
|
+
if isinstance(other, _WorkerReference):
|
|
134
|
+
return self._uuid == other._uuid
|
|
135
|
+
return NotImplemented
|
|
136
|
+
|
|
137
|
+
def __repr__(self) -> str:
|
|
138
|
+
return f"_WorkerReference({self._uuid})"
|
|
139
|
+
|
|
140
|
+
@classmethod
|
|
141
|
+
def from_bytes(cls, data: bytes) -> _WorkerReference:
|
|
142
|
+
"""Create a reference from its bytes representation.
|
|
143
|
+
|
|
144
|
+
:param data:
|
|
145
|
+
The 16-byte UUID representation.
|
|
146
|
+
:returns:
|
|
147
|
+
A new reference instance.
|
|
148
|
+
:raises ValueError:
|
|
149
|
+
If data is not 16 bytes or is NULL.
|
|
150
|
+
"""
|
|
151
|
+
if len(data) != REF_WIDTH:
|
|
152
|
+
raise ValueError(f"Expected 16 bytes, got {len(data)}")
|
|
153
|
+
if data == NULL_REF:
|
|
154
|
+
raise ValueError("Cannot create _WorkerReference from NULL bytes")
|
|
155
|
+
ref = object.__new__(cls)
|
|
156
|
+
ref._uuid = UUID(bytes=data)
|
|
157
|
+
return ref
|
|
158
|
+
|
|
159
|
+
@property
|
|
160
|
+
def uuid(self) -> UUID:
|
|
161
|
+
"""The UUID this reference points to.
|
|
162
|
+
|
|
163
|
+
:returns:
|
|
164
|
+
The UUID instance.
|
|
165
|
+
"""
|
|
166
|
+
return self._uuid
|
|
167
|
+
|
|
168
|
+
@property
|
|
169
|
+
def bytes(self) -> bytes:
|
|
170
|
+
"""The 16-byte representation for address space storage.
|
|
171
|
+
|
|
172
|
+
:returns:
|
|
173
|
+
The UUID as 16 bytes.
|
|
174
|
+
"""
|
|
175
|
+
return self._uuid.bytes
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
# public
|
|
179
|
+
class LocalDiscovery(Discovery):
|
|
180
|
+
"""Local discovery service using shared memory.
|
|
181
|
+
|
|
182
|
+
Provides worker discovery within a single machine using shared
|
|
183
|
+
memory for communication between publishers and subscribers.
|
|
184
|
+
Multiple unrelated processes can share the same discovery
|
|
185
|
+
namespace by using the same namespace identifier, enabling
|
|
186
|
+
automatic worker discovery across process boundaries.
|
|
187
|
+
|
|
188
|
+
The namespace identifies a shared memory region where worker
|
|
189
|
+
information is stored. Publishers write worker metadata to this
|
|
190
|
+
region, and subscribers read from it to discover available
|
|
191
|
+
workers. All access is synchronized using file-based locking to
|
|
192
|
+
ensure consistency.
|
|
193
|
+
|
|
194
|
+
:param namespace:
|
|
195
|
+
Unique identifier for the shared memory region. Publishers
|
|
196
|
+
and subscribers using the same namespace will see each
|
|
197
|
+
other's workers.
|
|
198
|
+
:param capacity:
|
|
199
|
+
Maximum number of workers that can be registered
|
|
200
|
+
simultaneously. Defaults to 128.
|
|
201
|
+
:param block_size:
|
|
202
|
+
Size in bytes for each worker's serialized data block.
|
|
203
|
+
Defaults to 1024.
|
|
204
|
+
|
|
205
|
+
.. note::
|
|
206
|
+
Multiple unrelated processes can create publishers and
|
|
207
|
+
subscribers with the same namespace. They will automatically
|
|
208
|
+
discover each other's workers through the shared memory
|
|
209
|
+
region.
|
|
210
|
+
|
|
211
|
+
Example usage:
|
|
212
|
+
|
|
213
|
+
Publish workers
|
|
214
|
+
|
|
215
|
+
.. code-block:: python
|
|
216
|
+
|
|
217
|
+
publisher = LocalDiscovery.Publisher("my-worker-pool")
|
|
218
|
+
async with publisher:
|
|
219
|
+
await publisher.publish("worker-added", worker_info)
|
|
220
|
+
|
|
221
|
+
Subscribe to workers
|
|
222
|
+
|
|
223
|
+
.. code-block:: python
|
|
224
|
+
|
|
225
|
+
subscriber = LocalDiscovery.Subscriber("my-worker-pool")
|
|
226
|
+
async for event in subscriber:
|
|
227
|
+
print(f"Discovered worker: {event.worker_info}")
|
|
228
|
+
"""
|
|
229
|
+
|
|
230
|
+
_namespace: Final[str]
|
|
231
|
+
|
|
232
|
+
def __init__(
    self,
    namespace: str | None = None,
    *,
    capacity: int = 128,
    block_size: int = 1024,
):
    """Store the discovery configuration; no shared memory is touched.

    :param namespace:
        Identifier of the shared memory region. When omitted (or
        empty), a unique ``workerpool-<uuid4>`` namespace is generated.
    :param capacity:
        Number of workers the address space is sized for.
    :param block_size:
        Size in bytes passed to publishers created via ``publisher``.
    """
    if not namespace:
        namespace = f"workerpool-{uuid4()}"
    self._namespace = namespace
    self._capacity = capacity
    self._block_size = block_size
|
|
242
|
+
|
|
243
|
+
def __enter__(self) -> Self:
    """Create and zero the namespace's address space.

    Allocates a shared memory segment named after the hashed namespace
    with one ``REF_WIDTH``-byte slot per worker, and registers an
    ``atexit`` hook that unlinks it if the process exits without
    leaving the context.

    :returns:
        This discovery instance.
    """
    # One full reference slot per worker. Sizing the segment with a
    # smaller per-worker width would provide fewer slots than
    # ``capacity`` and could leave a trailing partial slot.
    size = self._capacity * REF_WIDTH
    self._address_space = SharedMemory(
        name=_short_hash(self._namespace),
        create=True,
        size=size,
    )
    assert self._address_space.buf
    self._cleanup = atexit.register(lambda: self._address_space.unlink())
    # Every slot starts out as NULL_REF (all zero bytes).
    self._address_space.buf[:size] = bytes(size)
    return self
|
|
255
|
+
|
|
256
|
+
def __exit__(self, *_):
    """Destroy the address space created by ``__enter__``.

    Unlinks the shared memory segment, unregisters the ``atexit`` hook
    and closes this process's mapping of the segment.
    """
    try:
        self._address_space.unlink()
    finally:
        # Always drop the atexit hook so it cannot run a second unlink
        # at interpreter exit, and release this process's mapping,
        # which unlink() alone does not do.
        atexit.unregister(self._cleanup)
        self._address_space.close()
|
|
259
|
+
|
|
260
|
+
@property
def namespace(self):
    """Namespace identifying this discovery service's shared region.

    :returns:
        The namespace string given at construction, or the generated
        one when none was given.
    """
    return self._namespace
|
|
268
|
+
|
|
269
|
+
@property
def publisher(self) -> DiscoveryPublisherLike:
    """Build a fresh publisher bound to this service's namespace.

    Each access creates a new ``Publisher`` configured with this
    service's block size.

    :returns:
        A publisher instance for broadcasting worker events.
    """
    publisher_cls = self.Publisher
    return publisher_cls(self._namespace, block_size=self._block_size)
|
|
277
|
+
|
|
278
|
+
@property
def subscriber(self) -> DiscoverySubscriberLike:
    """Build an unfiltered subscriber for this service's namespace.

    Equivalent to calling ``subscribe()`` with no arguments.

    :returns:
        A subscriber instance that receives all worker discovery
        events.
    """
    unfiltered = self.subscribe()
    return unfiltered
|
|
287
|
+
|
|
288
|
+
def subscribe(
    self,
    filter: PredicateFunction | None = None,
    *,
    poll_interval: float | None = None,
) -> DiscoverySubscriberLike:
    """Build a subscriber for this namespace, optionally filtered.

    :param filter:
        Optional predicate applied to each worker; only workers for
        which it returns True appear in the subscriber's events.
    :param poll_interval:
        Optional interval in seconds, forwarded to the subscriber as
        its polling timeout. ``None`` leaves the subscriber relying on
        filesystem notifications alone.
    :returns:
        A new subscriber instance bound to this service's namespace.
    """
    subscriber_cls = self.Subscriber
    return subscriber_cls(
        self._namespace,
        filter,
        poll_interval=poll_interval,
    )
|
|
309
|
+
|
|
310
|
+
class Publisher:
|
|
311
|
+
"""Publisher for broadcasting worker discovery events.
|
|
312
|
+
|
|
313
|
+
Publishes worker :class:`discovery events
|
|
314
|
+
<~wool.DiscoveryEvent>` to a shared memory region where
|
|
315
|
+
subscribers can discover them. Multiple publishers in different
|
|
316
|
+
processes can safely write to the same namespace using
|
|
317
|
+
cross-platform file locking for synchronization.
|
|
318
|
+
|
|
319
|
+
:param namespace:
|
|
320
|
+
The namespace identifier for the shared memory region.
|
|
321
|
+
:param block_size:
|
|
322
|
+
Size in bytes for worker metadata storage blocks. Defaults
|
|
323
|
+
to 512 bytes, which accommodates typical worker
|
|
324
|
+
information including tags and extra metadata.
|
|
325
|
+
:raises ValueError:
|
|
326
|
+
If block_size is negative.
|
|
327
|
+
"""
|
|
328
|
+
|
|
329
|
+
_block_size: int
|
|
330
|
+
_cleanups: dict[str, Callable]
|
|
331
|
+
_namespace: Final[str]
|
|
332
|
+
_shared_memory_pool: ResourcePool[SharedMemory]
|
|
333
|
+
|
|
334
|
+
def __init__(self, namespace: str, *, block_size: int = 512):
|
|
335
|
+
if block_size < 0:
|
|
336
|
+
raise ValueError("Block size must be positive")
|
|
337
|
+
self._namespace = namespace
|
|
338
|
+
self._block_size = block_size
|
|
339
|
+
self._cleanups = {}
|
|
340
|
+
self._shared_memory_pool = ResourcePool(
|
|
341
|
+
factory=self._shared_memory_factory,
|
|
342
|
+
finalizer=self._shared_memory_finalizer,
|
|
343
|
+
ttl=0,
|
|
344
|
+
)
|
|
345
|
+
|
|
346
|
+
async def __aenter__(self) -> Self:
    """Enter the shared memory pool's async context.

    :returns:
        This publisher.
    """
    pool = self._shared_memory_pool
    await pool.__aenter__()
    return self
|
|
349
|
+
|
|
350
|
+
async def __aexit__(self, *args):
    """Exit the shared memory pool's async context.

    The exception details are forwarded to the pool; the pool's return
    value is not propagated.
    """
    pool = self._shared_memory_pool
    await pool.__aexit__(*args)
|
|
352
|
+
|
|
353
|
+
@property
|
|
354
|
+
def namespace(self):
|
|
355
|
+
"""The namespace identifier for this publisher.
|
|
356
|
+
|
|
357
|
+
:returns:
|
|
358
|
+
The namespace string.
|
|
359
|
+
"""
|
|
360
|
+
return self._namespace
|
|
361
|
+
|
|
362
|
+
async def publish(self, type: DiscoveryEventType, worker_info: WorkerInfo):
    """Publish a worker discovery event.

    Takes the namespace's cross-process file lock, opens the
    namespace's address space and applies the change for the given
    event type. Afterwards touches the namespace's notification file
    so that subscribers watching it rescan.

    :param type:
        The type of discovery event: ``"worker-added"``,
        ``"worker-dropped"`` or ``"worker-updated"``.
    :param worker_info:
        Worker information to publish.
    :raises RuntimeError:
        If an unexpected event type is provided or if the shared
        memory is not properly initialized.
    """
    address_space_name = _short_hash(self._namespace)
    async with _lock(self._namespace):
        with _shared_memory(address_space_name) as address_space:
            if type == "worker-added":
                await self._add(worker_info, address_space)
            elif type == "worker-dropped":
                await self._drop(worker_info, address_space)
            elif type == "worker-updated":
                await self._update(worker_info, address_space)
            else:
                raise RuntimeError(f"Unexpected discovery event type: {type}")

    # Wake subscribers: they watch this file for modifications.
    notification_file = _watchdog_path(self._namespace)
    notification_file.touch()
|
|
395
|
+
|
|
396
|
+
async def _add(self, worker_info: WorkerInfo, address_space: SharedMemory):
    """Register a worker by adding it to shared memory.

    Finds the first free slot in the address space, writes the
    worker's serialized metadata (4-byte length followed by the
    payload) into a block acquired from the pool, and only then
    stores the worker's reference in the slot.

    :param worker_info:
        The worker to publish to the namespace's shared memory.
    :param address_space:
        The namespace's open address space.
    :raises RuntimeError:
        If the shared memory region is not properly initialized or no
        slots are available.
    :raises struct.error:
        If the serialized worker does not fit in its memory block.
    """
    if address_space.buf is None:
        raise RuntimeError("Registrar service not properly initialized")

    ref = _WorkerReference(worker_info.uid)
    serialized = worker_info.to_protobuf().SerializeToString()
    size = len(serialized)
    block_name = str(ref)

    # Visit complete slots only; a trailing partial slot (buffer length
    # not a multiple of REF_WIDTH) cannot be unpacked.
    last_offset = len(address_space.buf) - REF_WIDTH
    for i in range(0, last_offset + 1, REF_WIDTH):
        slot = struct.unpack_from("16s", address_space.buf, i)[0]
        if slot != NULL_REF:
            continue
        memory_block = await self._shared_memory_pool.acquire(block_name)
        try:
            assert memory_block.buf is not None
            struct.pack_into(f"I{size}s", memory_block.buf, 0, size, serialized)
            # Publish the reference last, once the block is readable.
            struct.pack_into("16s", address_space.buf, i, ref.bytes)
        except Exception:
            # The slot was not written, so the only thing to undo is
            # the acquire above. Dropping by UID instead would find
            # nothing here, or would remove an earlier registration of
            # the same worker.
            await self._shared_memory_pool.release(block_name)
            raise
        break
    else:
        raise RuntimeError("No available slots in shared memory registrar")
|
|
430
|
+
|
|
431
|
+
async def _drop(self, worker_info: WorkerInfo, address_space: SharedMemory):
    """Unregister a worker by removing it from shared memory.

    Clears the first slot that references the worker and releases the
    worker's memory block back to the pool. Does nothing if no slot
    references the worker.

    :param worker_info:
        The worker to unpublish from the namespace's shared memory.
    :param address_space:
        The namespace's open address space.
    :raises RuntimeError:
        If the registrar service is not properly initialized.
    """
    if address_space.buf is None:
        raise RuntimeError("Registrar service not properly initialized")

    target_ref = _WorkerReference(worker_info.uid)

    # Visit complete slots only; a trailing partial slot (buffer length
    # not a multiple of REF_WIDTH) cannot be unpacked.
    last_offset = len(address_space.buf) - REF_WIDTH
    for i in range(0, last_offset + 1, REF_WIDTH):
        slot = struct.unpack_from("16s", address_space.buf, i)[0]
        if slot == NULL_REF:
            continue
        ref = _WorkerReference.from_bytes(slot)
        if ref == target_ref:
            # Clear the slot first so the reference stops being
            # visible before its block is released.
            struct.pack_into("16s", address_space.buf, i, NULL_REF)
            await self._shared_memory_pool.release(str(ref))
            break
|
|
452
|
+
|
|
453
|
+
async def _update(self, worker_info: WorkerInfo, address_space: SharedMemory):
    """Update a worker's properties in shared memory.

    Locates the slot referencing the worker and overwrites the
    worker's memory block with the newly serialized metadata (4-byte
    length followed by the payload). If the write fails, the previous
    contents are written back before the error propagates.

    :param worker_info:
        The updated worker to publish to the namespace's shared memory.
    :param address_space:
        The namespace's open address space.
    :raises RuntimeError:
        If the registrar service is not properly initialized.
    :raises KeyError:
        If the worker is not found in the address space.
    """
    if address_space.buf is None:
        raise RuntimeError("Registrar service not properly initialized")

    target_ref = _WorkerReference(worker_info.uid)
    serialized = worker_info.to_protobuf().SerializeToString()
    size = len(serialized)

    # Visit complete slots only; a trailing partial slot (buffer length
    # not a multiple of REF_WIDTH) cannot be unpacked.
    last_offset = len(address_space.buf) - REF_WIDTH
    for i in range(0, last_offset + 1, REF_WIDTH):
        slot = struct.unpack_from("16s", address_space.buf, i)[0]
        if slot == NULL_REF:
            continue
        ref = _WorkerReference.from_bytes(slot)
        if ref != target_ref:
            continue
        # NOTE(review): this acquire has no matching release here;
        # whether that leaks a pool reference depends on ResourcePool
        # semantics -- confirm against wool._resource_pool.
        memory_block = await self._shared_memory_pool.acquire(str(ref))
        assert memory_block.buf is not None
        # Save prior state before updating
        prior_size = struct.unpack_from("I", memory_block.buf, 0)[0]
        prior_serialized = struct.unpack_from(
            f"{prior_size}s", memory_block.buf, 4
        )[0]
        try:
            struct.pack_into(f"I{size}s", memory_block.buf, 0, size, serialized)
        except Exception:
            # Restore prior state on failure
            struct.pack_into(
                f"I{prior_size}s",
                memory_block.buf,
                0,
                prior_size,
                prior_serialized,
            )
            raise
        return

    # Worker not found in address space
    raise KeyError(f"Worker {worker_info.uid} not found in address space")
|
|
500
|
+
|
|
501
|
+
def _shared_memory_factory(self, name: str):
|
|
502
|
+
"""Create a new shared memory block for worker metadata storage.
|
|
503
|
+
|
|
504
|
+
Creates a shared memory region with the specified name and
|
|
505
|
+
registers an atexit handler to ensure cleanup on process
|
|
506
|
+
termination. Used by the resource pool to allocate memory blocks
|
|
507
|
+
for individual worker metadata.
|
|
508
|
+
|
|
509
|
+
:param name:
|
|
510
|
+
The name for the shared memory block (typically a worker UUID
|
|
511
|
+
hex string).
|
|
512
|
+
:returns:
|
|
513
|
+
A new SharedMemory instance.
|
|
514
|
+
"""
|
|
515
|
+
shared_memory = SharedMemory(
|
|
516
|
+
name=name,
|
|
517
|
+
create=True,
|
|
518
|
+
size=self._block_size,
|
|
519
|
+
)
|
|
520
|
+
|
|
521
|
+
def cleanup():
|
|
522
|
+
try:
|
|
523
|
+
shared_memory.unlink()
|
|
524
|
+
except OSError:
|
|
525
|
+
pass
|
|
526
|
+
|
|
527
|
+
self._cleanups[name] = atexit.register(cleanup)
|
|
528
|
+
return shared_memory
|
|
529
|
+
|
|
530
|
+
def _shared_memory_finalizer(self, shared_memory: SharedMemory):
|
|
531
|
+
"""Clean up a shared memory block when released from the pool.
|
|
532
|
+
|
|
533
|
+
Unlinks the shared memory region and unregisters the atexit
|
|
534
|
+
handler. Errors during unlink are silently ignored to handle
|
|
535
|
+
Windows platforms where unlink may fail if other processes still
|
|
536
|
+
have the memory file open.
|
|
537
|
+
|
|
538
|
+
:param shared_memory:
|
|
539
|
+
The SharedMemory instance to finalize.
|
|
540
|
+
"""
|
|
541
|
+
try:
|
|
542
|
+
shared_memory.unlink()
|
|
543
|
+
except OSError:
|
|
544
|
+
pass
|
|
545
|
+
atexit.unregister(self._cleanups.pop(shared_memory.name))
|
|
546
|
+
|
|
547
|
+
class Subscriber:
|
|
548
|
+
"""Subscriber for receiving worker discovery events.
|
|
549
|
+
|
|
550
|
+
Subscribes to worker :class:`discovery events <~wool.DiscoveryEvent>`
|
|
551
|
+
from a shared memory region, monitoring for changes via filesystem
|
|
552
|
+
notifications and yielding events as workers are added, updated, or
|
|
553
|
+
dropped. Multiple subscribers in different processes can read from
|
|
554
|
+
the same namespace independently.
|
|
555
|
+
|
|
556
|
+
Uses watchdog to monitor a notification file that publishers touch
|
|
557
|
+
when modifying the shared memory, providing near-instant notification
|
|
558
|
+
of changes. Falls back to periodic polling if notifications are
|
|
559
|
+
delayed or missed.
|
|
560
|
+
|
|
561
|
+
:param namespace:
|
|
562
|
+
The namespace identifier for the shared memory region.
|
|
563
|
+
:param filter:
|
|
564
|
+
Optional predicate function to filter workers. Only workers for
|
|
565
|
+
which the predicate returns True will be included in events.
|
|
566
|
+
:param poll_interval:
|
|
567
|
+
Maximum polling interval in seconds for when filesystem
|
|
568
|
+
notifications are delayed or missed.
|
|
569
|
+
"""
|
|
570
|
+
|
|
571
|
+
_filter: Final[PredicateFunction | None]
|
|
572
|
+
_namespace: Final[str]
|
|
573
|
+
_poll_interval: Final[float | None]
|
|
574
|
+
|
|
575
|
+
def __init__(
|
|
576
|
+
self,
|
|
577
|
+
namespace: str,
|
|
578
|
+
filter: PredicateFunction | None = None,
|
|
579
|
+
*,
|
|
580
|
+
poll_interval: float | None = None,
|
|
581
|
+
):
|
|
582
|
+
self._namespace = namespace
|
|
583
|
+
self._filter = filter
|
|
584
|
+
if poll_interval is not None and poll_interval < 0:
|
|
585
|
+
raise ValueError(f"Expected positive poll interval, got {poll_interval}")
|
|
586
|
+
self._poll_interval = poll_interval
|
|
587
|
+
|
|
588
|
+
def __reduce__(self):
|
|
589
|
+
return type(self), (self._namespace, self._filter)
|
|
590
|
+
|
|
591
|
+
def __aiter__(self) -> AsyncIterator[DiscoveryEvent]:
|
|
592
|
+
return self._event_stream(self._filter)
|
|
593
|
+
|
|
594
|
+
@property
|
|
595
|
+
def namespace(self):
|
|
596
|
+
"""The namespace identifier for this subscriber."""
|
|
597
|
+
return self._namespace
|
|
598
|
+
|
|
599
|
+
async def _event_stream(
    self, filter: PredicateFunction | None = None
) -> AsyncIterator[DiscoveryEvent]:
    """Monitor shared memory for worker changes via filesystem notifications.

    Creates the namespace's notification file if needed and starts a
    watchdog observer on its directory. Then loops forever: scan every
    slot of the address space, yield the events produced by ``_diff``
    against the previous scan, and wait for the next notification (or
    for ``poll_interval`` seconds, when set) before scanning again.
    The observer is stopped and joined when the generator ends.

    :param filter:
        Optional predicate function to filter workers. Only workers for
        which the predicate returns True will be included in events.
    :yields:
        Discovery events as changes are detected in shared memory.
    """
    cached_workers: dict[str, WorkerInfo] = {}
    notification = asyncio.Event()
    lock = asyncio.Lock()
    loop = asyncio.get_running_loop()
    if not (watchdog := _watchdog_path(self._namespace)).exists():
        watchdog.touch()
    handler = _Watchdog(notification, watchdog, lock, loop)
    observer = Observer()
    observer.schedule(handler, path=str(watchdog.parent), recursive=False)
    observer.start()

    try:
        with _shared_memory(_short_hash(self._namespace)) as address_space:
            assert address_space.buf is not None
            # Visit complete slots only; a trailing partial slot
            # (buffer length not a multiple of REF_WIDTH) cannot be
            # unpacked.
            scan_end = len(address_space.buf) - REF_WIDTH + 1

            while True:
                # The handler sets the event under this same lock, so a
                # notification arriving mid-scan is set after the
                # clear() below rather than before it.
                async with lock:
                    notification.clear()
                    discovered_workers: dict[str, WorkerInfo] = {}
                    for i in range(0, scan_end, REF_WIDTH):
                        slot = struct.unpack_from("16s", address_space.buf, i)[0]
                        if slot == NULL_REF:
                            continue
                        ref = _WorkerReference.from_bytes(slot)
                        try:
                            worker_info = self._deserialize_worker_info(str(ref))
                        except FileNotFoundError:
                            # The worker was dropped between reading
                            # its slot and opening its block; treat it
                            # as absent instead of ending the stream.
                            continue
                        if filter is None or filter(worker_info):
                            discovered_workers[str(worker_info.uid)] = worker_info

                for event in self._diff(cached_workers, discovered_workers):
                    yield event

                # With poll_interval=None this waits until notified.
                try:
                    await asyncio.wait_for(
                        notification.wait(), timeout=self._poll_interval
                    )
                except asyncio.TimeoutError:
                    pass
    finally:
        observer.stop()
        observer.join()
|
|
656
|
+
|
|
657
|
+
def _deserialize_worker_info(self, ref: str):
    """Load and deserialize worker metadata from shared memory.

    Opens the shared memory block named ``ref``, reads the 4-byte
    length header followed by that many bytes of serialized protobuf,
    and converts the parsed message to a WorkerInfo.

    :param ref:
        Name of the shared memory block containing the worker's
        metadata.
    :returns:
        The deserialized WorkerInfo instance.
    """
    with _shared_memory(ref) as memory_block:
        assert memory_block.buf is not None
        (payload_size,) = struct.unpack_from("I", memory_block.buf, 0)
        # The payload starts right after the 4-byte length header.
        (payload,) = struct.unpack_from(f"{payload_size}s", memory_block.buf, 4)
        message = WorkerInfoProtobuf.FromString(payload)
        return WorkerInfo.from_protobuf(message)
|
|
676
|
+
|
|
677
|
+
def _diff(
    self,
    cached_workers: dict[str, WorkerInfo],
    discovered_workers: dict[str, WorkerInfo],
):
    """Detect and emit events for worker changes.

    Compares the cached worker state with the newly discovered
    workers. Workers only in the scan are reported as added, workers
    only in the cache as dropped, and workers in both as updated. The
    cache is updated in place as events are produced.

    :param cached_workers:
        Dictionary of previously discovered workers (UID string ->
        WorkerInfo). Modified in-place to reflect current state.
    :param discovered_workers:
        Dictionary of workers found in the current scan (UID string ->
        WorkerInfo).
    :yields:
        Discovery events for each detected change (worker-added,
        worker-dropped, worker-updated).
    """
    # Snapshot both key sets before touching the cache. Computing the
    # "updated" set after the additions below would count every newly
    # added worker as updated too.
    cached_uids = set(cached_workers)
    discovered_uids = set(discovered_workers)

    # Identify added workers
    for uid in discovered_uids - cached_uids:
        added = discovered_workers[uid]
        cached_workers[uid] = added
        yield DiscoveryEvent(type="worker-added", worker_info=added)

    # Identify removed workers
    for uid in cached_uids - discovered_uids:
        dropped = cached_workers.pop(uid)
        yield DiscoveryEvent(type="worker-dropped", worker_info=dropped)

    # Identify updated workers
    # NOTE(review): every worker present in both scans is reported,
    # changed or not; an equality check could suppress no-op updates
    # if WorkerInfo compares by value -- confirm before adding one.
    for uid in cached_uids & discovered_uids:
        updated = discovered_workers[uid]
        cached_workers[uid] = updated
        yield DiscoveryEvent(type="worker-updated", worker_info=updated)
|
|
723
|
+
|
|
724
|
+
|
|
725
|
+
def _short_hash(s: str, n: int = 30) -> str:
|
|
726
|
+
"""Create a shortened hash of a string for use as a system identifier.
|
|
727
|
+
|
|
728
|
+
Generates a SHA-256 hash of the input string and returns the first n
|
|
729
|
+
characters of a URL-safe base64 encoding. This encoding provides 50% more
|
|
730
|
+
entropy than hexadecimal in the same space (180 bits vs 120 bits for 30
|
|
731
|
+
chars). Used to create platform-safe names for shared memory regions and
|
|
732
|
+
lock files that fit within system limits (31 chars on macOS, 255 on Linux).
|
|
733
|
+
|
|
734
|
+
:param s:
|
|
735
|
+
The string to abbreviate (typically a namespace identifier).
|
|
736
|
+
:param n:
|
|
737
|
+
Number of base64 characters to return. Defaults to 30 for macOS
|
|
738
|
+
compatibility.
|
|
739
|
+
:returns:
|
|
740
|
+
The first n characters of the URL-safe base64-encoded SHA-256 hash.
|
|
741
|
+
Uses character set: A-Za-z0-9-_
|
|
742
|
+
"""
|
|
743
|
+
import base64
|
|
744
|
+
|
|
745
|
+
hash_bytes = hashlib.sha256(s.encode()).digest()
|
|
746
|
+
# URL-safe base64 encoding (replaces + with -, / with _)
|
|
747
|
+
b64_str = base64.urlsafe_b64encode(hash_bytes).decode("utf-8")
|
|
748
|
+
# Remove padding characters and truncate to n chars
|
|
749
|
+
return b64_str.rstrip("=")[:n]
|
|
750
|
+
|
|
751
|
+
|
|
752
|
+
@contextmanager
|
|
753
|
+
def _shared_memory(name):
|
|
754
|
+
"""Open an existing shared memory region by name.
|
|
755
|
+
|
|
756
|
+
Context manager that opens a shared memory region for reading or writing
|
|
757
|
+
and ensures it is properly closed on exit. Does not create new memory
|
|
758
|
+
regions (use SharedMemory with create=True for that).
|
|
759
|
+
|
|
760
|
+
:param name:
|
|
761
|
+
The name of the shared memory region to open.
|
|
762
|
+
:yields:
|
|
763
|
+
An open SharedMemory instance.
|
|
764
|
+
|
|
765
|
+
.. note::
|
|
766
|
+
Close errors are silently ignored to handle cases where the memory
|
|
767
|
+
region has been unlinked by another process.
|
|
768
|
+
"""
|
|
769
|
+
shared_memory = SharedMemory(name=name)
|
|
770
|
+
try:
|
|
771
|
+
yield shared_memory
|
|
772
|
+
finally:
|
|
773
|
+
try:
|
|
774
|
+
shared_memory.close()
|
|
775
|
+
except Exception:
|
|
776
|
+
pass
|
|
777
|
+
|
|
778
|
+
|
|
779
|
+
@asynccontextmanager
async def _lock(namespace: str):
    """Acquire an exclusive lock for the address space identified by namespace.

    Locks the file ``wool-lock-<short hash of namespace>`` in the
    system temporary directory using portalocker, which synchronizes
    unrelated processes publishing to the same namespace.

    Lock attempts are non-blocking. While another holder has the lock,
    the coroutine sleeps for 1ms between attempts so that the event
    loop keeps running and the wait does not spin the CPU.

    :param namespace:
        The namespace identifying the shared memory region to lock.
    """
    lock_name = _short_hash(namespace)
    lock_path = Path(tempfile.gettempdir()) / f"wool-lock-{lock_name}"

    with open(lock_path, "w") as lock_file:
        while True:
            try:
                portalocker.lock(lock_file, portalocker.LOCK_EX | portalocker.LOCK_NB)
            except portalocker.LockException:
                # Back off briefly; sleep(0) would retry on every loop
                # iteration and busy-spin while the lock is contended.
                await asyncio.sleep(0.001)
            else:
                break

        try:
            yield
        finally:
            portalocker.unlock(lock_file)
|
|
809
|
+
|
|
810
|
+
|
|
811
|
+
def _watchdog_path(namespace: str) -> Path:
|
|
812
|
+
"""Get the path to the notification file for a namespace.
|
|
813
|
+
|
|
814
|
+
Returns the path to a temporary file that publishers touch when modifying
|
|
815
|
+
the shared memory region, signaling subscribers to scan for changes.
|
|
816
|
+
|
|
817
|
+
:param namespace:
|
|
818
|
+
The namespace identifying the shared memory region.
|
|
819
|
+
:returns:
|
|
820
|
+
Path to the notification file for this namespace.
|
|
821
|
+
"""
|
|
822
|
+
return Path(tempfile.gettempdir()) / f"wool-notify-{namespace}"
|