tachyon-ipc 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tachyon_ipc-0.1.0/MANIFEST.in +10 -0
- tachyon_ipc-0.1.0/PKG-INFO +281 -0
- tachyon_ipc-0.1.0/README.md +262 -0
- tachyon_ipc-0.1.0/_core_local/include/tachyon/arena.hpp +161 -0
- tachyon_ipc-0.1.0/_core_local/include/tachyon/concepts.hpp +8 -0
- tachyon_ipc-0.1.0/_core_local/include/tachyon/shm.hpp +66 -0
- tachyon_ipc-0.1.0/_core_local/include/tachyon/transport.hpp +47 -0
- tachyon_ipc-0.1.0/_core_local/include/tachyon.h +105 -0
- tachyon_ipc-0.1.0/_core_local/include/tachyon.hpp +45 -0
- tachyon_ipc-0.1.0/_core_local/src/arena.cpp +450 -0
- tachyon_ipc-0.1.0/_core_local/src/shm.cpp +110 -0
- tachyon_ipc-0.1.0/_core_local/src/tachyon_c.cpp +443 -0
- tachyon_ipc-0.1.0/_core_local/src/transport_uds.cpp +154 -0
- tachyon_ipc-0.1.0/_dlpack_local/dlpack/dlpack.h +647 -0
- tachyon_ipc-0.1.0/benchmark/bench_ipc.py +117 -0
- tachyon_ipc-0.1.0/pyproject.toml +29 -0
- tachyon_ipc-0.1.0/setup.cfg +4 -0
- tachyon_ipc-0.1.0/setup.py +78 -0
- tachyon_ipc-0.1.0/src/tachyon/__init__.py +24 -0
- tachyon_ipc-0.1.0/src/tachyon/__init__.pyi +101 -0
- tachyon_ipc-0.1.0/src/tachyon/_tachyon.cpp +1210 -0
- tachyon_ipc-0.1.0/src/tachyon/_tachyon.pyi +122 -0
- tachyon_ipc-0.1.0/src/tachyon/bus.py +100 -0
- tachyon_ipc-0.1.0/src/tachyon/message.py +8 -0
- tachyon_ipc-0.1.0/src/tachyon/py.typed +0 -0
- tachyon_ipc-0.1.0/src/tachyon_ipc.egg-info/PKG-INFO +281 -0
- tachyon_ipc-0.1.0/src/tachyon_ipc.egg-info/SOURCES.txt +29 -0
- tachyon_ipc-0.1.0/src/tachyon_ipc.egg-info/dependency_links.txt +1 -0
- tachyon_ipc-0.1.0/src/tachyon_ipc.egg-info/not-zip-safe +1 -0
- tachyon_ipc-0.1.0/src/tachyon_ipc.egg-info/top_level.txt +1 -0
- tachyon_ipc-0.1.0/tests/test_bus.py +100 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
include README.md
|
|
2
|
+
include pyproject.toml
|
|
3
|
+
include setup.py
|
|
4
|
+
include src/tachyon/py.typed
|
|
5
|
+
include src/tachyon/*.pyi
|
|
6
|
+
recursive-include src/tachyon *.cpp
|
|
7
|
+
recursive-include _core_local *.cpp *.h *.hpp
|
|
8
|
+
recursive-include _dlpack_local *.h
|
|
9
|
+
recursive-include tests *.py
|
|
10
|
+
recursive-include benchmark *.py
|
|
@@ -0,0 +1,281 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: tachyon-ipc
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Low-latency bare-metal IPC — SPSC ring buffer over POSIX shared memory
|
|
5
|
+
Author: Riyane El Qoqui
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: github, https://github.com/riyaneel/tachyon
|
|
8
|
+
Classifier: Operating System :: POSIX :: Linux
|
|
9
|
+
Classifier: Operating System :: MacOS
|
|
10
|
+
Classifier: Programming Language :: C++
|
|
11
|
+
Classifier: Programming Language :: Python :: 3 :: Only
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
17
|
+
Requires-Python: >=3.10
|
|
18
|
+
Description-Content-Type: text/markdown
|
|
19
|
+
|
|
20
|
+
# Tachyon
|
|
21
|
+
|
|
22
|
+
[CI](https://github.com/riyaneel/tachyon/actions/workflows/ci.yml)
|
|
23
|
+
[PyPI](https://pypi.org/project/tachyon-ipc/)
|
|
24
|
+
[crates.io](https://crates.io/crates/tachyon-ipc)
|
|
25
|
+
[License](./LICENSE)
|
|
26
|
+
|
|
27
|
+
Tachyon is a bare-metal, lock-free IPC primitive: a strictly bounded SPSC ring
|
|
28
|
+
buffer over POSIX shared memory, with zero-copy bindings for Python, Rust,
|
|
29
|
+
and C++.
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+
## Install
|
|
34
|
+
|
|
35
|
+
**Python** — compiles the C++ core at install time and requires GCC 14+ or Clang 17+:
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
pip install tachyon-ipc
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
**Rust:**
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
cargo add tachyon-ipc
|
|
45
|
+
```
|
|
46
|
+
|
|
47
|
+
**C++ (CMake FetchContent):**
|
|
48
|
+
|
|
49
|
+
```cmake
|
|
50
|
+
include(FetchContent)
|
|
51
|
+
|
|
52
|
+
FetchContent_Declare(tachyon
|
|
53
|
+
GIT_REPOSITORY https://github.com/riyaneel/tachyon.git
|
|
54
|
+
GIT_TAG v0.1.0
|
|
55
|
+
)
|
|
56
|
+
FetchContent_GetProperties(tachyon)
|
|
57
|
+
if (NOT tachyon_POPULATED)
|
|
58
|
+
FetchContent_Populate(tachyon)
|
|
59
|
+
add_subdirectory(${tachyon_SOURCE_DIR}/core ${tachyon_BINARY_DIR}/tachyon-core)
|
|
60
|
+
endif ()
|
|
61
|
+
|
|
62
|
+
target_link_libraries(my_app PRIVATE tachyon)
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
---
|
|
66
|
+
|
|
67
|
+
## Quickstart
|
|
68
|
+
|
|
69
|
+
### Python — Standard API
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
import threading
|
|
73
|
+
import tachyon
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def server():
|
|
77
|
+
with tachyon.Bus.listen("/tmp/demo.sock", 1 << 16) as bus:
|
|
78
|
+
msg = next(iter(bus))
|
|
79
|
+
print(f"received type_id={msg.type_id} data={msg.data}")
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
t = threading.Thread(target=server)
|
|
83
|
+
t.start()
|
|
84
|
+
|
|
85
|
+
with tachyon.Bus.connect("/tmp/demo.sock") as bus:
|
|
86
|
+
bus.send(b"hello tachyon", type_id=1)
|
|
87
|
+
|
|
88
|
+
t.join()
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Python — Zero-Copy
|
|
92
|
+
|
|
93
|
+
```python
|
|
94
|
+
import threading
|
|
95
|
+
import tachyon
|
|
96
|
+
|
|
97
|
+
payload = b"zero_copy_payload"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def server():
|
|
101
|
+
with tachyon.Bus.listen("/tmp/demo_zc.sock", 1 << 16) as bus:
|
|
102
|
+
with bus.recv_zero_copy() as rx:
|
|
103
|
+
with memoryview(rx) as mv:
|
|
104
|
+
data = mv.tobytes() # single copy into Python heap
|
|
105
|
+
|
|
106
|
+
|
|
107
|
+
t = threading.Thread(target=server)
|
|
108
|
+
t.start()
|
|
109
|
+
|
|
110
|
+
with tachyon.Bus.connect("/tmp/demo_zc.sock") as bus:
|
|
111
|
+
with bus.send_zero_copy(size=len(payload), type_id=42) as tx:
|
|
112
|
+
with memoryview(tx) as mv:
|
|
113
|
+
mv[:] = payload
|
|
114
|
+
tx.actual_size = len(payload)
|
|
115
|
+
|
|
116
|
+
t.join()
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
### Python — DLPack / PyTorch
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
import struct, threading
|
|
123
|
+
import torch, tachyon
|
|
124
|
+
|
|
125
|
+
data = struct.pack("4f", 1.0, 2.0, 3.0, 4.0) # 16 bytes, 4× float32
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def server():
|
|
129
|
+
with tachyon.Bus.listen("/tmp/demo_dl.sock", 1 << 16) as bus:
|
|
130
|
+
with bus.drain_batch() as batch:
|
|
131
|
+
tensor = torch.from_dlpack(batch[0]).view(torch.float32)
|
|
132
|
+
print(tensor) # tensor([1., 2., 3., 4.])
|
|
133
|
+
del tensor # release before batch commits
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
t = threading.Thread(target=server)
|
|
137
|
+
t.start()
|
|
138
|
+
|
|
139
|
+
with tachyon.Bus.connect("/tmp/demo_dl.sock") as bus:
|
|
140
|
+
with bus.send_zero_copy(size=len(data), type_id=1) as tx:
|
|
141
|
+
with memoryview(tx) as mv:
|
|
142
|
+
mv[:] = data
|
|
143
|
+
tx.actual_size = len(data)
|
|
144
|
+
|
|
145
|
+
t.join()
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
### Rust
|
|
149
|
+
|
|
150
|
+
```rust
|
|
151
|
+
use std::thread;
|
|
152
|
+
use tachyon_ipc::Bus;
|
|
153
|
+
|
|
154
|
+
const SOCK: &str = "/tmp/demo_rust.sock";
|
|
155
|
+
const CAP: usize = 1 << 16;
|
|
156
|
+
|
|
157
|
+
fn main() {
|
|
158
|
+
let srv = thread::spawn(|| {
|
|
159
|
+
let bus = Bus::listen(SOCK, CAP).unwrap();
|
|
160
|
+
let guard = bus.acquire_rx(10_000).unwrap();
|
|
161
|
+
println!("received {} bytes, type_id={}", guard.actual_size, guard.type_id);
|
|
162
|
+
guard.commit().unwrap();
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
thread::sleep(std::time::Duration::from_millis(20));
|
|
166
|
+
|
|
167
|
+
let bus = Bus::connect(SOCK).unwrap();
|
|
168
|
+
bus.send(b"hello tachyon", 1).unwrap();
|
|
169
|
+
|
|
170
|
+
srv.join().unwrap();
|
|
171
|
+
}
|
|
172
|
+
```
|
|
173
|
+
|
|
174
|
+
### C++
|
|
175
|
+
|
|
176
|
+
```cpp
|
|
177
|
+
#include <tachyon/arena.hpp>
|
|
178
|
+
#include <tachyon/shm.hpp>
|
|
179
|
+
#include <cstring>
|
|
180
|
+
|
|
181
|
+
using namespace tachyon::core;
|
|
182
|
+
|
|
183
|
+
int main() {
|
|
184
|
+
constexpr size_t CAPACITY = 4096;
|
|
185
|
+
constexpr size_t SHM_SIZE = sizeof(MemoryLayout) + CAPACITY;
|
|
186
|
+
|
|
187
|
+
auto shm = SharedMemory::create("demo", SHM_SIZE).value();
|
|
188
|
+
auto producer = Arena::format(shm.data(), CAPACITY).value();
|
|
189
|
+
auto consumer = Arena::attach(shm.data()).value();
|
|
190
|
+
|
|
191
|
+
// TX
|
|
192
|
+
std::byte *tx = producer.acquire_tx(32);
|
|
193
|
+
std::memset(tx, 0xAB, 32);
|
|
194
|
+
producer.commit_tx(32, /*type_id=*/1);
|
|
195
|
+
producer.flush();
|
|
196
|
+
|
|
197
|
+
// RX
|
|
198
|
+
uint32_t type_id = 0;
|
|
199
|
+
size_t actual = 0;
|
|
200
|
+
const std::byte *rx = consumer.acquire_rx(type_id, actual);
|
|
201
|
+
consumer.commit_rx();
|
|
202
|
+
}
|
|
203
|
+
```
|
|
204
|
+
|
|
205
|
+
---
|
|
206
|
+
|
|
207
|
+
## Benchmarks
|
|
208
|
+
|
|
209
|
+
Ping-pong RTT, two threads, 32-byte payload, 1 000 000 samples.
|
|
210
|
+
**Machine:** Intel Core i7-12650H, 64 GiB DDR5-5600 SODIMM.
|
|
211
|
+
**Build:** GCC 14, PGO Release (`scripts/pgo_build.sh`), `taskset -c 7,8,9`.
|
|
212
|
+
|
|
213
|
+
| Percentile | Latency |
|
|
214
|
+
|------------|-----------|
|
|
215
|
+
| Min | 78 ns |
|
|
216
|
+
| p50 | 93 ns |
|
|
217
|
+
| p90 | 145 ns |
|
|
218
|
+
| p99 | 155 ns |
|
|
219
|
+
| p99.9 | 166 ns |
|
|
220
|
+
| p99.99 | 350 ns |
|
|
221
|
+
| Max | 17 540 ns |
|
|
222
|
+
|
|
223
|
+
**Throughput: 8 553 K RTT/sec**
|
|
224
|
+
|
|
225
|
+
Max spikes reflect OS scheduler preemption on a non-isolated laptop core — not
|
|
226
|
+
a ring buffer pathology. On a server with isolated cores, p99.99 converges
|
|
227
|
+
toward the p99.9 band.
|
|
228
|
+
|
|
229
|
+
---
|
|
230
|
+
|
|
231
|
+
## Architecture
|
|
232
|
+
|
|
233
|
+
Tachyon decouples the **control plane** (connection bootstrap) from the
|
|
234
|
+
**data plane** (hot-path I/O).
|
|
235
|
+
|
|
236
|
+
**Control plane.** Process discovery and the initial ABI handshake run over
|
|
237
|
+
a Unix domain socket. The socket transfers an anonymous `memfd` file
|
|
238
|
+
descriptor via `SCM_RIGHTS`, then is permanently discarded. If the producer
|
|
239
|
+
and consumer were compiled with differing `TACHYON_MSG_ALIGNMENT` values,
|
|
240
|
+
the connection is rejected before the first byte of data is exchanged.
|
|
241
|
+
|
|
242
|
+
**Data plane.** All subsequent I/O operates directly in the shared memory
|
|
243
|
+
segment with no kernel involvement. The SPSC ring uses
|
|
244
|
+
`memory_order_acquire` / `memory_order_release` atomics with amortized
|
|
245
|
+
batch publication: the shared head/tail indices are updated at most once
|
|
246
|
+
every 32 messages or on an explicit `flush()`.
|
|
247
|
+
|
|
248
|
+
**Hardware sympathy.** Every control structure — message headers, atomic
|
|
249
|
+
indices, watchdog flags — is padded to 64-byte or 128-byte boundaries.
|
|
250
|
+
False sharing between producer and consumer cache lines is structurally
|
|
251
|
+
impossible.
|
|
252
|
+
|
|
253
|
+
**Hybrid wait strategy.** The consumer spins for a bounded threshold
|
|
254
|
+
(`cpu_relax()`), then sleeps via `SYS_futex` (Linux) or `__ulock_wait`
|
|
255
|
+
(macOS) with a 200 ms watchdog timeout. Kernel sleeps are bounded so the
|
|
256
|
+
thread periodically returns to the host runtime to process signals.
|
|
257
|
+
|
|
258
|
+
**Zero-copy contract.** C++ and Rust expose raw pointers or slices tied to
|
|
259
|
+
the ring buffer lifetime. Python surfaces the buffer protocol
|
|
260
|
+
(`memoryview`) and DLPack (`__dlpack__`), allowing PyTorch, JAX, and NumPy
|
|
261
|
+
to consume payloads directly from shared memory without copying.
|
|
262
|
+
|
|
263
|
+
For wire protocol details and ABI guarantees → [`ABI.md`](./ABI.md).
|
|
264
|
+
|
|
265
|
+
---
|
|
266
|
+
|
|
267
|
+
## Requirements
|
|
268
|
+
|
|
269
|
+
| Component | Minimum |
|
|
270
|
+
|-----------|-------------------------------------------|
|
|
271
|
+
| OS | Linux 5.10+ (primary), macOS 13+ (tier-2) |
|
|
272
|
+
| Compiler | GCC 14+ or Clang 17+ |
|
|
273
|
+
| CMake | 3.31+ |
|
|
274
|
+
| Python | 3.10+ |
|
|
275
|
+
| Rust | stable (2024 edition) |
|
|
276
|
+
|
|
277
|
+
---
|
|
278
|
+
|
|
279
|
+
## License
|
|
280
|
+
|
|
281
|
+
[Apache 2.0](./LICENSE)
|
|
@@ -0,0 +1,262 @@
|
|
|
1
|
+
# Tachyon
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/riyaneel/tachyon/actions/workflows/ci.yml)
|
|
4
|
+
[PyPI](https://pypi.org/project/tachyon-ipc/)
|
|
5
|
+
[crates.io](https://crates.io/crates/tachyon-ipc)
|
|
6
|
+
[License](./LICENSE)
|
|
7
|
+
|
|
8
|
+
Tachyon is a bare-metal, lock-free IPC primitive: a strictly bounded SPSC ring
|
|
9
|
+
buffer over POSIX shared memory, with zero-copy bindings for Python, Rust,
|
|
10
|
+
and C++.
|
|
11
|
+
|
|
12
|
+
---
|
|
13
|
+
|
|
14
|
+
## Install
|
|
15
|
+
|
|
16
|
+
**Python** — compiles the C++ core at install time and requires GCC 14+ or Clang 17+:
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install tachyon-ipc
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
**Rust:**
|
|
23
|
+
|
|
24
|
+
```bash
|
|
25
|
+
cargo add tachyon-ipc
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
**C++ (CMake FetchContent):**
|
|
29
|
+
|
|
30
|
+
```cmake
|
|
31
|
+
include(FetchContent)
|
|
32
|
+
|
|
33
|
+
FetchContent_Declare(tachyon
|
|
34
|
+
GIT_REPOSITORY https://github.com/riyaneel/tachyon.git
|
|
35
|
+
GIT_TAG v0.1.0
|
|
36
|
+
)
|
|
37
|
+
FetchContent_GetProperties(tachyon)
|
|
38
|
+
if (NOT tachyon_POPULATED)
|
|
39
|
+
FetchContent_Populate(tachyon)
|
|
40
|
+
add_subdirectory(${tachyon_SOURCE_DIR}/core ${tachyon_BINARY_DIR}/tachyon-core)
|
|
41
|
+
endif ()
|
|
42
|
+
|
|
43
|
+
target_link_libraries(my_app PRIVATE tachyon)
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## Quickstart
|
|
49
|
+
|
|
50
|
+
### Python — Standard API
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import threading
|
|
54
|
+
import tachyon
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def server():
|
|
58
|
+
with tachyon.Bus.listen("/tmp/demo.sock", 1 << 16) as bus:
|
|
59
|
+
msg = next(iter(bus))
|
|
60
|
+
print(f"received type_id={msg.type_id} data={msg.data}")
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
t = threading.Thread(target=server)
|
|
64
|
+
t.start()
|
|
65
|
+
|
|
66
|
+
with tachyon.Bus.connect("/tmp/demo.sock") as bus:
|
|
67
|
+
bus.send(b"hello tachyon", type_id=1)
|
|
68
|
+
|
|
69
|
+
t.join()
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Python — Zero-Copy
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
import threading
|
|
76
|
+
import tachyon
|
|
77
|
+
|
|
78
|
+
payload = b"zero_copy_payload"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def server():
|
|
82
|
+
with tachyon.Bus.listen("/tmp/demo_zc.sock", 1 << 16) as bus:
|
|
83
|
+
with bus.recv_zero_copy() as rx:
|
|
84
|
+
with memoryview(rx) as mv:
|
|
85
|
+
data = mv.tobytes() # single copy into Python heap
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
t = threading.Thread(target=server)
|
|
89
|
+
t.start()
|
|
90
|
+
|
|
91
|
+
with tachyon.Bus.connect("/tmp/demo_zc.sock") as bus:
|
|
92
|
+
with bus.send_zero_copy(size=len(payload), type_id=42) as tx:
|
|
93
|
+
with memoryview(tx) as mv:
|
|
94
|
+
mv[:] = payload
|
|
95
|
+
tx.actual_size = len(payload)
|
|
96
|
+
|
|
97
|
+
t.join()
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
### Python — DLPack / PyTorch
|
|
101
|
+
|
|
102
|
+
```python
|
|
103
|
+
import struct, threading
|
|
104
|
+
import torch, tachyon
|
|
105
|
+
|
|
106
|
+
data = struct.pack("4f", 1.0, 2.0, 3.0, 4.0) # 16 bytes, 4× float32
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def server():
|
|
110
|
+
with tachyon.Bus.listen("/tmp/demo_dl.sock", 1 << 16) as bus:
|
|
111
|
+
with bus.drain_batch() as batch:
|
|
112
|
+
tensor = torch.from_dlpack(batch[0]).view(torch.float32)
|
|
113
|
+
print(tensor) # tensor([1., 2., 3., 4.])
|
|
114
|
+
del tensor # release before batch commits
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
t = threading.Thread(target=server)
|
|
118
|
+
t.start()
|
|
119
|
+
|
|
120
|
+
with tachyon.Bus.connect("/tmp/demo_dl.sock") as bus:
|
|
121
|
+
with bus.send_zero_copy(size=len(data), type_id=1) as tx:
|
|
122
|
+
with memoryview(tx) as mv:
|
|
123
|
+
mv[:] = data
|
|
124
|
+
tx.actual_size = len(data)
|
|
125
|
+
|
|
126
|
+
t.join()
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
### Rust
|
|
130
|
+
|
|
131
|
+
```rust
|
|
132
|
+
use std::thread;
|
|
133
|
+
use tachyon_ipc::Bus;
|
|
134
|
+
|
|
135
|
+
const SOCK: &str = "/tmp/demo_rust.sock";
|
|
136
|
+
const CAP: usize = 1 << 16;
|
|
137
|
+
|
|
138
|
+
fn main() {
|
|
139
|
+
let srv = thread::spawn(|| {
|
|
140
|
+
let bus = Bus::listen(SOCK, CAP).unwrap();
|
|
141
|
+
let guard = bus.acquire_rx(10_000).unwrap();
|
|
142
|
+
println!("received {} bytes, type_id={}", guard.actual_size, guard.type_id);
|
|
143
|
+
guard.commit().unwrap();
|
|
144
|
+
});
|
|
145
|
+
|
|
146
|
+
thread::sleep(std::time::Duration::from_millis(20));
|
|
147
|
+
|
|
148
|
+
let bus = Bus::connect(SOCK).unwrap();
|
|
149
|
+
bus.send(b"hello tachyon", 1).unwrap();
|
|
150
|
+
|
|
151
|
+
srv.join().unwrap();
|
|
152
|
+
}
|
|
153
|
+
```
|
|
154
|
+
|
|
155
|
+
### C++
|
|
156
|
+
|
|
157
|
+
```cpp
|
|
158
|
+
#include <tachyon/arena.hpp>
|
|
159
|
+
#include <tachyon/shm.hpp>
|
|
160
|
+
#include <cstring>
|
|
161
|
+
|
|
162
|
+
using namespace tachyon::core;
|
|
163
|
+
|
|
164
|
+
int main() {
|
|
165
|
+
constexpr size_t CAPACITY = 4096;
|
|
166
|
+
constexpr size_t SHM_SIZE = sizeof(MemoryLayout) + CAPACITY;
|
|
167
|
+
|
|
168
|
+
auto shm = SharedMemory::create("demo", SHM_SIZE).value();
|
|
169
|
+
auto producer = Arena::format(shm.data(), CAPACITY).value();
|
|
170
|
+
auto consumer = Arena::attach(shm.data()).value();
|
|
171
|
+
|
|
172
|
+
// TX
|
|
173
|
+
std::byte *tx = producer.acquire_tx(32);
|
|
174
|
+
std::memset(tx, 0xAB, 32);
|
|
175
|
+
producer.commit_tx(32, /*type_id=*/1);
|
|
176
|
+
producer.flush();
|
|
177
|
+
|
|
178
|
+
// RX
|
|
179
|
+
uint32_t type_id = 0;
|
|
180
|
+
size_t actual = 0;
|
|
181
|
+
const std::byte *rx = consumer.acquire_rx(type_id, actual);
|
|
182
|
+
consumer.commit_rx();
|
|
183
|
+
}
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
---
|
|
187
|
+
|
|
188
|
+
## Benchmarks
|
|
189
|
+
|
|
190
|
+
Ping-pong RTT, two threads, 32-byte payload, 1 000 000 samples.
|
|
191
|
+
**Machine:** Intel Core i7-12650H, 64 GiB DDR5-5600 SODIMM.
|
|
192
|
+
**Build:** GCC 14, PGO Release (`scripts/pgo_build.sh`), `taskset -c 7,8,9`.
|
|
193
|
+
|
|
194
|
+
| Percentile | Latency |
|
|
195
|
+
|------------|-----------|
|
|
196
|
+
| Min | 78 ns |
|
|
197
|
+
| p50 | 93 ns |
|
|
198
|
+
| p90 | 145 ns |
|
|
199
|
+
| p99 | 155 ns |
|
|
200
|
+
| p99.9 | 166 ns |
|
|
201
|
+
| p99.99 | 350 ns |
|
|
202
|
+
| Max | 17 540 ns |
|
|
203
|
+
|
|
204
|
+
**Throughput: 8 553 K RTT/sec**
|
|
205
|
+
|
|
206
|
+
Max spikes reflect OS scheduler preemption on a non-isolated laptop core — not
|
|
207
|
+
a ring buffer pathology. On a server with isolated cores, p99.99 converges
|
|
208
|
+
toward the p99.9 band.
|
|
209
|
+
|
|
210
|
+
---
|
|
211
|
+
|
|
212
|
+
## Architecture
|
|
213
|
+
|
|
214
|
+
Tachyon decouples the **control plane** (connection bootstrap) from the
|
|
215
|
+
**data plane** (hot-path I/O).
|
|
216
|
+
|
|
217
|
+
**Control plane.** Process discovery and the initial ABI handshake run over
|
|
218
|
+
a Unix domain socket. The socket transfers an anonymous `memfd` file
|
|
219
|
+
descriptor via `SCM_RIGHTS`, then is permanently discarded. If the producer
|
|
220
|
+
and consumer were compiled with differing `TACHYON_MSG_ALIGNMENT` values,
|
|
221
|
+
the connection is rejected before the first byte of data is exchanged.
|
|
222
|
+
|
|
223
|
+
**Data plane.** All subsequent I/O operates directly in the shared memory
|
|
224
|
+
segment with no kernel involvement. The SPSC ring uses
|
|
225
|
+
`memory_order_acquire` / `memory_order_release` atomics with amortized
|
|
226
|
+
batch publication: the shared head/tail indices are updated at most once
|
|
227
|
+
every 32 messages or on an explicit `flush()`.
|
|
228
|
+
|
|
229
|
+
**Hardware sympathy.** Every control structure — message headers, atomic
|
|
230
|
+
indices, watchdog flags — is padded to 64-byte or 128-byte boundaries.
|
|
231
|
+
False sharing between producer and consumer cache lines is structurally
|
|
232
|
+
impossible.
|
|
233
|
+
|
|
234
|
+
**Hybrid wait strategy.** The consumer spins for a bounded threshold
|
|
235
|
+
(`cpu_relax()`), then sleeps via `SYS_futex` (Linux) or `__ulock_wait`
|
|
236
|
+
(macOS) with a 200 ms watchdog timeout. Kernel sleeps are bounded so the
|
|
237
|
+
thread periodically returns to the host runtime to process signals.
|
|
238
|
+
|
|
239
|
+
**Zero-copy contract.** C++ and Rust expose raw pointers or slices tied to
|
|
240
|
+
the ring buffer lifetime. Python surfaces the buffer protocol
|
|
241
|
+
(`memoryview`) and DLPack (`__dlpack__`), allowing PyTorch, JAX, and NumPy
|
|
242
|
+
to consume payloads directly from shared memory without copying.
|
|
243
|
+
|
|
244
|
+
For wire protocol details and ABI guarantees → [`ABI.md`](./ABI.md).
|
|
245
|
+
|
|
246
|
+
---
|
|
247
|
+
|
|
248
|
+
## Requirements
|
|
249
|
+
|
|
250
|
+
| Component | Minimum |
|
|
251
|
+
|-----------|-------------------------------------------|
|
|
252
|
+
| OS | Linux 5.10+ (primary), macOS 13+ (tier-2) |
|
|
253
|
+
| Compiler | GCC 14+ or Clang 17+ |
|
|
254
|
+
| CMake | 3.31+ |
|
|
255
|
+
| Python | 3.10+ |
|
|
256
|
+
| Rust | stable (2024 edition) |
|
|
257
|
+
|
|
258
|
+
---
|
|
259
|
+
|
|
260
|
+
## License
|
|
261
|
+
|
|
262
|
+
[Apache 2.0](./LICENSE)
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
#pragma once
|
|
2
|
+
|
|
3
|
+
#include <atomic>
|
|
4
|
+
#include <cstddef>
|
|
5
|
+
#include <cstdint>
|
|
6
|
+
#include <cstring>
|
|
7
|
+
#include <expected>
|
|
8
|
+
#include <span>
|
|
9
|
+
|
|
10
|
+
#include <tachyon.hpp>
|
|
11
|
+
#include <tachyon/concepts.hpp>
|
|
12
|
+
#include <tachyon/shm.hpp>
|
|
13
|
+
|
|
14
|
+
// Alignment (in bytes) applied to MessageHeader. The guard lets a build
// pre-define a different value; the checks below reject anything that is
// smaller than 32 bytes or not a power of two.
#if !defined(TACHYON_MSG_ALIGNMENT)
#define TACHYON_MSG_ALIGNMENT 64
#endif // !defined(TACHYON_MSG_ALIGNMENT)

static_assert(TACHYON_MSG_ALIGNMENT >= 32, "TACHYON_MSG_ALIGNMENT must be at least 32 bytes");
static_assert((TACHYON_MSG_ALIGNMENT & (TACHYON_MSG_ALIGNMENT - 1)) == 0, "TACHYON_MSG_ALIGNMENT must be a power of 2");
|
|
20
|
+
|
|
21
|
+
namespace tachyon::core {
|
|
22
|
+
// `inline` gives each constant a single definition shared by every
// translation unit that includes this header, instead of one
// internal-linkage copy per translation unit.

/// Magic number for the arena header: the ASCII bytes 'T','A','C','H'.
inline constexpr uint32_t TACHYON_MAGIC = 0x54414348;

/// Layout/protocol version number.
/// NOTE(review): presumably stored in ArenaHeader::version and checked on
/// attach — confirm against arena.cpp.
inline constexpr uint32_t TACHYON_VERSION = 0x02;
|
|
24
|
+
|
|
25
|
+
/// State value held atomically in ArenaHeader::state.
/// The underlying type is fixed at 32 bits and every enumerator carries an
/// explicit value, so the numeric encoding does not depend on declaration
/// order.
enum class BusState : uint32_t {
    Uninitialized = 0U,
    Initializing = 1U,
    Ready = 2U,
    Disconnected = 3U,
    FatalError = 4U,
    Unknown = 5U,
};
|
|
33
|
+
|
|
34
|
+
struct alignas(TACHYON_MSG_ALIGNMENT) MessageHeader {
|
|
35
|
+
uint32_t size;
|
|
36
|
+
uint32_t type_id;
|
|
37
|
+
uint32_t reserved_size;
|
|
38
|
+
uint8_t padding_[TACHYON_MSG_ALIGNMENT - sizeof(uint32_t) * 3];
|
|
39
|
+
};
|
|
40
|
+
|
|
41
|
+
struct alignas(128) ArenaHeader {
|
|
42
|
+
uint32_t magic;
|
|
43
|
+
uint32_t version;
|
|
44
|
+
uint32_t capacity;
|
|
45
|
+
uint32_t msg_alignment;
|
|
46
|
+
std::atomic<BusState> state;
|
|
47
|
+
};
|
|
48
|
+
|
|
49
|
+
/// Shared atomic counters and flags of the ring. Each member is aligned to
/// its own 128-byte boundary so no two members share a 128-byte block.
struct SPSCIndices {
    alignas(128) std::atomic<size_t> head{0};
    alignas(128) std::atomic<size_t> tail{0};
    alignas(128) std::atomic<uint32_t> consumer_sleeping{0};
    alignas(128) std::atomic<uint64_t> producer_heartbeat{0};
    alignas(128) std::atomic<uint64_t> consumer_heartbeat{0};
};

// These atomics are placed in memory mapped by more than one process. A
// lock-based std::atomic cannot synchronize across processes, so require
// lock-free implementations for every member type.
static_assert(std::atomic<size_t>::is_always_lock_free && std::atomic<uint32_t>::is_always_lock_free &&
                  std::atomic<uint64_t>::is_always_lock_free,
              "SPSCIndices atomics must be lock-free to be shared between processes");
static_assert(sizeof(SPSCIndices) == 5 * 128, "SPSCIndices must be five 128-byte slots");
|
|
56
|
+
|
|
57
|
+
struct alignas(128) MemoryLayout {
|
|
58
|
+
ArenaHeader header;
|
|
59
|
+
SPSCIndices indices;
|
|
60
|
+
|
|
61
|
+
[[nodiscard]] inline std::byte *data_arena() noexcept {
|
|
62
|
+
return reinterpret_cast<std::byte *>(this + 1);
|
|
63
|
+
}
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
/// Plain descriptor of one received message, as filled in by
/// Arena::acquire_rx_batch and handed back to Arena::commit_rx_batch.
/// Member order and widths are fixed; the size is checked below.
struct alignas(32) RxView {
    std::byte const *ptr;    // start of the message payload
    std::size_t actual_size; // payload size in bytes
    std::size_t reserved_;   // NOTE(review): presumably the reserved slot size — confirm in arena.cpp
    std::uint32_t type_id;   // type tag of the message
    std::uint32_t padding_;  // explicit tail padding
};

static_assert(32 == sizeof(RxView), "RxView must be exactly 32 bytes sized.");
|
|
75
|
+
|
|
76
|
+
/// Move-only handle over a MemoryLayout, exposing the producer (TX) and
/// consumer (RX) operations of the ring. Instances are obtained from
/// format() or attach(); the constructor is private.
class TACHYON_API Arena {
    // Data-member declaration order is unchanged from the original.
    MemoryLayout *layout_{nullptr};
    size_t capacity_mask_{0};

    // NOTE(review): presumably the number of messages between publications
    // of the shared indices — confirm in arena.cpp.
    static constexpr size_t BATCH_SIZE = 32;
    size_t local_head_{0};
    size_t cached_tail_{0};
    size_t pending_tx_{0};
    size_t local_tail_{0};
    size_t cached_head_{0};
    size_t pending_rx_{0};
    size_t tx_reserved_size_{0};
    size_t rx_reserved_size_{0};

    explicit Arena(MemoryLayout *layout, size_t capacity) noexcept;

public:
    ~Arena() = default;

    // Non-copyable, movable.
    Arena(const Arena &) = delete;
    Arena &operator=(const Arena &) = delete;
    Arena(Arena &&other) noexcept;
    Arena &operator=(Arena &&other) noexcept;

    /// Builds an Arena over `shm_span` with the given capacity, or returns a ShmError.
    /// NOTE(review): presumably initializes the header and indices — confirm in arena.cpp.
    static std::expected<Arena, ShmError> format(std::span<std::byte> shm_span, size_t capacity);

    /// Builds an Arena over an existing `shm_span`, or returns a ShmError.
    /// NOTE(review): presumably validates the existing header — confirm in arena.cpp.
    static std::expected<Arena, ShmError> attach(std::span<std::byte> shm_span);

    /// Requests a writable slot of up to `max_size` bytes; a null result
    /// means no slot was obtained (push() treats it as failure).
    [[nodiscard]] std::byte *acquire_tx(size_t max_size) noexcept;

    /// Completes the pending TX slot with its final size and type tag.
    [[nodiscard]] bool commit_tx(size_t actual_size, uint32_t type_id) noexcept;

    /// Requests the next readable message; on success the out-parameters
    /// receive its type tag and size. A null result means no message was
    /// obtained (pop() treats it as failure).
    [[nodiscard]] const std::byte *acquire_rx(uint32_t &out_type_id, size_t &out_actual_size) noexcept;

    /// Completes the pending RX message.
    [[nodiscard]] bool commit_rx() noexcept;

    /// Fills up to `max_msgs` entries of `views`; returns how many were filled.
    [[nodiscard]] size_t acquire_rx_batch(RxView *views, size_t max_msgs) noexcept;

    /// Completes `count` messages previously returned through `views`.
    bool commit_rx_batch(const RxView *views, size_t count) noexcept;

    /// Spinning variant of acquire_rx.
    /// NOTE(review): the meaning of max_spins == 0 is not visible here — confirm in arena.cpp.
    [[nodiscard]] const std::byte *
    acquire_rx_spin(uint32_t &out_type_id, size_t &out_actual_size, uint32_t max_spins = 0) noexcept;

    /// Blocking variant of acquire_rx.
    /// NOTE(review): presumably spins up to `spin_threshold` iterations before sleeping — confirm in arena.cpp.
    [[nodiscard]] const std::byte *
    acquire_rx_blocking(uint32_t &out_type_id, size_t &out_actual_size, uint32_t spin_threshold = 10000) noexcept;

    // NOTE(review): presumably publish locally batched progress to the shared
    // indices — confirm the exact split between the two in arena.cpp.
    void flush() noexcept;
    void flush_tx() noexcept;

    void set_consumer_sleeping(bool sleeping) const noexcept;
    int wait_consumer_sleeping() const noexcept;
    uint64_t get_producer_heartbeat() const noexcept;
    void set_fatal_error() const noexcept;

    /// Copies `payload` into a freshly acquired TX slot and commits it.
    /// Returns false when no slot could be acquired; otherwise returns the
    /// result of commit_tx().
    template <TachyonPayload T> [[nodiscard]] bool push(const uint32_t type_id, const T &payload) noexcept {
        std::byte *const slot = acquire_tx(sizeof(T));
        if (slot == nullptr) [[unlikely]] {
            return false;
        }
        std::memcpy(slot, &payload, sizeof(T));
        return commit_tx(sizeof(T), type_id);
    }

    /// Receives one message into `out_payload`.
    /// Returns false when nothing could be acquired. When a message is
    /// acquired but its size differs from sizeof(T), the message is still
    /// committed (and so consumed), `out_payload` is left untouched and false
    /// is returned. Otherwise the payload is copied out and the result of
    /// commit_rx() is returned.
    template <TachyonPayload T> [[nodiscard]] bool pop(uint32_t &out_type_id, T &out_payload) noexcept {
        size_t received = 0;
        const std::byte *const slot = acquire_rx(out_type_id, received);
        if (slot == nullptr) [[unlikely]] {
            return false;
        }
        if (received != sizeof(T)) {
            (void)commit_rx(); // release the mismatched message; result intentionally ignored
            return false;
        }
        std::memcpy(&out_payload, slot, sizeof(T));
        return commit_rx();
    }

    /// Reads the current bus state from the header with acquire ordering.
    [[nodiscard]] BusState get_state() const noexcept {
        return layout_->header.state.load(std::memory_order_acquire);
    }
};
|
|
161
|
+
} // namespace tachyon::core
|