turboapi 0.3.24__tar.gz → 0.3.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. {turboapi-0.3.24 → turboapi-0.3.28}/AGENTS.md +3 -5
  2. turboapi-0.3.28/APACHE_BENCH_RESULTS.md +230 -0
  3. turboapi-0.3.28/ASYNC_OPTIMIZATION_ROADMAP.md +293 -0
  4. {turboapi-0.3.24 → turboapi-0.3.28}/Cargo.lock +1 -1
  5. {turboapi-0.3.24 → turboapi-0.3.28}/Cargo.toml +3 -3
  6. turboapi-0.3.28/PHASE_A_IMPLEMENTATION_GUIDE.md +358 -0
  7. turboapi-0.3.28/PHASE_A_RESULTS.md +201 -0
  8. turboapi-0.3.28/PHASE_B_IMPLEMENTATION_GUIDE.md +355 -0
  9. {turboapi-0.3.24 → turboapi-0.3.28}/PKG-INFO +2 -2
  10. {turboapi-0.3.24 → turboapi-0.3.28}/README.md +160 -23
  11. turboapi-0.3.28/RELEASE_NOTES_v0.3.20.md +322 -0
  12. turboapi-0.3.28/TRUE_ASYNC_SUCCESS.md +344 -0
  13. turboapi-0.3.28/benchmark_async_comparison.py +361 -0
  14. turboapi-0.3.28/benchmark_fastapi_server.py +25 -0
  15. turboapi-0.3.28/benchmark_turboapi_server.py +24 -0
  16. turboapi-0.3.28/benchmarks/comprehensive_wrk_benchmark.py +284 -0
  17. turboapi-0.3.28/benchmarks/turboapi_vs_fastapi_benchmark.py +310 -0
  18. turboapi-0.3.28/benchmarks/turboapi_vs_fastapi_simple.py +249 -0
  19. turboapi-0.3.28/benchmarks/wrk_output.txt +0 -0
  20. {turboapi-0.3.24 → turboapi-0.3.28}/pyproject.toml +2 -2
  21. {turboapi-0.3.24 → turboapi-0.3.28}/python/pyproject.toml +2 -2
  22. turboapi-0.3.28/python/turboapi/async_pool.py +141 -0
  23. turboapi-0.3.28/python/turboapi/middleware.py +342 -0
  24. {turboapi-0.3.24 → turboapi-0.3.28/python}/turboapi/request_handler.py +21 -22
  25. {turboapi-0.3.24 → turboapi-0.3.28/python}/turboapi/rust_integration.py +7 -8
  26. turboapi-0.3.28/python/turboapi/security.py +542 -0
  27. turboapi-0.3.28/quick_async_test.py +20 -0
  28. {turboapi-0.3.24 → turboapi-0.3.28}/src/lib.rs +2 -1
  29. turboapi-0.3.28/src/python_worker.rs +229 -0
  30. turboapi-0.3.28/src/server.rs +1105 -0
  31. turboapi-0.3.28/test_async_io_demo.py +79 -0
  32. turboapi-0.3.28/test_async_performance.py +99 -0
  33. turboapi-0.3.28/test_multi_worker.py +25 -0
  34. turboapi-0.3.28/test_multithreaded_sync.py +29 -0
  35. turboapi-0.3.28/tests/test_satya_0_4_0_compatibility.py +247 -0
  36. turboapi-0.3.28/tests/test_security_features.py +234 -0
  37. turboapi-0.3.28/turboapi/async_pool.py +141 -0
  38. turboapi-0.3.28/turboapi/middleware.py +342 -0
  39. {turboapi-0.3.24/python → turboapi-0.3.28}/turboapi/request_handler.py +21 -22
  40. {turboapi-0.3.24/python → turboapi-0.3.28}/turboapi/rust_integration.py +7 -8
  41. turboapi-0.3.28/turboapi/security.py +542 -0
  42. turboapi-0.3.24/FASTAPI_FIXES_SUMMARY.md +0 -404
  43. turboapi-0.3.24/PYTHON_313_FREE_THREADING_SETUP.md +0 -174
  44. turboapi-0.3.24/PYTHON_SETUP_COMPLETE.md +0 -161
  45. turboapi-0.3.24/benchmark_output.txt +0 -95
  46. turboapi-0.3.24/claude.md +0 -94
  47. turboapi-0.3.24/python/turboapi/middleware.py +0 -64
  48. turboapi-0.3.24/src/server.rs +0 -596
  49. turboapi-0.3.24/tests/async_benchmark.sh +0 -64
  50. turboapi-0.3.24/tests/fastapi_v0_3_20_equivalent.py +0 -41
  51. turboapi-0.3.24/tests/quick_benchmark.sh +0 -63
  52. turboapi-0.3.24/tests/run_v0_3_20_benchmark.py +0 -235
  53. turboapi-0.3.24/tests/test_async_benchmark.py +0 -55
  54. turboapi-0.3.24/tests/test_v0_3_20_fixes.py +0 -62
  55. turboapi-0.3.24/tests/test_v0_3_20_server.py +0 -55
  56. turboapi-0.3.24/tests/test_v0_3_21_async.py +0 -53
  57. turboapi-0.3.24/turboapi/middleware.py +0 -64
  58. {turboapi-0.3.24 → turboapi-0.3.28}/.github/scripts/check_performance_regression.py +0 -0
  59. {turboapi-0.3.24 → turboapi-0.3.28}/.github/scripts/compare_benchmarks.py +0 -0
  60. {turboapi-0.3.24 → turboapi-0.3.28}/.github/workflows/README.md +0 -0
  61. {turboapi-0.3.24 → turboapi-0.3.28}/.github/workflows/benchmark.yml +0 -0
  62. {turboapi-0.3.24 → turboapi-0.3.28}/.github/workflows/build-and-release.yml +0 -0
  63. {turboapi-0.3.24 → turboapi-0.3.28}/.github/workflows/build-wheels.yml +0 -0
  64. {turboapi-0.3.24 → turboapi-0.3.28}/.github/workflows/ci.yml +0 -0
  65. {turboapi-0.3.24 → turboapi-0.3.28}/.github/workflows/release.yml +0 -0
  66. {turboapi-0.3.24 → turboapi-0.3.28}/.gitignore +0 -0
  67. {turboapi-0.3.24 → turboapi-0.3.28}/CHANGELOG.md +0 -0
  68. {turboapi-0.3.24 → turboapi-0.3.28}/FASTAPI_COMPATIBILITY.md +0 -0
  69. {turboapi-0.3.24 → turboapi-0.3.28}/LICENSE +0 -0
  70. {turboapi-0.3.24 → turboapi-0.3.28}/RELEASE_NOTES_v0.3.1.md +0 -0
  71. {turboapi-0.3.24 → turboapi-0.3.28}/RELEASE_NOTES_v0.3.13.md +0 -0
  72. {turboapi-0.3.24 → turboapi-0.3.28}/WINDOWS_FIX_SUMMARY.md +0 -0
  73. {turboapi-0.3.24 → turboapi-0.3.28}/adaptive_rate_test.py +0 -0
  74. {turboapi-0.3.24 → turboapi-0.3.28}/benches/performance_bench.rs +0 -0
  75. {turboapi-0.3.24 → turboapi-0.3.28}/benchmark_comparison.png +0 -0
  76. {turboapi-0.3.24 → turboapi-0.3.28}/benchmark_graphs/turbo_vs_fastapi_performance_20250929_025531.png +0 -0
  77. {turboapi-0.3.24 → turboapi-0.3.28}/delete/blog/adr_python_handler_integration.md +0 -0
  78. {turboapi-0.3.24 → turboapi-0.3.28}/delete/blog/phase_1.md +0 -0
  79. {turboapi-0.3.24 → turboapi-0.3.28}/delete/blog/phase_2.md +0 -0
  80. {turboapi-0.3.24 → turboapi-0.3.28}/delete/blog/phase_3.md +0 -0
  81. {turboapi-0.3.24 → turboapi-0.3.28}/delete/blog/phase_4.md +0 -0
  82. {turboapi-0.3.24 → turboapi-0.3.28}/delete/blog/phase_5.md +0 -0
  83. {turboapi-0.3.24 → turboapi-0.3.28}/delete/twitterpost.md +0 -0
  84. {turboapi-0.3.24 → turboapi-0.3.28}/install_benchmark_deps.py +0 -0
  85. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/001-foundation.md +0 -0
  86. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/002-routing-breakthrough.md +0 -0
  87. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/003-production-ready.md +0 -0
  88. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/004-zero-copy-revolution.md +0 -0
  89. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/005-middleware-mastery.md +0 -0
  90. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/006-python-handler-breakthrough.md +0 -0
  91. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/README.md +0 -0
  92. {turboapi-0.3.24 → turboapi-0.3.28}/mini-notes/lessons-learned.md +0 -0
  93. {turboapi-0.3.24 → turboapi-0.3.28}/python/MANIFEST.in +0 -0
  94. {turboapi-0.3.24 → turboapi-0.3.28}/python/setup.py +0 -0
  95. {turboapi-0.3.24 → turboapi-0.3.28}/python/turboapi/__init__.py +0 -0
  96. {turboapi-0.3.24 → turboapi-0.3.28}/python/turboapi/decorators.py +0 -0
  97. {turboapi-0.3.24 → turboapi-0.3.28}/python/turboapi/main_app.py +0 -0
  98. {turboapi-0.3.24 → turboapi-0.3.28}/python/turboapi/models.py +0 -0
  99. {turboapi-0.3.24 → turboapi-0.3.28}/python/turboapi/routing.py +0 -0
  100. {turboapi-0.3.24 → turboapi-0.3.28}/python/turboapi/server_integration.py +0 -0
  101. {turboapi-0.3.24 → turboapi-0.3.28}/python/turboapi/version_check.py +0 -0
  102. {turboapi-0.3.24 → turboapi-0.3.28}/setup_python313t.sh +0 -0
  103. {turboapi-0.3.24 → turboapi-0.3.28}/src/http2.rs +0 -0
  104. {turboapi-0.3.24 → turboapi-0.3.28}/src/micro_bench.rs +0 -0
  105. {turboapi-0.3.24 → turboapi-0.3.28}/src/middleware.rs +0 -0
  106. {turboapi-0.3.24 → turboapi-0.3.28}/src/request.rs +0 -0
  107. {turboapi-0.3.24 → turboapi-0.3.28}/src/response.rs +0 -0
  108. {turboapi-0.3.24 → turboapi-0.3.28}/src/router.rs +0 -0
  109. {turboapi-0.3.24 → turboapi-0.3.28}/src/threadpool.rs +0 -0
  110. {turboapi-0.3.24 → turboapi-0.3.28}/src/validation.rs +0 -0
  111. {turboapi-0.3.24 → turboapi-0.3.28}/src/websocket.rs +0 -0
  112. {turboapi-0.3.24 → turboapi-0.3.28}/src/zerocopy.rs +0 -0
  113. {turboapi-0.3.24 → turboapi-0.3.28}/test_no_rate_limit.py +0 -0
  114. {turboapi-0.3.24 → turboapi-0.3.28}/test_rate_limiting.py +0 -0
  115. {turboapi-0.3.24 → turboapi-0.3.28}/test_zerocopy.py +0 -0
  116. {turboapi-0.3.24 → turboapi-0.3.28}/tests/README.md +0 -0
  117. {turboapi-0.3.24 → turboapi-0.3.28}/tests/benchmark_comparison.py +0 -0
  118. {turboapi-0.3.24 → turboapi-0.3.28}/tests/comparison_before_after.py +0 -0
  119. {turboapi-0.3.24 → turboapi-0.3.28}/tests/fastapi_equivalent.py +0 -0
  120. {turboapi-0.3.24 → turboapi-0.3.28}/tests/quick_body_test.py +0 -0
  121. {turboapi-0.3.24 → turboapi-0.3.28}/tests/quick_test.py +0 -0
  122. {turboapi-0.3.24 → turboapi-0.3.28}/tests/test.py +0 -0
  123. {turboapi-0.3.24 → turboapi-0.3.28}/tests/test_fastapi_compatibility.py +0 -0
  124. {turboapi-0.3.24 → turboapi-0.3.28}/tests/wrk_benchmark.py +0 -0
  125. {turboapi-0.3.24 → turboapi-0.3.28}/tests/wrk_comparison.py +0 -0
  126. {turboapi-0.3.24 → turboapi-0.3.28}/turbo_vs_fastapi_benchmark_20250929_025526.json +0 -0
  127. {turboapi-0.3.24 → turboapi-0.3.28}/turboapi/__init__.py +0 -0
  128. {turboapi-0.3.24 → turboapi-0.3.28}/turboapi/decorators.py +0 -0
  129. {turboapi-0.3.24 → turboapi-0.3.28}/turboapi/main_app.py +0 -0
  130. {turboapi-0.3.24 → turboapi-0.3.28}/turboapi/models.py +0 -0
  131. {turboapi-0.3.24 → turboapi-0.3.28}/turboapi/routing.py +0 -0
  132. {turboapi-0.3.24 → turboapi-0.3.28}/turboapi/server_integration.py +0 -0
  133. {turboapi-0.3.24 → turboapi-0.3.28}/turboapi/version_check.py +0 -0
  134. {turboapi-0.3.24 → turboapi-0.3.28}/wrk_rate_limit_test.py +0 -0
AGENTS.md
@@ -1,17 +1,15 @@
-# TurboAPI v0.3.23 - AI Agent Guide 🤖
+# TurboAPI v0.3.0+ - AI Agent Guide 🤖
 
 **For AI assistants, code generation tools, and automated development systems**
 
 ## 🎯 **What TurboAPI Is**
 
-TurboAPI is a **FastAPI-compatible** Python web framework that delivers **9-10x better performance** through:
+TurboAPI is a **FastAPI-compatible** Python web framework that delivers **5-10x better performance** through:
 
 - **Rust-powered HTTP core** (zero Python overhead)
-- **Python 3.13 free-threading** with `Python::attach()` (TRUE parallel execution)
-- **pyo3-async-runtimes** integration (native tokio async support)
+- **Python 3.13 free-threading** support (true parallelism)
 - **Zero-copy optimizations** and intelligent caching
 - **100% FastAPI syntax compatibility** with automatic body parsing
 - **Satya validation** (faster than Pydantic)
-- **72,000+ req/s** in production benchmarks
 
 ## 🚀 **For AI Agents: Key Facts**
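To make the "FastAPI syntax compatibility" bullet above concrete, here is a minimal sketch of what a TurboAPI app looks like. The `TurboAPI` class, decorator names, and `app.run()` call are assumed from the project README rather than taken from this diff, so confirm the exact surface against README.md.

```python
# Minimal sketch of FastAPI-style usage (assumed API surface; confirm against README.md).
from turboapi import TurboAPI

app = TurboAPI()

@app.get("/items/{item_id}")
def read_item(item_id: int, q: str | None = None):
    # Path and query parameters are bound FastAPI-style.
    return {"item_id": item_id, "q": q}

@app.post("/items")
def create_item(item: dict):
    # JSON bodies are parsed automatically into the handler argument.
    return {"created": item}

if __name__ == "__main__":
    app.run(host="127.0.0.1", port=8000)  # assumed startup helper
```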
 
APACHE_BENCH_RESULTS.md (new file)
@@ -0,0 +1,230 @@
+# TurboAPI Apache Bench Results 🚀
+
+**Date**: 2025-10-11
+**Version**: TurboAPI v0.3.27 with Rust Core
+**Python**: 3.14t (free-threading)
+**Tool**: Apache Bench (ab)
+
+---
+
+## Test 1: Sync Handler - Light Load
+**Command**: `ab -n 10000 -c 100 http://127.0.0.1:8000/sync`
+
+### Results
+- **Requests per second**: **31,353 RPS** 🔥
+- **Time per request**: 3.189 ms (mean)
+- **Time per request**: 0.032 ms (mean, across all concurrent requests)
+- **Transfer rate**: 4,409 KB/sec
+- **Failed requests**: 0
+- **Total time**: 0.319 seconds
+
+### Latency Distribution
+```
+ 50%   3 ms
+ 66%   3 ms
+ 75%   3 ms
+ 80%   3 ms
+ 90%   4 ms
+ 95%   6 ms
+ 98%   6 ms
+ 99%   7 ms
+100%  21 ms (longest request)
+```
+
+---
+
+## Test 2: Compute Handler - CPU Intensive
+**Command**: `ab -n 10000 -c 100 http://127.0.0.1:8000/compute`
+
+### Results
+- **Requests per second**: **32,428 RPS** 🔥
+- **Time per request**: 3.084 ms (mean)
+- **Time per request**: 0.031 ms (mean, across all concurrent requests)
+- **Transfer rate**: 4,687 KB/sec
+- **Failed requests**: 0
+- **Total time**: 0.308 seconds
+
+### Latency Distribution
+```
+ 50%   3 ms
+ 66%   3 ms
+ 75%   3 ms
+ 80%   3 ms
+ 90%   3 ms
+ 95%   4 ms
+ 98%   6 ms
+ 99%   6 ms
+100%   6 ms (longest request)
+```
+
+**Note**: Even with CPU-intensive computation (sum of squares 0-999), performance remains excellent!
+
+---
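The compute endpoint above is described only as "sum of squares 0-999"; a stand-alone stand-in for that workload (hypothetical, not copied from the package source) looks like this:

```python
# Hypothetical stand-in for the CPU-bound handler benchmarked above:
# the endpoint is described only as "sum of squares 0-999".

def compute_sum_of_squares(n: int = 1000) -> dict:
    """Pure-CPU workload: sum of i*i for i in 0..n-1."""
    total = sum(i * i for i in range(n))
    return {"result": total}

if __name__ == "__main__":
    print(compute_sum_of_squares())  # {'result': 332833500}
```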
+
+## Test 3: Async Handler - Event Loop Overhead
+**Command**: `ab -n 5000 -c 50 http://127.0.0.1:8000/async`
+
+### Results
+- **Requests per second**: **543 RPS**
+- **Time per request**: 92.103 ms (mean)
+- **Time per request**: 1.842 ms (mean, across all concurrent requests)
+- **Transfer rate**: 91.18 KB/sec
+- **Failed requests**: 0
+- **Total time**: 9.210 seconds
+
+### Latency Distribution
+```
+ 50%   92 ms
+ 66%   94 ms
+ 75%   94 ms
+ 80%   95 ms
+ 90%   95 ms
+ 95%   96 ms
+ 98%   98 ms
+ 99%  102 ms
+100%  103 ms (longest request)
+```
+
+**Note**: Slower due to `asyncio.run()` creating new event loop per request. This is expected behavior. For production, consider using a persistent event loop pool.
+
+---
+
+## Test 4: High Concurrency - Stress Test
+**Command**: `ab -n 50000 -c 500 http://127.0.0.1:8000/sync`
+
+### Results
+- **Requests per second**: **27,306 RPS** 🔥
+- **Time per request**: 18.311 ms (mean)
+- **Time per request**: 0.037 ms (mean, across all concurrent requests)
+- **Transfer rate**: 3,840 KB/sec
+- **Failed requests**: 0
+- **Total time**: 1.831 seconds
+
+### Latency Distribution
+```
+ 50%   17 ms
+ 66%   18 ms
+ 75%   18 ms
+ 80%   18 ms
+ 90%   19 ms
+ 95%   21 ms
+ 98%   26 ms
+ 99%   85 ms
+100%  144 ms (longest request)
+```
+
+**Note**: Even with 500 concurrent connections, TurboAPI maintains 27K+ RPS with zero failures!
+
+---
+
+## Performance Summary
+
+| Test | Concurrency | Requests | RPS | Avg Latency | P95 Latency | P99 Latency |
+|------|-------------|----------|-----|-------------|-------------|-------------|
+| **Sync (Light)** | 100 | 10,000 | **31,353** | 3.2 ms | 6 ms | 7 ms |
+| **Compute (CPU)** | 100 | 10,000 | **32,428** | 3.1 ms | 4 ms | 6 ms |
+| **Async (Event Loop)** | 50 | 5,000 | 543 | 92 ms | 96 ms | 102 ms |
+| **High Concurrency** | 500 | 50,000 | **27,306** | 18 ms | 21 ms | 85 ms |
+
+---
+
+## Key Findings
+
+### ✅ Strengths
+1. **Exceptional sync performance**: 31K-32K RPS consistently
+2. **CPU-intensive workloads**: No performance degradation
+3. **High concurrency**: Handles 500 concurrent connections with 27K RPS
+4. **Zero failures**: 100% success rate across all tests
+5. **Low latency**: Sub-10ms P99 latency under normal load
+
+### ⚠️ Async Handler Considerations
+- Current implementation creates new event loop per request (`asyncio.run()`)
+- This adds ~90ms overhead per async request
+- **Recommendation**: Implement event loop pooling for production async workloads
+
+### 🎯 Comparison vs FastAPI
+| Metric | FastAPI | TurboAPI | Improvement |
+|--------|---------|----------|-------------|
+| RPS (100 conn) | ~7,000 | **31,353** | **4.5x faster** |
+| Latency (P95) | ~40ms | **6ms** | **6.7x lower** |
+| Latency (P99) | ~60ms | **7ms** | **8.6x lower** |
+
+---
+
+## Architecture Insights
+
+### Why Sync is Fast
+```
+HTTP Request → Rust (Hyper) → Python Handler (GIL) → JSON → Rust → Response
+                    ↑                                          ↑
+               Zero overhead                              Zero overhead
+```
+
+### Why Async is Slower (Current Implementation)
+```
+HTTP Request → Rust → spawn_blocking → asyncio.run() → New Event Loop → Handler
+
+                        ~90ms overhead per request
+```
+
+### Future Optimization: Event Loop Pool
+```
+HTTP Request → Rust → Event Loop Pool → Reuse Loop → Handler
+
+                     Amortized overhead
+```
+
+---
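The "Event Loop Pool" idea sketched above can be illustrated in plain Python: run one long-lived event loop on a background thread and submit each request's coroutine to it with `asyncio.run_coroutine_threadsafe`, instead of paying for a fresh loop via `asyncio.run()` on every request. This is an illustrative sketch of the pattern, not the TurboAPI implementation.

```python
import asyncio
import threading

class PersistentLoop:
    """One long-lived event loop on a worker thread, reused across requests."""

    def __init__(self) -> None:
        self.loop = asyncio.new_event_loop()
        self._thread = threading.Thread(target=self.loop.run_forever, daemon=True)
        self._thread.start()

    def run(self, coro, timeout: float = 30.0):
        # Submit a coroutine from any (e.g. Rust worker) thread and wait for the result.
        future = asyncio.run_coroutine_threadsafe(coro, self.loop)
        return future.result(timeout)

async def async_handler() -> dict:
    await asyncio.sleep(0)  # stand-in for real async I/O
    return {"async": True}

if __name__ == "__main__":
    pool = PersistentLoop()
    # Per-request cost is now just scheduling onto the existing loop,
    # instead of asyncio.run() building and tearing down a loop each time.
    for _ in range(3):
        print(pool.run(async_handler()))
```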
+
+## Recommendations
+
+### For Production Use
+
+1. **Sync Handlers** (Recommended for most use cases)
+   - Use for: REST APIs, CRUD operations, database queries
+   - Performance: 30K+ RPS
+   - Latency: Sub-10ms
+
+2. **Async Handlers** (Use with caution)
+   - Current: 543 RPS with 90ms overhead
+   - Future: Implement event loop pooling for better performance
+   - Use for: Long-running I/O operations, WebSockets, streaming
+
+3. **High Concurrency**
+   - TurboAPI handles 500+ concurrent connections gracefully
+   - Consider load balancing for >1000 concurrent connections
+
+---
+
+## Next Steps
+
+### Immediate
+- ✅ Rust core validated at 30K+ RPS
+- ✅ Sync handlers production-ready
+- ✅ Zero-failure reliability confirmed
+
+### Future Enhancements
+1. **Event Loop Pooling** - Reduce async overhead from 90ms to <5ms
+2. **Connection Pooling** - Reuse connections for better throughput
+3. **HTTP/2 Support** - Enable multiplexing and server push
+4. **Multi-worker Mode** - Spawn multiple Python worker threads
+5. **Zero-copy Buffers** - Eliminate data copying between Rust/Python
+
+---
+
+## Conclusion
+
+TurboAPI with Rust core delivers **exceptional performance** for sync handlers:
+- ✅ **31K-32K RPS** sustained throughput
+- ✅ **Sub-10ms P99 latency**
+- ✅ **Zero failures** under stress
+- ✅ **4.5x faster** than FastAPI
+
+The framework is **production-ready** for high-performance REST APIs and sync workloads.
+
+---
+
+**Tested by**: Apache Bench 2.3
+**Hardware**: Apple Silicon (M-series)
+**OS**: macOS
+**Python**: 3.14t (free-threading enabled)

ASYNC_OPTIMIZATION_ROADMAP.md (new file)
@@ -0,0 +1,293 @@
+# TurboAPI Async Optimization Roadmap
+
+**Goal**: Achieve **10-15K RPS** for async endpoints
+**Current**: 3,504 RPS (Phase A complete)
+**Date**: 2025-10-11
+
+---
+
+## 📊 Performance Journey
+
+| Phase | RPS | Latency | Improvement | Status |
+|-------|-----|---------|-------------|--------|
+| **Baseline** | 1,981 | 25ms | - | ✅ Measured |
+| **Phase A** | 3,504 | 13.68ms | +77% | ✅ **COMPLETE** |
+| **Phase B** | 7,000-9,000 | 5-8ms | +100-150% | ⏳ Next |
+| **Phase C** | 10,000-15,000 | 3-5ms | +40-70% | 📋 Planned |
+
+---
+
+## ✅ Phase A: Loop Sharding (COMPLETE)
+
+### What We Did
+- Implemented **14 parallel event loop shards** (one per CPU core)
+- Increased batch size from **32 → 128** requests
+- Added **hash-based shard routing** for cache locality
+- Eliminated **single event loop bottleneck**
+
+### Results
+- **✅ 3,504 RPS** (77% improvement)
+- **✅ 13.68ms latency** (45% reduction)
+- **✅ Stable under load** (c=50, c=100, c=200)
+
+### Key Code Changes
+```rust
+// src/server.rs
+fn spawn_loop_shards(num_shards: usize) -> Vec<LoopShard> {
+    // 14 independent event loops
+    // 128 request batching
+    // Per-shard MPSC channels
+}
+```
+
+**Files**: `PHASE_A_IMPLEMENTATION_GUIDE.md`, `PHASE_A_RESULTS.md`
+
+---
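A rough Python analogue of the loop sharding described above: several persistent event loops (one per core), with each request routed to a shard by hashing a routing key. This is a sketch of the idea only; the real implementation lives on the Rust side in `spawn_loop_shards()`.

```python
import asyncio
import os
import threading

class LoopShard:
    """A single persistent event loop running on its own thread."""

    def __init__(self) -> None:
        self.loop = asyncio.new_event_loop()
        threading.Thread(target=self.loop.run_forever, daemon=True).start()

    def submit(self, coro):
        return asyncio.run_coroutine_threadsafe(coro, self.loop)

# One shard per CPU core (14 on the benchmark machine).
SHARDS = [LoopShard() for _ in range(os.cpu_count() or 1)]

def dispatch(path: str, coro):
    # Hash-based routing: the same path always lands on the same shard,
    # which helps cache locality and spreads load across all loops.
    shard = SHARDS[hash(path) % len(SHARDS)]
    return shard.submit(coro)

async def handler() -> dict:
    await asyncio.sleep(0)  # stand-in for real async I/O
    return {"ok": True}

if __name__ == "__main__":
    futures = [dispatch(f"/async/{i}", handler()) for i in range(8)]
    print([f.result(timeout=5) for f in futures])
```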
+
+## ⏳ Phase B: uvloop + Optimizations (NEXT)
+
+### What To Do
+1. **Replace asyncio with uvloop** - C-based event loop (2-4x faster)
+2. **Add semaphore gating** - Limit concurrent tasks (512 max)
+3. **Replace json.dumps with orjson** - Faster JSON (2-5x faster)
+
+### Expected Results
+- **🎯 7,000-9,000 RPS** (2-3x improvement)
+- **🎯 5-8ms latency** (2x faster)
+- **🎯 Better CPU utilization**
+
+### Implementation Plan
+```python
+# Install dependencies
+pip install uvloop orjson
+
+# In Rust: spawn_loop_shards()
+uvloop.install()  # Use uvloop instead of asyncio
+orjson.dumps()    # Use orjson instead of json.dumps
+
+# Add semaphore gating
+limiter = AsyncLimiter(max_concurrent=512)
+```
+
+**Timeline**: ~3 hours
+**Files**: `PHASE_B_IMPLEMENTATION_GUIDE.md`
+
+---
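The plan above references an `AsyncLimiter` only by name, so here is one way such a gate could look: an `asyncio.Semaphore` capping in-flight handler tasks per loop, combined with `uvloop` as the loop implementation and `orjson` for serialization. Treat it as a sketch of the intended pattern (requires `pip install uvloop orjson`), not the shipped TurboAPI code.

```python
import asyncio

import orjson   # Rust-backed JSON: dumps() returns bytes
import uvloop   # C event loop, drop-in replacement for asyncio's default

class AsyncLimiter:
    """Cap the number of concurrently running handler tasks on a loop."""

    def __init__(self, max_concurrent: int = 512) -> None:
        self._sem = asyncio.Semaphore(max_concurrent)

    async def run(self, coro):
        async with self._sem:  # queue excess requests instead of overloading the loop
            return await coro

limiter = AsyncLimiter(max_concurrent=512)

async def handler() -> bytes:
    await asyncio.sleep(0)             # stand-in for real async I/O
    return orjson.dumps({"ok": True})  # bytes, ready to hand back to Rust

async def main() -> None:
    results = await asyncio.gather(*(limiter.run(handler()) for _ in range(1000)))
    print(len(results), results[0])

if __name__ == "__main__":
    uvloop.install()  # make new event loops uvloop-backed
    asyncio.run(main())
```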
+
+## 📋 Phase C: Bytes-First Handlers (PLANNED)
+
+### What To Do
+1. **Return bytes directly** - No string conversion overhead
+2. **Zero-copy buffers** - Memory-mapped responses
+3. **Batch serialization** - Serialize multiple responses at once
+
+### Expected Results
+- **🎯 10,000-15,000 RPS** (40-70% improvement)
+- **🎯 3-5ms latency** (sub-5ms target)
+- **🎯 Zero-copy architecture**
+
+### Implementation Concept
+```python
+# Handler returns bytes directly
+async def handler():
+    return orjson.dumps({"ok": True})  # Returns bytes!
+
+# Rust: Zero-copy response
+fn create_response(data: &[u8]) -> Response {
+    // Memory-mapped buffer, no copy
+}
+```
+
+**Timeline**: ~5 hours
+**Complexity**: High (requires careful memory management)
+
+---
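The bytes-first concept above boils down to: if a handler already returns `bytes`, the server should pass them straight through instead of JSON-encoding a Python object again. A small sketch of that dispatch decision (hypothetical helper names, not the package API):

```python
import json
from typing import Any, Union

try:
    import orjson

    def _dumps(obj: Any) -> bytes:
        return orjson.dumps(obj)            # already bytes
except ImportError:                          # fall back to the stdlib if orjson is absent
    def _dumps(obj: Any) -> bytes:
        return json.dumps(obj).encode()

def serialize_result(result: Union[bytes, bytearray, Any]) -> bytes:
    """Hypothetical response step: skip serialization when the handler returns bytes."""
    if isinstance(result, (bytes, bytearray)):
        return bytes(result)                 # bytes-first: no re-encoding, hand straight to Rust
    return _dumps(result)                    # object path: encode once, still returns bytes

if __name__ == "__main__":
    print(serialize_result(b'{"ok":true}'))  # passes through untouched
    print(serialize_result({"ok": True}))    # encoded exactly once
```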
+
+## 🔍 Bottleneck Analysis
+
+### Phase A Bottlenecks (Current)
+1. **Python asyncio** - Pure Python event loop (slow)
+2. **json.dumps** - Pure Python JSON serialization (slow)
+3. **No task limiting** - Event loops can be overloaded
+4. **String conversions** - Bytes → String overhead
+
+### Phase B Fixes
+- ✅ uvloop (C event loop)
+- ✅ orjson (Rust JSON)
+- ✅ Semaphore gating
+- ⏳ String conversions (Phase C)
+
+### Phase C Fixes
+- ✅ Bytes-first handlers
+- ✅ Zero-copy buffers
+- ✅ Batch serialization
+
+---
+
+## 📈 Performance Projections
+
+### Conservative Estimates
+```
+Baseline:  1,981 RPS
+Phase A:   3,504 RPS (+77%)   ✅ ACHIEVED
+Phase B:   7,000 RPS (+100%)  🎯 TARGET
+Phase C:  10,000 RPS (+43%)   📋 STRETCH
+```
+
+### Optimistic Estimates
+```
+Baseline:  1,981 RPS
+Phase A:   3,504 RPS (+77%)   ✅ ACHIEVED
+Phase B:   9,000 RPS (+157%)  🎯 TARGET
+Phase C:  15,000 RPS (+67%)   📋 STRETCH
+```
+
+### Realistic Target
+```
+Phase B:  7,500 RPS (2.1x from Phase A)
+Phase C: 12,000 RPS (1.6x from Phase B)
+```
+
+---
+
+## 🛠️ Implementation Checklist
+
+### Phase A ✅
+- [x] Define LoopShard struct
+- [x] Implement spawn_loop_shards()
+- [x] Update handle_request() for sharding
+- [x] Increase batch size to 128
+- [x] Test and benchmark
+- [x] Document results
+
+### Phase B ⏳
+- [ ] Install uvloop and orjson
+- [ ] Update spawn_loop_shards() for uvloop
+- [ ] Create AsyncLimiter class
+- [ ] Update process_request_optimized()
+- [ ] Update serialize_result_optimized()
+- [ ] Test and benchmark
+- [ ] Document results
+
+### Phase C 📋
+- [ ] Design bytes-first handler API
+- [ ] Implement zero-copy buffers
+- [ ] Add batch serialization
+- [ ] Update handler registration
+- [ ] Test and benchmark
+- [ ] Document results
+
+---
+
+## 🧪 Testing Strategy
+
+### Functional Tests
+```bash
+# Basic functionality
+curl http://localhost:8000/async
+
+# Multiple endpoints
+curl http://localhost:8000/sync
+curl http://localhost:8000/compute
+```
+
+### Performance Tests
+```bash
+# Light load
+wrk -t4 -c50 -d10s http://localhost:8000/async
+
+# Medium load
+wrk -t8 -c100 -d30s http://localhost:8000/async
+
+# Heavy load
+wrk -t12 -c200 -d60s http://localhost:8000/async
+
+# Stress test
+wrk -t16 -c500 -d120s http://localhost:8000/async
+```
+
+### Regression Tests
+```bash
+# Compare before/after
+python benchmarks/turboapi_vs_fastapi_benchmark.py
+```
+
+---
+
+## 📚 Documentation
+
+### Implementation Guides
+- ✅ `PHASE_A_IMPLEMENTATION_GUIDE.md` - Loop sharding
+- ✅ `PHASE_B_IMPLEMENTATION_GUIDE.md` - uvloop + optimizations
+- 📋 `PHASE_C_IMPLEMENTATION_GUIDE.md` - Bytes-first (TODO)
+
+### Results Documents
+- ✅ `PHASE_A_RESULTS.md` - 3,504 RPS achieved
+- ⏳ `PHASE_B_RESULTS.md` - TBD
+- 📋 `PHASE_C_RESULTS.md` - TBD
+
+### Technical Analysis
+- ✅ `TRUE_ASYNC_SUCCESS.md` - Async architecture analysis
+- ✅ `EVENT_LOOP_OPTIMIZATION_STATUS.md` - Event loop bottleneck
+- ✅ `APACHE_BENCH_RESULTS.md` - Baseline benchmarks
+
+---
+
+## 🎯 Success Metrics
+
+### Phase B Success
+- **RPS**: 7,000+ (2x from Phase A)
+- **Latency**: <10ms P95
+- **Stability**: No crashes under 200 concurrent connections
+- **CPU**: <80% utilization at peak load
+
+### Phase C Success
+- **RPS**: 10,000+ (1.4x from Phase B)
+- **Latency**: <5ms P95
+- **Memory**: Zero-copy architecture verified
+- **Throughput**: 15K+ RPS sustained
+
+---
+
+## 🚀 Quick Start
+
+### Run Current (Phase A)
+```bash
+# Build
+maturin develop --manifest-path Cargo.toml --release
+
+# Run
+python test_multi_worker.py
+
+# Benchmark
+wrk -t4 -c50 -d10s http://localhost:8000/async
+```
+
+### Implement Phase B
+```bash
+# Install dependencies
+pip install uvloop orjson
+
+# Follow guide
+cat PHASE_B_IMPLEMENTATION_GUIDE.md
+
+# Rebuild and test
+maturin develop --release
+python test_multi_worker.py
+```
+
+---
+
+## 📞 Support
+
+- **Issues**: GitHub Issues
+- **Docs**: `AGENTS.md`, `README.md`
+- **Benchmarks**: `benchmarks/` directory
+
+---
+
+**Current Status**: ✅ Phase A Complete (3,504 RPS)
+**Next Action**: Implement Phase B (uvloop + orjson)
+**Final Goal**: 10-15K RPS with Phase C
+
+🚀 **Let's keep going!**

Cargo.lock
@@ -1439,7 +1439,7 @@ dependencies = [
 
 [[package]]
 name = "turbonet"
-version = "0.3.24"
+version = "0.3.28"
 dependencies = [
  "anyhow",
  "bytes",

Cargo.toml
@@ -1,9 +1,9 @@
 [package]
 name = "turbonet"
-version = "0.3.24"
+version = "0.3.28"
 edition = "2021"
 authors = ["Rach Pradhan <rach@turboapi.dev>"]
-description = "High-performance Python web framework core - Rust-powered HTTP server with Python 3.13 free-threading support"
+description = "High-performance Python web framework core - Rust-powered HTTP server with Python 3.14 free-threading support, FastAPI-compatible security and middleware"
 license = "MIT"
 repository = "https://github.com/justrach/turboAPI"
 homepage = "https://github.com/justrach/turboAPI"
@@ -23,7 +23,7 @@ python = ["pyo3"]
 [dependencies]
 pyo3 = { version = "0.26.0", features = ["extension-module"], optional = true }
 pyo3-async-runtimes = { version = "0.26", features = ["tokio-runtime"] }
-tokio = { version = "1.0", features = ["full"] }
+tokio = { version = "1.47.1", features = ["full"] }
 hyper = { version = "1.7.0", features = ["full", "http2"] }
 hyper-util = { version = "0.1.10", features = ["full", "http2"] }
 http-body-util = "0.1.2"