timber-compiler 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. timber_compiler-0.1.0/.gitignore +57 -0
  2. timber_compiler-0.1.0/LICENSE +19 -0
  3. timber_compiler-0.1.0/MANIFEST.in +5 -0
  4. timber_compiler-0.1.0/PKG-INFO +215 -0
  5. timber_compiler-0.1.0/README.md +179 -0
  6. timber_compiler-0.1.0/examples/bench_python.py +90 -0
  7. timber_compiler-0.1.0/examples/breast_cancer_model.json +1 -0
  8. timber_compiler-0.1.0/examples/compiled/CMakeLists.txt +22 -0
  9. timber_compiler-0.1.0/examples/compiled/Makefile +19 -0
  10. timber_compiler-0.1.0/examples/compiled/audit_report.json +163 -0
  11. timber_compiler-0.1.0/examples/compiled/bench.c +143 -0
  12. timber_compiler-0.1.0/examples/compiled/main.c +74 -0
  13. timber_compiler-0.1.0/examples/compiled/model.c +148 -0
  14. timber_compiler-0.1.0/examples/compiled/model.h +57 -0
  15. timber_compiler-0.1.0/examples/compiled/model.timber.json +9192 -0
  16. timber_compiler-0.1.0/examples/compiled/model_data.c +458 -0
  17. timber_compiler-0.1.0/examples/compiled/test_samples.csv +11 -0
  18. timber_compiler-0.1.0/examples/compiled_exact/CMakeLists.txt +22 -0
  19. timber_compiler-0.1.0/examples/compiled_exact/Makefile +19 -0
  20. timber_compiler-0.1.0/examples/compiled_exact/audit_report.json +44 -0
  21. timber_compiler-0.1.0/examples/compiled_exact/main.c +74 -0
  22. timber_compiler-0.1.0/examples/compiled_exact/model.c +148 -0
  23. timber_compiler-0.1.0/examples/compiled_exact/model.h +57 -0
  24. timber_compiler-0.1.0/examples/compiled_exact/model.timber.json +9159 -0
  25. timber_compiler-0.1.0/examples/compiled_exact/model_data.c +458 -0
  26. timber_compiler-0.1.0/examples/compiled_exact/test_samples.csv +11 -0
  27. timber_compiler-0.1.0/examples/diagnose_accuracy.py +38 -0
  28. timber_compiler-0.1.0/examples/diagnose_base_score.py +76 -0
  29. timber_compiler-0.1.0/examples/generate_sample.py +50 -0
  30. timber_compiler-0.1.0/examples/sample_model.json +181 -0
  31. timber_compiler-0.1.0/examples/test_ctypes_wrapper.py +59 -0
  32. timber_compiler-0.1.0/examples/test_samples.csv +11 -0
  33. timber_compiler-0.1.0/examples/train_and_save.py +41 -0
  34. timber_compiler-0.1.0/paper/timber_paper.pdf +0 -0
  35. timber_compiler-0.1.0/paper/timber_paper.tex +633 -0
  36. timber_compiler-0.1.0/pyproject.toml +62 -0
  37. timber_compiler-0.1.0/setup.cfg +4 -0
  38. timber_compiler-0.1.0/targets/arm64_neon.toml +12 -0
  39. timber_compiler-0.1.0/targets/x86_64_avx2.toml +12 -0
  40. timber_compiler-0.1.0/targets/x86_64_avx512.toml +12 -0
  41. timber_compiler-0.1.0/targets/x86_64_generic.toml +12 -0
  42. timber_compiler-0.1.0/tests/__init__.py +0 -0
  43. timber_compiler-0.1.0/tests/test_codegen.py +139 -0
  44. timber_compiler-0.1.0/tests/test_end_to_end.py +250 -0
  45. timber_compiler-0.1.0/tests/test_fuzz.py +205 -0
  46. timber_compiler-0.1.0/tests/test_ir.py +164 -0
  47. timber_compiler-0.1.0/tests/test_multiclass.py +174 -0
  48. timber_compiler-0.1.0/tests/test_optimizer.py +241 -0
  49. timber_compiler-0.1.0/tests/test_phase2.py +191 -0
  50. timber_compiler-0.1.0/tests/test_phase3.py +267 -0
  51. timber_compiler-0.1.0/tests/test_rigorous.py +371 -0
  52. timber_compiler-0.1.0/tests/test_sklearn_parser.py +135 -0
  53. timber_compiler-0.1.0/tests/test_store.py +152 -0
  54. timber_compiler-0.1.0/tests/test_xgboost_parser.py +144 -0
  55. timber_compiler-0.1.0/timber/__init__.py +3 -0
  56. timber_compiler-0.1.0/timber/audit/__init__.py +5 -0
  57. timber_compiler-0.1.0/timber/audit/report.py +150 -0
  58. timber_compiler-0.1.0/timber/cli.py +652 -0
  59. timber_compiler-0.1.0/timber/codegen/__init__.py +5 -0
  60. timber_compiler-0.1.0/timber/codegen/c99.py +535 -0
  61. timber_compiler-0.1.0/timber/codegen/misra_c.py +161 -0
  62. timber_compiler-0.1.0/timber/codegen/wasm.py +275 -0
  63. timber_compiler-0.1.0/timber/frontends/__init__.py +5 -0
  64. timber_compiler-0.1.0/timber/frontends/auto_detect.py +112 -0
  65. timber_compiler-0.1.0/timber/frontends/catboost_parser.py +142 -0
  66. timber_compiler-0.1.0/timber/frontends/lightgbm_parser.py +298 -0
  67. timber_compiler-0.1.0/timber/frontends/onnx_parser.py +287 -0
  68. timber_compiler-0.1.0/timber/frontends/sklearn_parser.py +418 -0
  69. timber_compiler-0.1.0/timber/frontends/xgboost_parser.py +243 -0
  70. timber_compiler-0.1.0/timber/ir/__init__.py +39 -0
  71. timber_compiler-0.1.0/timber/ir/ensemble_meta.py +111 -0
  72. timber_compiler-0.1.0/timber/ir/model.py +519 -0
  73. timber_compiler-0.1.0/timber/optimizer/__init__.py +5 -0
  74. timber_compiler-0.1.0/timber/optimizer/branch_sort.py +154 -0
  75. timber_compiler-0.1.0/timber/optimizer/constant_feature.py +89 -0
  76. timber_compiler-0.1.0/timber/optimizer/dead_leaf.py +127 -0
  77. timber_compiler-0.1.0/timber/optimizer/diff_compile.py +125 -0
  78. timber_compiler-0.1.0/timber/optimizer/pipeline.py +156 -0
  79. timber_compiler-0.1.0/timber/optimizer/pipeline_fusion.py +98 -0
  80. timber_compiler-0.1.0/timber/optimizer/threshold_quant.py +134 -0
  81. timber_compiler-0.1.0/timber/optimizer/vectorize.py +134 -0
  82. timber_compiler-0.1.0/timber/py.typed +0 -0
  83. timber_compiler-0.1.0/timber/runtime/__init__.py +5 -0
  84. timber_compiler-0.1.0/timber/runtime/predictor.py +285 -0
  85. timber_compiler-0.1.0/timber/serve.py +196 -0
  86. timber_compiler-0.1.0/timber/store.py +224 -0
  87. timber_compiler-0.1.0/timber_compiler.egg-info/PKG-INFO +215 -0
  88. timber_compiler-0.1.0/timber_compiler.egg-info/SOURCES.txt +91 -0
  89. timber_compiler-0.1.0/timber_compiler.egg-info/dependency_links.txt +1 -0
  90. timber_compiler-0.1.0/timber_compiler.egg-info/entry_points.txt +2 -0
  91. timber_compiler-0.1.0/timber_compiler.egg-info/requires.txt +11 -0
  92. timber_compiler-0.1.0/timber_compiler.egg-info/top_level.txt +1 -0
  93. timber_compiler-0.1.0/timber_technical_doc.md +522 -0
@@ -0,0 +1,57 @@
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+ *.so
6
+ *.egg-info/
7
+ *.egg
8
+ dist/
9
+ build/
10
+ *.whl
11
+ .eggs/
12
+
13
+ # Virtual environments
14
+ venv/
15
+ .venv/
16
+ env/
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+ *.swp
22
+ *.swo
23
+ *~
24
+ .DS_Store
25
+
26
+ # Testing / benchmarks
27
+ .pytest_cache/
28
+ .benchmarks/
29
+ .coverage
30
+ htmlcov/
31
+ .tox/
32
+ .mypy_cache/
33
+
34
+ # LaTeX build artifacts
35
+ paper/*.aux
36
+ paper/*.log
37
+ paper/*.out
38
+ paper/*.synctex.gz
39
+ paper/*.fls
40
+ paper/*.fdb_latexmk
41
+ paper/*.blg
42
+ paper/*.bbl
43
+
44
+ # Compiled model artifacts (user-specific)
45
+ ~/.timber/
46
+
47
+ # Word doc (not source)
48
+ *.docx
49
+
50
+ # Compiled binaries in examples
51
+ examples/compiled/timber_bench
52
+ examples/compiled/timber_infer
53
+ examples/compiled_exact/timber_infer
54
+ examples/**/*.so
55
+ examples/**/*.dylib
56
+ examples/**/*.dll
57
+ examples/**/*.o
@@ -0,0 +1,19 @@
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ Licensed under the Apache License, Version 2.0 (the "License");
8
+ you may not use this file except in compliance with the License.
9
+ You may obtain a copy of the License at
10
+
11
+ http://www.apache.org/licenses/LICENSE-2.0
12
+
13
+ Unless required by applicable law or agreed to in writing, software
14
+ distributed under the License is distributed on an "AS IS" BASIS,
15
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16
+ See the License for the specific language governing permissions and
17
+ limitations under the License.
18
+
19
+ Copyright 2024-2026 Kossiso Royce / Electricsheep Africa
@@ -0,0 +1,5 @@
1
+ include README.md
2
+ include LICENSE
3
+ include pyproject.toml
4
+ recursive-include timber *.py py.typed
5
+ recursive-include targets *.toml
@@ -0,0 +1,215 @@
1
+ Metadata-Version: 2.4
2
+ Name: timber-compiler
3
+ Version: 0.1.0
4
+ Summary: Classical ML Inference Compiler — compiles trained ML models into optimized native inference binaries
5
+ Author-email: Kossiso Royce <kossiso@electricsheep.africa>
6
+ License: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/timber-compiler/timber
8
+ Project-URL: Documentation, https://github.com/timber-compiler/timber#readme
9
+ Project-URL: Repository, https://github.com/timber-compiler/timber
10
+ Project-URL: Issues, https://github.com/timber-compiler/timber/issues
11
+ Keywords: ml,compiler,inference,xgboost,lightgbm,sklearn
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
16
+ Classifier: Topic :: Software Development :: Compilers
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Programming Language :: C
22
+ Classifier: License :: OSI Approved :: Apache Software License
23
+ Classifier: Operating System :: OS Independent
24
+ Requires-Python: >=3.10
25
+ Description-Content-Type: text/markdown
26
+ License-File: LICENSE
27
+ Requires-Dist: click>=8.1
28
+ Requires-Dist: numpy>=1.24
29
+ Requires-Dist: tomli>=2.0; python_version < "3.11"
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0; extra == "dev"
32
+ Requires-Dist: xgboost>=1.7; extra == "dev"
33
+ Requires-Dist: lightgbm>=4.0; extra == "dev"
34
+ Requires-Dist: scikit-learn>=1.3; extra == "dev"
35
+ Dynamic: license-file
36
+
37
+ # Timber
38
+
39
+ **Ollama for classical ML models.**
40
+
41
+ Timber compiles trained tree-based models (XGBoost, LightGBM, scikit-learn, CatBoost, ONNX) into optimized native code and serves them over a local HTTP API — just like Ollama does for LLMs, but for small models.
42
+
43
+ No Python runtime at inference time. Sub-microsecond latency. One command to load, one command to serve.
44
+
45
+ ## Quick Start
46
+
47
+ ```bash
48
+ pip install timber
49
+ ```
50
+
51
+ ### Load a model
52
+
53
+ ```bash
54
+ # Load any supported model — Timber auto-detects the format
55
+ timber load model.json
56
+ timber load model.json --name fraud-detector
57
+ timber load model.pkl --format sklearn
58
+ ```
59
+
60
+ ### Serve it
61
+
62
+ ```bash
63
+ timber serve fraud-detector
64
+ ```
65
+
66
+ ```
67
+ _____ _ _
68
+ |_ _(_)_ __ ___ | |__ ___ _ __
69
+ | | | | '_ ` _ \| '_ \ / _ \ '__|
70
+ | | | | | | | | | |_) | __/ |
71
+ |_| |_|_| |_| |_|_.__/ \___|_|
72
+
73
+ Classical ML Inference Server v0.1.0
74
+
75
+ Listening on http://0.0.0.0:11434
76
+ Model: fraud-detector
77
+ Trees: 100
78
+ Features: 30
79
+ ```
80
+
81
+ ### Run inference
82
+
83
+ ```bash
84
+ curl http://localhost:11434/api/predict \
85
+ -d '{"model": "fraud-detector", "inputs": [[1.0, 2.0, 3.0, ...]]}'
86
+ ```
87
+
88
+ ```json
89
+ {
90
+ "model": "fraud-detector",
91
+ "outputs": [0.97],
92
+ "n_samples": 1,
93
+ "latency_us": 0.8,
94
+ "done": true
95
+ }
96
+ ```
97
+
98
+ ### Manage models
99
+
100
+ ```bash
101
+ timber list # list all loaded models
102
+ timber remove fraud-detector # remove a model
103
+ ```
104
+
105
+ ```
106
+ NAME FORMAT TREES FEATURES SIZE COMPILED
107
+ ---------------------------------------------------------------------------
108
+ fraud-detector xgboost 100 30 42.1 KB yes
109
+ churn-model lightgbm 50 18 28.3 KB yes
110
+ ```
111
+
112
+ ## API Reference
113
+
114
+ | Endpoint | Method | Description |
115
+ |----------|--------|-------------|
116
+ | `/api/predict` | POST | Run inference — `{"model": "name", "inputs": [[...]]}` |
117
+ | `/api/generate` | POST | Alias for `/api/predict` (Ollama compat) |
118
+ | `/api/models` | GET | List loaded models |
119
+ | `/api/model/:name` | GET | Get model info |
120
+ | `/api/health` | GET | Health check |
121
+
122
+ ## Supported Formats
123
+
124
+ | Format | Framework | File Types |
125
+ |--------|-----------|------------|
126
+ | XGBoost JSON | XGBoost | `.json` |
127
+ | LightGBM text | LightGBM | `.txt`, `.model`, `.lgb` |
128
+ | scikit-learn pickle | scikit-learn | `.pkl`, `.pickle` |
129
+ | ONNX ML opset | ONNX | `.onnx` |
130
+ | CatBoost JSON | CatBoost | `.json` |
131
+
132
+ All formats are auto-detected. Use `--format` to override.
133
+
134
+ ## Advanced: Direct Compilation
135
+
136
+ For embedding in C/C++ projects without the server:
137
+
138
+ ```bash
139
+ # Compile to C99 source
140
+ timber compile --model model.json --out ./dist/
141
+
142
+ # Inspect a model
143
+ timber inspect model.json
144
+
145
+ # Validate compiled output
146
+ timber validate --artifact ./dist/ --reference model.json --data test.csv
147
+
148
+ # Benchmark
149
+ timber bench --artifact ./dist/ --data bench.csv
150
+ ```
151
+
152
+ ### C API
153
+
154
+ ```c
155
+ #include "model.h"
156
+
157
+ TimberCtx* ctx;
158
+ timber_init(&ctx);
159
+
160
+ float inputs[TIMBER_N_FEATURES] = { /* ... */ };
161
+ float outputs[TIMBER_N_OUTPUTS];
162
+
163
+ timber_infer_single(inputs, outputs, ctx);
164
+ timber_free(ctx);
165
+ ```
166
+
167
+ ### Logging Callback
168
+
169
+ ```c
170
+ void my_logger(int level, const char* msg) {
171
+ printf("[timber] %s\n", msg);
172
+ }
173
+
174
+ timber_set_log_callback(my_logger);
175
+ ```
176
+
177
+ ## Compiler Pipeline
178
+
179
+ ```
180
+ Model artifact → Front-end parser → Timber IR → Optimizer → Code generator → Native code
181
+ ```
182
+
183
+ ### Optimizer Passes
184
+
185
+ 1. **Dead Leaf Elimination** — Prune negligible leaves
186
+ 2. **Constant Feature Detection** — Fold trivial splits
187
+ 3. **Threshold Quantization** — Classify thresholds for optimal storage
188
+ 4. **Frequency-Ordered Branch Sorting** — Reorder for branch prediction (with calibration data)
189
+ 5. **Pipeline Fusion** — Absorb scalers into tree thresholds
190
+ 6. **Vectorization Analysis** — Identify SIMD batching opportunities
191
+
192
+ ## Architecture
193
+
194
+ ```
195
+ timber/
196
+ ├── ir/ # Intermediate Representation
197
+ ├── frontends/ # Model format parsers (xgboost, lightgbm, sklearn, onnx, catboost)
198
+ ├── optimizer/ # IR optimization passes (6 passes)
199
+ ├── codegen/ # Code generation (C99, WebAssembly, MISRA-C)
200
+ ├── runtime/ # Python ctypes predictor
201
+ ├── store.py # Local model registry (~/.timber/models/)
202
+ ├── serve.py # HTTP inference server
203
+ └── cli.py # CLI (load, serve, list, remove, compile, inspect, ...)
204
+ ```
205
+
206
+ ## Development
207
+
208
+ ```bash
209
+ pip install -e ".[dev]"
210
+ pytest tests/ -v # 144 tests
211
+ ```
212
+
213
+ ## License
214
+
215
+ Apache-2.0
@@ -0,0 +1,179 @@
1
+ # Timber
2
+
3
+ **Ollama for classical ML models.**
4
+
5
+ Timber compiles trained tree-based models (XGBoost, LightGBM, scikit-learn, CatBoost, ONNX) into optimized native code and serves them over a local HTTP API — just like Ollama does for LLMs, but for small models.
6
+
7
+ No Python runtime at inference time. Sub-microsecond latency. One command to load, one command to serve.
8
+
9
+ ## Quick Start
10
+
11
+ ```bash
12
+ pip install timber
13
+ ```
14
+
15
+ ### Load a model
16
+
17
+ ```bash
18
+ # Load any supported model — Timber auto-detects the format
19
+ timber load model.json
20
+ timber load model.json --name fraud-detector
21
+ timber load model.pkl --format sklearn
22
+ ```
23
+
24
+ ### Serve it
25
+
26
+ ```bash
27
+ timber serve fraud-detector
28
+ ```
29
+
30
+ ```
31
+ _____ _ _
32
+ |_ _(_)_ __ ___ | |__ ___ _ __
33
+ | | | | '_ ` _ \| '_ \ / _ \ '__|
34
+ | | | | | | | | | |_) | __/ |
35
+ |_| |_|_| |_| |_|_.__/ \___|_|
36
+
37
+ Classical ML Inference Server v0.1.0
38
+
39
+ Listening on http://0.0.0.0:11434
40
+ Model: fraud-detector
41
+ Trees: 100
42
+ Features: 30
43
+ ```
44
+
45
+ ### Run inference
46
+
47
+ ```bash
48
+ curl http://localhost:11434/api/predict \
49
+ -d '{"model": "fraud-detector", "inputs": [[1.0, 2.0, 3.0, ...]]}'
50
+ ```
51
+
52
+ ```json
53
+ {
54
+ "model": "fraud-detector",
55
+ "outputs": [0.97],
56
+ "n_samples": 1,
57
+ "latency_us": 0.8,
58
+ "done": true
59
+ }
60
+ ```
61
+
62
+ ### Manage models
63
+
64
+ ```bash
65
+ timber list # list all loaded models
66
+ timber remove fraud-detector # remove a model
67
+ ```
68
+
69
+ ```
70
+ NAME FORMAT TREES FEATURES SIZE COMPILED
71
+ ---------------------------------------------------------------------------
72
+ fraud-detector xgboost 100 30 42.1 KB yes
73
+ churn-model lightgbm 50 18 28.3 KB yes
74
+ ```
75
+
76
+ ## API Reference
77
+
78
+ | Endpoint | Method | Description |
79
+ |----------|--------|-------------|
80
+ | `/api/predict` | POST | Run inference — `{"model": "name", "inputs": [[...]]}` |
81
+ | `/api/generate` | POST | Alias for `/api/predict` (Ollama compat) |
82
+ | `/api/models` | GET | List loaded models |
83
+ | `/api/model/:name` | GET | Get model info |
84
+ | `/api/health` | GET | Health check |
85
+
86
+ ## Supported Formats
87
+
88
+ | Format | Framework | File Types |
89
+ |--------|-----------|------------|
90
+ | XGBoost JSON | XGBoost | `.json` |
91
+ | LightGBM text | LightGBM | `.txt`, `.model`, `.lgb` |
92
+ | scikit-learn pickle | scikit-learn | `.pkl`, `.pickle` |
93
+ | ONNX ML opset | ONNX | `.onnx` |
94
+ | CatBoost JSON | CatBoost | `.json` |
95
+
96
+ All formats are auto-detected. Use `--format` to override.
97
+
98
+ ## Advanced: Direct Compilation
99
+
100
+ For embedding in C/C++ projects without the server:
101
+
102
+ ```bash
103
+ # Compile to C99 source
104
+ timber compile --model model.json --out ./dist/
105
+
106
+ # Inspect a model
107
+ timber inspect model.json
108
+
109
+ # Validate compiled output
110
+ timber validate --artifact ./dist/ --reference model.json --data test.csv
111
+
112
+ # Benchmark
113
+ timber bench --artifact ./dist/ --data bench.csv
114
+ ```
115
+
116
+ ### C API
117
+
118
+ ```c
119
+ #include "model.h"
120
+
121
+ TimberCtx* ctx;
122
+ timber_init(&ctx);
123
+
124
+ float inputs[TIMBER_N_FEATURES] = { /* ... */ };
125
+ float outputs[TIMBER_N_OUTPUTS];
126
+
127
+ timber_infer_single(inputs, outputs, ctx);
128
+ timber_free(ctx);
129
+ ```
130
+
131
+ ### Logging Callback
132
+
133
+ ```c
134
+ void my_logger(int level, const char* msg) {
135
+ printf("[timber] %s\n", msg);
136
+ }
137
+
138
+ timber_set_log_callback(my_logger);
139
+ ```
140
+
141
+ ## Compiler Pipeline
142
+
143
+ ```
144
+ Model artifact → Front-end parser → Timber IR → Optimizer → Code generator → Native code
145
+ ```
146
+
147
+ ### Optimizer Passes
148
+
149
+ 1. **Dead Leaf Elimination** — Prune negligible leaves
150
+ 2. **Constant Feature Detection** — Fold trivial splits
151
+ 3. **Threshold Quantization** — Classify thresholds for optimal storage
152
+ 4. **Frequency-Ordered Branch Sorting** — Reorder for branch prediction (with calibration data)
153
+ 5. **Pipeline Fusion** — Absorb scalers into tree thresholds
154
+ 6. **Vectorization Analysis** — Identify SIMD batching opportunities
155
+
156
+ ## Architecture
157
+
158
+ ```
159
+ timber/
160
+ ├── ir/ # Intermediate Representation
161
+ ├── frontends/ # Model format parsers (xgboost, lightgbm, sklearn, onnx, catboost)
162
+ ├── optimizer/ # IR optimization passes (6 passes)
163
+ ├── codegen/ # Code generation (C99, WebAssembly, MISRA-C)
164
+ ├── runtime/ # Python ctypes predictor
165
+ ├── store.py # Local model registry (~/.timber/models/)
166
+ ├── serve.py # HTTP inference server
167
+ └── cli.py # CLI (load, serve, list, remove, compile, inspect, ...)
168
+ ```
169
+
170
+ ## Development
171
+
172
+ ```bash
173
+ pip install -e ".[dev]"
174
+ pytest tests/ -v # 144 tests
175
+ ```
176
+
177
+ ## License
178
+
179
+ Apache-2.0
@@ -0,0 +1,90 @@
1
"""Benchmark XGBoost Python inference for comparison with Timber C99.

Trains a small binary classifier on the breast-cancer dataset, then measures
per-call `Booster.predict` latency for single-sample and small-batch inputs.
Prints mean / P50 / P95 / P99 latency (microseconds) and throughput.
"""
import time

import numpy as np
import xgboost as xgb
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

WARMUP = 1000   # untimed iterations to warm caches / JIT-ish internals
ITERS = 10000   # timed iterations per configuration


def _measure(booster, dmat, *, iters=ITERS, warmup=WARMUP):
    """Return sorted per-call latencies in microseconds for booster.predict(dmat).

    The warmup loop is untimed; each timed call is wrapped individually with
    perf_counter_ns so percentiles reflect single-call latency, not averages.
    """
    for _ in range(warmup):
        booster.predict(dmat)
    latencies = []
    for _ in range(iters):
        t0 = time.perf_counter_ns()
        booster.predict(dmat)
        t1 = time.perf_counter_ns()
        latencies.append((t1 - t0) / 1000.0)  # ns -> us
    latencies.sort()
    return latencies


def _report(label, latencies, n_samples):
    """Print mean/percentile stats and throughput for one benchmark configuration.

    `latencies` must be sorted ascending (as returned by _measure);
    `n_samples` is the batch size, used to scale throughput to samples/sec.
    """
    mean = sum(latencies) / len(latencies)
    p50 = latencies[len(latencies) // 2]
    p95 = latencies[int(len(latencies) * 0.95)]
    p99 = latencies[int(len(latencies) * 0.99)]
    print(f"{label}:")
    print(f"  Mean: {mean:.2f} us")
    print(f"  P50:  {p50:.2f} us")
    print(f"  P95:  {p95:.2f} us")
    print(f"  P99:  {p99:.2f} us")
    print(f"  Throughput: {n_samples * 1e6 / mean:.0f} samples/sec")
    print()


def main():
    data = load_breast_cancer()
    X, y = data.data.astype(np.float32), data.target
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # NOTE: use_label_encoder was deprecated in XGBoost 1.7 and removed in 2.x,
    # so it is intentionally not passed here.
    model = xgb.XGBClassifier(
        n_estimators=50, max_depth=4, learning_rate=0.1,
        objective="binary:logistic", random_state=42,
        eval_metric="logloss",
    )
    model.fit(X_train, y_train)
    booster = model.get_booster()

    samples = X_test[:10]

    print("XGBoost Python Inference Benchmark")
    print("=" * 50)
    print(f"Trees: {model.n_estimators}")
    print(f"Features: {X_test.shape[1]}")
    print(f"Samples: {len(samples)}")
    print(f"Warmup: {WARMUP} iters")
    print(f"Timed: {ITERS} iters")
    print("=" * 50)
    print()

    # --- Single-sample (batch=1) ---
    single = np.ascontiguousarray(samples[:1])
    _report("Single-sample (batch=1)", _measure(booster, xgb.DMatrix(single)), 1)

    # --- Batch inference ---
    for bs in [1, 4, 10]:
        batch = np.ascontiguousarray(samples[:bs])
        _report(f"Batch={bs}", _measure(booster, xgb.DMatrix(batch)), bs)


if __name__ == "__main__":
    main()