timber-compiler 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- timber_compiler-0.1.0/.gitignore +57 -0
- timber_compiler-0.1.0/LICENSE +19 -0
- timber_compiler-0.1.0/MANIFEST.in +5 -0
- timber_compiler-0.1.0/PKG-INFO +215 -0
- timber_compiler-0.1.0/README.md +179 -0
- timber_compiler-0.1.0/examples/bench_python.py +90 -0
- timber_compiler-0.1.0/examples/breast_cancer_model.json +1 -0
- timber_compiler-0.1.0/examples/compiled/CMakeLists.txt +22 -0
- timber_compiler-0.1.0/examples/compiled/Makefile +19 -0
- timber_compiler-0.1.0/examples/compiled/audit_report.json +163 -0
- timber_compiler-0.1.0/examples/compiled/bench.c +143 -0
- timber_compiler-0.1.0/examples/compiled/main.c +74 -0
- timber_compiler-0.1.0/examples/compiled/model.c +148 -0
- timber_compiler-0.1.0/examples/compiled/model.h +57 -0
- timber_compiler-0.1.0/examples/compiled/model.timber.json +9192 -0
- timber_compiler-0.1.0/examples/compiled/model_data.c +458 -0
- timber_compiler-0.1.0/examples/compiled/test_samples.csv +11 -0
- timber_compiler-0.1.0/examples/compiled_exact/CMakeLists.txt +22 -0
- timber_compiler-0.1.0/examples/compiled_exact/Makefile +19 -0
- timber_compiler-0.1.0/examples/compiled_exact/audit_report.json +44 -0
- timber_compiler-0.1.0/examples/compiled_exact/main.c +74 -0
- timber_compiler-0.1.0/examples/compiled_exact/model.c +148 -0
- timber_compiler-0.1.0/examples/compiled_exact/model.h +57 -0
- timber_compiler-0.1.0/examples/compiled_exact/model.timber.json +9159 -0
- timber_compiler-0.1.0/examples/compiled_exact/model_data.c +458 -0
- timber_compiler-0.1.0/examples/compiled_exact/test_samples.csv +11 -0
- timber_compiler-0.1.0/examples/diagnose_accuracy.py +38 -0
- timber_compiler-0.1.0/examples/diagnose_base_score.py +76 -0
- timber_compiler-0.1.0/examples/generate_sample.py +50 -0
- timber_compiler-0.1.0/examples/sample_model.json +181 -0
- timber_compiler-0.1.0/examples/test_ctypes_wrapper.py +59 -0
- timber_compiler-0.1.0/examples/test_samples.csv +11 -0
- timber_compiler-0.1.0/examples/train_and_save.py +41 -0
- timber_compiler-0.1.0/paper/timber_paper.pdf +0 -0
- timber_compiler-0.1.0/paper/timber_paper.tex +633 -0
- timber_compiler-0.1.0/pyproject.toml +62 -0
- timber_compiler-0.1.0/setup.cfg +4 -0
- timber_compiler-0.1.0/targets/arm64_neon.toml +12 -0
- timber_compiler-0.1.0/targets/x86_64_avx2.toml +12 -0
- timber_compiler-0.1.0/targets/x86_64_avx512.toml +12 -0
- timber_compiler-0.1.0/targets/x86_64_generic.toml +12 -0
- timber_compiler-0.1.0/tests/__init__.py +0 -0
- timber_compiler-0.1.0/tests/test_codegen.py +139 -0
- timber_compiler-0.1.0/tests/test_end_to_end.py +250 -0
- timber_compiler-0.1.0/tests/test_fuzz.py +205 -0
- timber_compiler-0.1.0/tests/test_ir.py +164 -0
- timber_compiler-0.1.0/tests/test_multiclass.py +174 -0
- timber_compiler-0.1.0/tests/test_optimizer.py +241 -0
- timber_compiler-0.1.0/tests/test_phase2.py +191 -0
- timber_compiler-0.1.0/tests/test_phase3.py +267 -0
- timber_compiler-0.1.0/tests/test_rigorous.py +371 -0
- timber_compiler-0.1.0/tests/test_sklearn_parser.py +135 -0
- timber_compiler-0.1.0/tests/test_store.py +152 -0
- timber_compiler-0.1.0/tests/test_xgboost_parser.py +144 -0
- timber_compiler-0.1.0/timber/__init__.py +3 -0
- timber_compiler-0.1.0/timber/audit/__init__.py +5 -0
- timber_compiler-0.1.0/timber/audit/report.py +150 -0
- timber_compiler-0.1.0/timber/cli.py +652 -0
- timber_compiler-0.1.0/timber/codegen/__init__.py +5 -0
- timber_compiler-0.1.0/timber/codegen/c99.py +535 -0
- timber_compiler-0.1.0/timber/codegen/misra_c.py +161 -0
- timber_compiler-0.1.0/timber/codegen/wasm.py +275 -0
- timber_compiler-0.1.0/timber/frontends/__init__.py +5 -0
- timber_compiler-0.1.0/timber/frontends/auto_detect.py +112 -0
- timber_compiler-0.1.0/timber/frontends/catboost_parser.py +142 -0
- timber_compiler-0.1.0/timber/frontends/lightgbm_parser.py +298 -0
- timber_compiler-0.1.0/timber/frontends/onnx_parser.py +287 -0
- timber_compiler-0.1.0/timber/frontends/sklearn_parser.py +418 -0
- timber_compiler-0.1.0/timber/frontends/xgboost_parser.py +243 -0
- timber_compiler-0.1.0/timber/ir/__init__.py +39 -0
- timber_compiler-0.1.0/timber/ir/ensemble_meta.py +111 -0
- timber_compiler-0.1.0/timber/ir/model.py +519 -0
- timber_compiler-0.1.0/timber/optimizer/__init__.py +5 -0
- timber_compiler-0.1.0/timber/optimizer/branch_sort.py +154 -0
- timber_compiler-0.1.0/timber/optimizer/constant_feature.py +89 -0
- timber_compiler-0.1.0/timber/optimizer/dead_leaf.py +127 -0
- timber_compiler-0.1.0/timber/optimizer/diff_compile.py +125 -0
- timber_compiler-0.1.0/timber/optimizer/pipeline.py +156 -0
- timber_compiler-0.1.0/timber/optimizer/pipeline_fusion.py +98 -0
- timber_compiler-0.1.0/timber/optimizer/threshold_quant.py +134 -0
- timber_compiler-0.1.0/timber/optimizer/vectorize.py +134 -0
- timber_compiler-0.1.0/timber/py.typed +0 -0
- timber_compiler-0.1.0/timber/runtime/__init__.py +5 -0
- timber_compiler-0.1.0/timber/runtime/predictor.py +285 -0
- timber_compiler-0.1.0/timber/serve.py +196 -0
- timber_compiler-0.1.0/timber/store.py +224 -0
- timber_compiler-0.1.0/timber_compiler.egg-info/PKG-INFO +215 -0
- timber_compiler-0.1.0/timber_compiler.egg-info/SOURCES.txt +91 -0
- timber_compiler-0.1.0/timber_compiler.egg-info/dependency_links.txt +1 -0
- timber_compiler-0.1.0/timber_compiler.egg-info/entry_points.txt +2 -0
- timber_compiler-0.1.0/timber_compiler.egg-info/requires.txt +11 -0
- timber_compiler-0.1.0/timber_compiler.egg-info/top_level.txt +1 -0
- timber_compiler-0.1.0/timber_technical_doc.md +522 -0
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# Python
|
|
2
|
+
__pycache__/
|
|
3
|
+
*.py[cod]
|
|
4
|
+
*$py.class
|
|
5
|
+
*.so
|
|
6
|
+
*.egg-info/
|
|
7
|
+
*.egg
|
|
8
|
+
dist/
|
|
9
|
+
build/
|
|
10
|
+
*.whl
|
|
11
|
+
.eggs/
|
|
12
|
+
|
|
13
|
+
# Virtual environments
|
|
14
|
+
venv/
|
|
15
|
+
.venv/
|
|
16
|
+
env/
|
|
17
|
+
|
|
18
|
+
# IDE
|
|
19
|
+
.vscode/
|
|
20
|
+
.idea/
|
|
21
|
+
*.swp
|
|
22
|
+
*.swo
|
|
23
|
+
*~
|
|
24
|
+
.DS_Store
|
|
25
|
+
|
|
26
|
+
# Testing / benchmarks
|
|
27
|
+
.pytest_cache/
|
|
28
|
+
.benchmarks/
|
|
29
|
+
.coverage
|
|
30
|
+
htmlcov/
|
|
31
|
+
.tox/
|
|
32
|
+
.mypy_cache/
|
|
33
|
+
|
|
34
|
+
# LaTeX build artifacts
|
|
35
|
+
paper/*.aux
|
|
36
|
+
paper/*.log
|
|
37
|
+
paper/*.out
|
|
38
|
+
paper/*.synctex.gz
|
|
39
|
+
paper/*.fls
|
|
40
|
+
paper/*.fdb_latexmk
|
|
41
|
+
paper/*.blg
|
|
42
|
+
paper/*.bbl
|
|
43
|
+
|
|
44
|
+
# Compiled model artifacts (user-specific)
|
|
45
|
+
.timber/
|
|
46
|
+
|
|
47
|
+
# Word doc (not source)
|
|
48
|
+
*.docx
|
|
49
|
+
|
|
50
|
+
# Compiled binaries in examples
|
|
51
|
+
examples/compiled/timber_bench
|
|
52
|
+
examples/compiled/timber_infer
|
|
53
|
+
examples/compiled_exact/timber_infer
|
|
54
|
+
examples/**/*.so
|
|
55
|
+
examples/**/*.dylib
|
|
56
|
+
examples/**/*.dll
|
|
57
|
+
examples/**/*.o
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
8
|
+
you may not use this file except in compliance with the License.
|
|
9
|
+
You may obtain a copy of the License at
|
|
10
|
+
|
|
11
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
12
|
+
|
|
13
|
+
Unless required by applicable law or agreed to in writing, software
|
|
14
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
15
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
16
|
+
See the License for the specific language governing permissions and
|
|
17
|
+
limitations under the License.
|
|
18
|
+
|
|
19
|
+
Copyright 2024-2026 Kossiso Royce / Electricsheep Africa
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: timber-compiler
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Classical ML Inference Compiler — compiles trained ML models into optimized native inference binaries
|
|
5
|
+
Author-email: Kossiso Royce <kossiso@electricsheep.africa>
|
|
6
|
+
License: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/timber-compiler/timber
|
|
8
|
+
Project-URL: Documentation, https://github.com/timber-compiler/timber#readme
|
|
9
|
+
Project-URL: Repository, https://github.com/timber-compiler/timber
|
|
10
|
+
Project-URL: Issues, https://github.com/timber-compiler/timber/issues
|
|
11
|
+
Keywords: ml,compiler,inference,xgboost,lightgbm,sklearn
|
|
12
|
+
Classifier: Development Status :: 3 - Alpha
|
|
13
|
+
Classifier: Intended Audience :: Developers
|
|
14
|
+
Classifier: Intended Audience :: Science/Research
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Classifier: Topic :: Software Development :: Compilers
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
21
|
+
Classifier: Programming Language :: C
|
|
22
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
23
|
+
Classifier: Operating System :: OS Independent
|
|
24
|
+
Requires-Python: >=3.10
|
|
25
|
+
Description-Content-Type: text/markdown
|
|
26
|
+
License-File: LICENSE
|
|
27
|
+
Requires-Dist: click>=8.1
|
|
28
|
+
Requires-Dist: numpy>=1.24
|
|
29
|
+
Requires-Dist: tomli>=2.0; python_version < "3.11"
|
|
30
|
+
Provides-Extra: dev
|
|
31
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
32
|
+
Requires-Dist: xgboost>=1.7; extra == "dev"
|
|
33
|
+
Requires-Dist: lightgbm>=4.0; extra == "dev"
|
|
34
|
+
Requires-Dist: scikit-learn>=1.3; extra == "dev"
|
|
35
|
+
Dynamic: license-file
|
|
36
|
+
|
|
37
|
+
# Timber
|
|
38
|
+
|
|
39
|
+
**Ollama for classical ML models.**
|
|
40
|
+
|
|
41
|
+
Timber compiles trained tree-based models (XGBoost, LightGBM, scikit-learn, CatBoost, ONNX) into optimized native code and serves them over a local HTTP API — just like Ollama does for LLMs, but for small models.
|
|
42
|
+
|
|
43
|
+
No Python runtime at inference time. Sub-microsecond latency. One command to load, one command to serve.
|
|
44
|
+
|
|
45
|
+
## Quick Start
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
pip install timber-compiler
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Load a model
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# Load any supported model — Timber auto-detects the format
|
|
55
|
+
timber load model.json
|
|
56
|
+
timber load model.json --name fraud-detector
|
|
57
|
+
timber load model.pkl --format sklearn
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
### Serve it
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
timber serve fraud-detector
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
```
|
|
67
|
+
_____ _ _
|
|
68
|
+
|_ _(_)_ __ ___ | |__ ___ _ __
|
|
69
|
+
| | | | '_ ` _ \| '_ \ / _ \ '__|
|
|
70
|
+
| | | | | | | | | |_) | __/ |
|
|
71
|
+
|_| |_|_| |_| |_|_.__/ \___|_|
|
|
72
|
+
|
|
73
|
+
Classical ML Inference Server v0.1.0
|
|
74
|
+
|
|
75
|
+
Listening on http://0.0.0.0:11434
|
|
76
|
+
Model: fraud-detector
|
|
77
|
+
Trees: 100
|
|
78
|
+
Features: 30
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Run inference
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
curl http://localhost:11434/api/predict \
|
|
85
|
+
-d '{"model": "fraud-detector", "inputs": [[1.0, 2.0, 3.0, ...]]}'
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
```json
|
|
89
|
+
{
|
|
90
|
+
"model": "fraud-detector",
|
|
91
|
+
"outputs": [0.97],
|
|
92
|
+
"n_samples": 1,
|
|
93
|
+
"latency_us": 0.8,
|
|
94
|
+
"done": true
|
|
95
|
+
}
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Manage models
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
timber list # list all loaded models
|
|
102
|
+
timber remove fraud-detector # remove a model
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
```
|
|
106
|
+
NAME FORMAT TREES FEATURES SIZE COMPILED
|
|
107
|
+
---------------------------------------------------------------------------
|
|
108
|
+
fraud-detector xgboost 100 30 42.1 KB yes
|
|
109
|
+
churn-model lightgbm 50 18 28.3 KB yes
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## API Reference
|
|
113
|
+
|
|
114
|
+
| Endpoint | Method | Description |
|
|
115
|
+
|----------|--------|-------------|
|
|
116
|
+
| `/api/predict` | POST | Run inference — `{"model": "name", "inputs": [[...]]}` |
|
|
117
|
+
| `/api/generate` | POST | Alias for `/api/predict` (Ollama compat) |
|
|
118
|
+
| `/api/models` | GET | List loaded models |
|
|
119
|
+
| `/api/model/:name` | GET | Get model info |
|
|
120
|
+
| `/api/health` | GET | Health check |
|
|
121
|
+
|
|
122
|
+
## Supported Formats
|
|
123
|
+
|
|
124
|
+
| Format | Framework | File Types |
|
|
125
|
+
|--------|-----------|------------|
|
|
126
|
+
| XGBoost JSON | XGBoost | `.json` |
|
|
127
|
+
| LightGBM text | LightGBM | `.txt`, `.model`, `.lgb` |
|
|
128
|
+
| scikit-learn pickle | scikit-learn | `.pkl`, `.pickle` |
|
|
129
|
+
| ONNX ML opset | ONNX | `.onnx` |
|
|
130
|
+
| CatBoost JSON | CatBoost | `.json` |
|
|
131
|
+
|
|
132
|
+
All formats are auto-detected. Use `--format` to override.
|
|
133
|
+
|
|
134
|
+
## Advanced: Direct Compilation
|
|
135
|
+
|
|
136
|
+
For embedding in C/C++ projects without the server:
|
|
137
|
+
|
|
138
|
+
```bash
|
|
139
|
+
# Compile to C99 source
|
|
140
|
+
timber compile --model model.json --out ./dist/
|
|
141
|
+
|
|
142
|
+
# Inspect a model
|
|
143
|
+
timber inspect model.json
|
|
144
|
+
|
|
145
|
+
# Validate compiled output
|
|
146
|
+
timber validate --artifact ./dist/ --reference model.json --data test.csv
|
|
147
|
+
|
|
148
|
+
# Benchmark
|
|
149
|
+
timber bench --artifact ./dist/ --data bench.csv
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### C API
|
|
153
|
+
|
|
154
|
+
```c
|
|
155
|
+
#include "model.h"
|
|
156
|
+
|
|
157
|
+
TimberCtx* ctx;
|
|
158
|
+
timber_init(&ctx);
|
|
159
|
+
|
|
160
|
+
float inputs[TIMBER_N_FEATURES] = { /* ... */ };
|
|
161
|
+
float outputs[TIMBER_N_OUTPUTS];
|
|
162
|
+
|
|
163
|
+
timber_infer_single(inputs, outputs, ctx);
|
|
164
|
+
timber_free(ctx);
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
### Logging Callback
|
|
168
|
+
|
|
169
|
+
```c
|
|
170
|
+
void my_logger(int level, const char* msg) {
|
|
171
|
+
printf("[timber] %s\n", msg);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
timber_set_log_callback(my_logger);
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Compiler Pipeline
|
|
178
|
+
|
|
179
|
+
```
|
|
180
|
+
Model artifact → Front-end parser → Timber IR → Optimizer → Code generator → Native code
|
|
181
|
+
```
|
|
182
|
+
|
|
183
|
+
### Optimizer Passes
|
|
184
|
+
|
|
185
|
+
1. **Dead Leaf Elimination** — Prune negligible leaves
|
|
186
|
+
2. **Constant Feature Detection** — Fold trivial splits
|
|
187
|
+
3. **Threshold Quantization** — Classify thresholds for optimal storage
|
|
188
|
+
4. **Frequency-Ordered Branch Sorting** — Reorder for branch prediction (with calibration data)
|
|
189
|
+
5. **Pipeline Fusion** — Absorb scalers into tree thresholds
|
|
190
|
+
6. **Vectorization Analysis** — Identify SIMD batching opportunities
|
|
191
|
+
|
|
192
|
+
## Architecture
|
|
193
|
+
|
|
194
|
+
```
|
|
195
|
+
timber/
|
|
196
|
+
├── ir/ # Intermediate Representation
|
|
197
|
+
├── frontends/ # Model format parsers (xgboost, lightgbm, sklearn, onnx, catboost)
|
|
198
|
+
├── optimizer/ # IR optimization passes (6 passes)
|
|
199
|
+
├── codegen/ # Code generation (C99, WebAssembly, MISRA-C)
|
|
200
|
+
├── runtime/ # Python ctypes predictor
|
|
201
|
+
├── store.py # Local model registry (~/.timber/models/)
|
|
202
|
+
├── serve.py # HTTP inference server
|
|
203
|
+
└── cli.py # CLI (load, serve, list, remove, compile, inspect, ...)
|
|
204
|
+
```
|
|
205
|
+
|
|
206
|
+
## Development
|
|
207
|
+
|
|
208
|
+
```bash
|
|
209
|
+
pip install -e ".[dev]"
|
|
210
|
+
pytest tests/ -v # 144 tests
|
|
211
|
+
```
|
|
212
|
+
|
|
213
|
+
## License
|
|
214
|
+
|
|
215
|
+
Apache-2.0
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
# Timber
|
|
2
|
+
|
|
3
|
+
**Ollama for classical ML models.**
|
|
4
|
+
|
|
5
|
+
Timber compiles trained tree-based models (XGBoost, LightGBM, scikit-learn, CatBoost, ONNX) into optimized native code and serves them over a local HTTP API — just like Ollama does for LLMs, but for small models.
|
|
6
|
+
|
|
7
|
+
No Python runtime at inference time. Sub-microsecond latency. One command to load, one command to serve.
|
|
8
|
+
|
|
9
|
+
## Quick Start
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
pip install timber-compiler
|
|
13
|
+
```
|
|
14
|
+
|
|
15
|
+
### Load a model
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Load any supported model — Timber auto-detects the format
|
|
19
|
+
timber load model.json
|
|
20
|
+
timber load model.json --name fraud-detector
|
|
21
|
+
timber load model.pkl --format sklearn
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
### Serve it
|
|
25
|
+
|
|
26
|
+
```bash
|
|
27
|
+
timber serve fraud-detector
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
_____ _ _
|
|
32
|
+
|_ _(_)_ __ ___ | |__ ___ _ __
|
|
33
|
+
| | | | '_ ` _ \| '_ \ / _ \ '__|
|
|
34
|
+
| | | | | | | | | |_) | __/ |
|
|
35
|
+
|_| |_|_| |_| |_|_.__/ \___|_|
|
|
36
|
+
|
|
37
|
+
Classical ML Inference Server v0.1.0
|
|
38
|
+
|
|
39
|
+
Listening on http://0.0.0.0:11434
|
|
40
|
+
Model: fraud-detector
|
|
41
|
+
Trees: 100
|
|
42
|
+
Features: 30
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
### Run inference
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
curl http://localhost:11434/api/predict \
|
|
49
|
+
-d '{"model": "fraud-detector", "inputs": [[1.0, 2.0, 3.0, ...]]}'
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
```json
|
|
53
|
+
{
|
|
54
|
+
"model": "fraud-detector",
|
|
55
|
+
"outputs": [0.97],
|
|
56
|
+
"n_samples": 1,
|
|
57
|
+
"latency_us": 0.8,
|
|
58
|
+
"done": true
|
|
59
|
+
}
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
### Manage models
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
timber list # list all loaded models
|
|
66
|
+
timber remove fraud-detector # remove a model
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
```
|
|
70
|
+
NAME FORMAT TREES FEATURES SIZE COMPILED
|
|
71
|
+
---------------------------------------------------------------------------
|
|
72
|
+
fraud-detector xgboost 100 30 42.1 KB yes
|
|
73
|
+
churn-model lightgbm 50 18 28.3 KB yes
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
## API Reference
|
|
77
|
+
|
|
78
|
+
| Endpoint | Method | Description |
|
|
79
|
+
|----------|--------|-------------|
|
|
80
|
+
| `/api/predict` | POST | Run inference — `{"model": "name", "inputs": [[...]]}` |
|
|
81
|
+
| `/api/generate` | POST | Alias for `/api/predict` (Ollama compat) |
|
|
82
|
+
| `/api/models` | GET | List loaded models |
|
|
83
|
+
| `/api/model/:name` | GET | Get model info |
|
|
84
|
+
| `/api/health` | GET | Health check |
|
|
85
|
+
|
|
86
|
+
## Supported Formats
|
|
87
|
+
|
|
88
|
+
| Format | Framework | File Types |
|
|
89
|
+
|--------|-----------|------------|
|
|
90
|
+
| XGBoost JSON | XGBoost | `.json` |
|
|
91
|
+
| LightGBM text | LightGBM | `.txt`, `.model`, `.lgb` |
|
|
92
|
+
| scikit-learn pickle | scikit-learn | `.pkl`, `.pickle` |
|
|
93
|
+
| ONNX ML opset | ONNX | `.onnx` |
|
|
94
|
+
| CatBoost JSON | CatBoost | `.json` |
|
|
95
|
+
|
|
96
|
+
All formats are auto-detected. Use `--format` to override.
|
|
97
|
+
|
|
98
|
+
## Advanced: Direct Compilation
|
|
99
|
+
|
|
100
|
+
For embedding in C/C++ projects without the server:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# Compile to C99 source
|
|
104
|
+
timber compile --model model.json --out ./dist/
|
|
105
|
+
|
|
106
|
+
# Inspect a model
|
|
107
|
+
timber inspect model.json
|
|
108
|
+
|
|
109
|
+
# Validate compiled output
|
|
110
|
+
timber validate --artifact ./dist/ --reference model.json --data test.csv
|
|
111
|
+
|
|
112
|
+
# Benchmark
|
|
113
|
+
timber bench --artifact ./dist/ --data bench.csv
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### C API
|
|
117
|
+
|
|
118
|
+
```c
|
|
119
|
+
#include "model.h"
|
|
120
|
+
|
|
121
|
+
TimberCtx* ctx;
|
|
122
|
+
timber_init(&ctx);
|
|
123
|
+
|
|
124
|
+
float inputs[TIMBER_N_FEATURES] = { /* ... */ };
|
|
125
|
+
float outputs[TIMBER_N_OUTPUTS];
|
|
126
|
+
|
|
127
|
+
timber_infer_single(inputs, outputs, ctx);
|
|
128
|
+
timber_free(ctx);
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Logging Callback
|
|
132
|
+
|
|
133
|
+
```c
|
|
134
|
+
void my_logger(int level, const char* msg) {
|
|
135
|
+
printf("[timber] %s\n", msg);
|
|
136
|
+
}
|
|
137
|
+
|
|
138
|
+
timber_set_log_callback(my_logger);
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
## Compiler Pipeline
|
|
142
|
+
|
|
143
|
+
```
|
|
144
|
+
Model artifact → Front-end parser → Timber IR → Optimizer → Code generator → Native code
|
|
145
|
+
```
|
|
146
|
+
|
|
147
|
+
### Optimizer Passes
|
|
148
|
+
|
|
149
|
+
1. **Dead Leaf Elimination** — Prune negligible leaves
|
|
150
|
+
2. **Constant Feature Detection** — Fold trivial splits
|
|
151
|
+
3. **Threshold Quantization** — Classify thresholds for optimal storage
|
|
152
|
+
4. **Frequency-Ordered Branch Sorting** — Reorder for branch prediction (with calibration data)
|
|
153
|
+
5. **Pipeline Fusion** — Absorb scalers into tree thresholds
|
|
154
|
+
6. **Vectorization Analysis** — Identify SIMD batching opportunities
|
|
155
|
+
|
|
156
|
+
## Architecture
|
|
157
|
+
|
|
158
|
+
```
|
|
159
|
+
timber/
|
|
160
|
+
├── ir/ # Intermediate Representation
|
|
161
|
+
├── frontends/ # Model format parsers (xgboost, lightgbm, sklearn, onnx, catboost)
|
|
162
|
+
├── optimizer/ # IR optimization passes (6 passes)
|
|
163
|
+
├── codegen/ # Code generation (C99, WebAssembly, MISRA-C)
|
|
164
|
+
├── runtime/ # Python ctypes predictor
|
|
165
|
+
├── store.py # Local model registry (~/.timber/models/)
|
|
166
|
+
├── serve.py # HTTP inference server
|
|
167
|
+
└── cli.py # CLI (load, serve, list, remove, compile, inspect, ...)
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
## Development
|
|
171
|
+
|
|
172
|
+
```bash
|
|
173
|
+
pip install -e ".[dev]"
|
|
174
|
+
pytest tests/ -v # 144 tests
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## License
|
|
178
|
+
|
|
179
|
+
Apache-2.0
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
"""Benchmark XGBoost Python inference for comparison with Timber C99."""
|
|
2
|
+
import time
|
|
3
|
+
import numpy as np
|
|
4
|
+
import xgboost as xgb
|
|
5
|
+
from sklearn.datasets import load_breast_cancer
|
|
6
|
+
from sklearn.model_selection import train_test_split
|
|
7
|
+
|
|
8
|
+
WARMUP = 1000
|
|
9
|
+
ITERS = 10000
|
|
10
|
+
|
|
11
|
+
data = load_breast_cancer()
|
|
12
|
+
X, y = data.data.astype(np.float32), data.target
|
|
13
|
+
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
|
|
14
|
+
|
|
15
|
+
model = xgb.XGBClassifier(
|
|
16
|
+
n_estimators=50, max_depth=4, learning_rate=0.1,
|
|
17
|
+
objective="binary:logistic", random_state=42,
|
|
18
|
+
use_label_encoder=False, eval_metric="logloss",
|
|
19
|
+
)
|
|
20
|
+
model.fit(X_train, y_train)
|
|
21
|
+
booster = model.get_booster()
|
|
22
|
+
|
|
23
|
+
samples = X_test[:10]
|
|
24
|
+
|
|
25
|
+
print("XGBoost Python Inference Benchmark")
|
|
26
|
+
print("=" * 50)
|
|
27
|
+
print(f"Trees: {model.n_estimators}")
|
|
28
|
+
print(f"Features: {X_test.shape[1]}")
|
|
29
|
+
print(f"Samples: {len(samples)}")
|
|
30
|
+
print(f"Warmup: {WARMUP} iters")
|
|
31
|
+
print(f"Timed: {ITERS} iters")
|
|
32
|
+
print("=" * 50)
|
|
33
|
+
print()
|
|
34
|
+
|
|
35
|
+
# --- Single-sample (batch=1) ---
|
|
36
|
+
single = np.ascontiguousarray(samples[:1])
|
|
37
|
+
d_single = xgb.DMatrix(single)
|
|
38
|
+
|
|
39
|
+
for _ in range(WARMUP):
|
|
40
|
+
booster.predict(d_single)
|
|
41
|
+
|
|
42
|
+
latencies = []
|
|
43
|
+
for i in range(ITERS):
|
|
44
|
+
t0 = time.perf_counter_ns()
|
|
45
|
+
booster.predict(d_single)
|
|
46
|
+
t1 = time.perf_counter_ns()
|
|
47
|
+
latencies.append((t1 - t0) / 1000.0) # to microseconds
|
|
48
|
+
|
|
49
|
+
latencies.sort()
|
|
50
|
+
mean = sum(latencies) / len(latencies)
|
|
51
|
+
p50 = latencies[len(latencies) // 2]
|
|
52
|
+
p95 = latencies[int(len(latencies) * 0.95)]
|
|
53
|
+
p99 = latencies[int(len(latencies) * 0.99)]
|
|
54
|
+
|
|
55
|
+
print("Single-sample (batch=1):")
|
|
56
|
+
print(f" Mean: {mean:.2f} us")
|
|
57
|
+
print(f" P50: {p50:.2f} us")
|
|
58
|
+
print(f" P95: {p95:.2f} us")
|
|
59
|
+
print(f" P99: {p99:.2f} us")
|
|
60
|
+
print(f" Throughput: {1e6 / mean:.0f} samples/sec")
|
|
61
|
+
print()
|
|
62
|
+
|
|
63
|
+
# --- Batch inference ---
|
|
64
|
+
for bs in [1, 4, 10]:
|
|
65
|
+
batch = np.ascontiguousarray(samples[:bs])
|
|
66
|
+
d_batch = xgb.DMatrix(batch)
|
|
67
|
+
|
|
68
|
+
for _ in range(WARMUP):
|
|
69
|
+
booster.predict(d_batch)
|
|
70
|
+
|
|
71
|
+
latencies = []
|
|
72
|
+
for i in range(ITERS):
|
|
73
|
+
t0 = time.perf_counter_ns()
|
|
74
|
+
booster.predict(d_batch)
|
|
75
|
+
t1 = time.perf_counter_ns()
|
|
76
|
+
latencies.append((t1 - t0) / 1000.0)
|
|
77
|
+
|
|
78
|
+
latencies.sort()
|
|
79
|
+
mean = sum(latencies) / len(latencies)
|
|
80
|
+
p50 = latencies[len(latencies) // 2]
|
|
81
|
+
p95 = latencies[int(len(latencies) * 0.95)]
|
|
82
|
+
p99 = latencies[int(len(latencies) * 0.99)]
|
|
83
|
+
|
|
84
|
+
print(f"Batch={bs}:")
|
|
85
|
+
print(f" Mean: {mean:.2f} us")
|
|
86
|
+
print(f" P50: {p50:.2f} us")
|
|
87
|
+
print(f" P95: {p95:.2f} us")
|
|
88
|
+
print(f" P99: {p99:.2f} us")
|
|
89
|
+
print(f" Throughput: {bs * 1e6 / mean:.0f} samples/sec")
|
|
90
|
+
print()
|