tinymlc 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TinyMLC/ANG/__init__.py +0 -0
- TinyMLC/ANG/args.py +86 -0
- TinyMLC/ANG/estimator.py +103 -0
- TinyMLC/ANG/estimator_hal.py +184 -0
- TinyMLC/ANG/estimator_qemu.py +257 -0
- TinyMLC/ANG/estimator_software.py +130 -0
- TinyMLC/ANG/model_builder.py +508 -0
- TinyMLC/ANG/model_generator.py +439 -0
- TinyMLC/ANG/model_info.py +283 -0
- TinyMLC/ANG/utils.py +420 -0
- TinyMLC/__init__.py +0 -0
- TinyMLC/cli.py +126 -0
- TinyMLC/codegen.py +877 -0
- TinyMLC/converter/__init__.py +0 -0
- TinyMLC/converter/export_weights.py +382 -0
- TinyMLC/converter/parser_litert.py +757 -0
- TinyMLC/converter/parser_onnx.py +649 -0
- TinyMLC/generate_lut.py +97 -0
- TinyMLC/handlers.py +325 -0
- TinyMLC/ops.py +76 -0
- TinyMLC/templates/lut.c.tpl +23 -0
- TinyMLC/templates/lut.h.tpl +67 -0
- TinyMLC/templates/model.c.tpl +314 -0
- TinyMLC/templates/model.h.tpl +66 -0
- TinyMLC/transform/__init__.py +0 -0
- TinyMLC/transform/algebraic.py +286 -0
- TinyMLC/transform/base.py +58 -0
- TinyMLC/transform/constant_folding.py +260 -0
- TinyMLC/transform/cse.py +192 -0
- TinyMLC/transform/dce.py +182 -0
- TinyMLC/transform/fusion.py +723 -0
- TinyMLC/transform/memory.py +200 -0
- TinyMLC/transform/pass_manager.py +101 -0
- TinyMLC/transform/simplify.py +515 -0
- tinymlc-0.1.0.dist-info/METADATA +49 -0
- tinymlc-0.1.0.dist-info/RECORD +47 -0
- tinymlc-0.1.0.dist-info/WHEEL +4 -0
- tinymlc-0.1.0.dist-info/entry_points.txt +2 -0
- tinymlc-0.1.0.dist-info/licenses/LICENSE +201 -0
- utils/__init__.py +0 -0
- utils/arm-none-eabi-gcc.cmake +53 -0
- utils/dump.py +86 -0
- utils/generate_onnx_models.py +183 -0
- utils/generate_tflite_models.py +236 -0
- utils/pack_macos.sh +88 -0
- utils/path.py +31 -0
- utils/riscv-none-elf-gcc.cmake +50 -0
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# TinyMLC - Tiny Machine Learning Compiler
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2026 Jia Liu & TinyMLC Contributors
|
|
5
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
6
|
+
#
|
|
7
|
+
# This file is part of TinyMLC.
|
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
# you may not use this file except in compliance with the License.
|
|
10
|
+
# You may obtain a copy of the License at:
|
|
11
|
+
#
|
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
+
#
|
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
+
# See the License for the specific language governing permissions and
|
|
18
|
+
# limitations under the License.
|
|
19
|
+
|
|
20
|
+
# Software-based estimator using MACs and parameter counts.
|
|
21
|
+
|
|
22
|
+
from typing import Dict, Any, Optional
|
|
23
|
+
from TinyMLC.ANG.estimator import Estimator
|
|
24
|
+
from TinyMLC.ANG.utils import (calculate_macs, calculate_params,
|
|
25
|
+
calculate_peak_ram)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class SoftwareEstimator(Estimator):
    """
    Pure software estimator (open-loop).

    This estimator uses mathematical formulas to compute:
    - MACs (multiply-accumulate operations)
    - Parameter count
    - Peak RAM usage (estimated)

    It does NOT require any hardware or simulation.

    This is an "open-loop" estimator because there is no feedback
    from actual hardware execution.
    """

    def __init__(self, config: Optional[Dict[str, Any]] = None):
        """
        Initialize the software estimator.

        Args:
            config: Optional overrides for the default configuration
                (budget limits, score weights and clock speed). Keys that
                are not supplied keep the defaults listed below.
        """
        super().__init__(config)

        self.default_config = {
            "max_macs": 100000,  # Maximum allowed MACs
            "max_params": 50000,  # Maximum allowed parameters
            "max_ram": 32768,  # Maximum RAM in bytes
            "weight_macs": 0.4,  # Weight for MACs in score
            "weight_params": 0.3,  # Weight for params in score
            "weight_ram": 0.3,  # Weight for RAM in score
            # Clock speed in Hz (for latency estimate)
            "clock_speed": 100000000,
        }
        # Merge with user config; user-supplied keys win over the defaults.
        self.config = {**self.default_config, **(config or {})}

    @staticmethod
    def _budget_score(value: float, limit: float) -> float:
        """
        Score how much of a budget is left unused.

        Args:
            value: Amount consumed.
            limit: Budget limit.

        Returns:
            ``1.0 - value / limit``, floored at 0.0 once ``value`` reaches
            or exceeds ``limit``. A non-positive ``limit`` yields 0.0 so
            that no division by zero can occur.
        """
        if limit <= 0:
            return 0.0
        return 1.0 - min(value / limit, 1.0)

    def estimate(self, model_info: Dict[str, Any]) -> Dict[str, Any]:
        """
        Estimate performance using pure software calculations.

        Args:
            model_info: ModelInfo dictionary.

        Returns:
            Dictionary with the keys ``score`` (weighted budget score scaled
            by 100), ``macs``, ``params``, ``peak_ram``, ``flash``,
            ``latency_ms`` and ``details`` (estimator name plus the three
            per-metric scores).
        """
        macs = calculate_macs(model_info)
        params = calculate_params(model_info)
        peak_ram = calculate_peak_ram(model_info)

        # Estimate flash usage: params + overhead.
        # Overhead is approximated as 1KB for code and metadata.
        # NOTE(review): adding the parameter count directly presumably
        # assumes one byte per parameter (int8) - confirm.
        flash = params + 1024

        # Estimate latency: MACs / clock speed (ideal case).
        # This is a theoretical lower bound, not real-world latency.
        # A non-positive clock speed cannot produce a meaningful latency,
        # so report infinity instead of raising ZeroDivisionError.
        clock_speed = self.config["clock_speed"]
        if clock_speed > 0:
            latency_ms = (macs / clock_speed) * 1000.0
        else:
            latency_ms = float("inf")

        # Per-metric scores (higher is better): fraction of each budget
        # that the model leaves unused.
        macs_score = self._budget_score(macs, self.config["max_macs"])
        params_score = self._budget_score(params, self.config["max_params"])
        ram_score = self._budget_score(peak_ram, self.config["max_ram"])

        # Weighted combination
        score = (
            self.config["weight_macs"] * macs_score
            + self.config["weight_params"] * params_score
            + self.config["weight_ram"] * ram_score
        ) * 100.0  # Scale to 0-100 (with the default weights)

        return {
            "score": score,
            "macs": macs,
            "params": params,
            "peak_ram": peak_ram,
            "flash": flash,
            "latency_ms": latency_ms,
            "details": {
                "estimator": "software",
                "macs_score": macs_score,
                "params_score": params_score,
                "ram_score": ram_score,
            },
        }

    def get_info(self) -> Dict[str, str]:
        """Get estimator information (name, version, type, clock speed)."""
        return {
            "name": "SoftwareEstimator",
            "version": "1.0",
            "type": "open_loop",
            "description": "Pure software estimator using MACs and params",
            "clock_speed": str(self.config["clock_speed"]),
        }
|
|
@@ -0,0 +1,508 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# TinyMLC - Tiny Machine Learning Compiler
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2026 Jia Liu & TinyMLC Contributors
|
|
5
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
6
|
+
#
|
|
7
|
+
# This file is part of TinyMLC.
|
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
# you may not use this file except in compliance with the License.
|
|
10
|
+
# You may obtain a copy of the License at:
|
|
11
|
+
#
|
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
+
#
|
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
+
# See the License for the specific language governing permissions and
|
|
18
|
+
# limitations under the License.
|
|
19
|
+
|
|
20
|
+
# Builder for constructing ModelInfo structures from scratch.
|
|
21
|
+
|
|
22
|
+
from typing import List, Optional, Dict
|
|
23
|
+
import numpy as np
|
|
24
|
+
|
|
25
|
+
from TinyMLC.ANG.model_info import ModelInfo, TensorSpec, Op
|
|
26
|
+
from TinyMLC.ANG.utils import (calculate_macs, calculate_params,
|
|
27
|
+
calculate_peak_ram, calculate_flash)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class ModelBuilder:
    """
    Builder for constructing ModelInfo structures.

    This class handles:
    - Tensor registration and index allocation
    - Layer addition (Conv, FC, Upsample, Concat, Add, Detection head)
    - Weight initialization (random or provided)
    - Final ModelInfo assembly

    Typical use: register tensors with add_input / add_output / add_tensor,
    connect them with the add_* layer methods, then call build().
    """

    # Scale given to integer tensors when the caller does not supply one.
    _DEFAULT_INT_SCALE = 1.0 / 256.0

    def __init__(self, model_name: str = "ang_generated"):
        """
        Create an empty builder.

        Args:
            model_name: Name stored on the builder.
                NOTE(review): not forwarded to ModelInfo by build() -
                confirm whether it is consumed elsewhere.
        """
        self.model_name = model_name
        self.inputs: List[TensorSpec] = []
        self.outputs: List[TensorSpec] = []
        self.ops: List[Op] = []
        self.tensors: Dict[int, TensorSpec] = {}
        self.weights: Dict[int, np.ndarray] = {}
        self._next_tensor_index: int = 0
        self._layer_counter: int = 0

    # ============ Tensor Management ============

    def _next_tensor(self) -> int:
        """Get the next available tensor index and advance the counter."""
        idx = self._next_tensor_index
        self._next_tensor_index += 1
        return idx

    def _next_layer_name(self, prefix: str) -> str:
        """Generate a unique name of the form ``<prefix>_<counter>``."""
        self._layer_counter += 1
        return f"{prefix}_{self._layer_counter}"

    @classmethod
    def _resolve_quant(
        cls,
        dtype: str,
        scale: Optional[float],
        zero_point: Optional[int],
    ):
        """
        Fill in default quantization parameters for integer tensors.

        When ``scale`` is missing and ``dtype`` starts with "int", the scale
        defaults to 1/256. The zero point defaults to 0 only if the caller
        did not provide one, so an explicit zero point is never discarded.

        Returns:
            A ``(scale, zero_point)`` pair.
        """
        if scale is None and dtype.startswith("int"):
            scale = cls._DEFAULT_INT_SCALE
            if zero_point is None:
                zero_point = 0
        return scale, zero_point

    def add_tensor(
        self,
        name: str,
        shape: List[int],
        dtype: str = "int8",
        weight: Optional[np.ndarray] = None,
        scale: Optional[float] = None,
        zero_point: Optional[int] = None,
    ) -> int:
        """
        Add a tensor to the model.

        Args:
            name: Tensor name.
            shape: Tensor shape.
            dtype: Data type.
            weight: Optional weight data; stored under the new index.
            scale: Optional quantization scale.
            zero_point: Optional quantization zero point.

        Returns:
            The tensor index.
        """
        scale, zero_point = self._resolve_quant(dtype, scale, zero_point)

        # NOTE(review): unlike add_input/add_output, this spec is created
        # without tensor_index - confirm that this is intentional.
        spec = TensorSpec(
            name=name,
            shape=shape,
            dtype=dtype,
            scale=scale,
            zero_point=zero_point,
        )
        idx = self._next_tensor()
        self.tensors[idx] = spec

        if weight is not None:
            self.weights[idx] = weight

        return idx

    def _add_io_tensor(
        self,
        registry: List[TensorSpec],
        name: str,
        shape: List[int],
        dtype: str,
        scale: Optional[float],
        zero_point: Optional[int],
    ) -> int:
        """Register a model input/output tensor and append it to registry."""
        scale, zero_point = self._resolve_quant(dtype, scale, zero_point)
        idx = self._next_tensor()

        spec = TensorSpec(
            name=name,
            shape=shape,
            dtype=dtype,
            tensor_index=idx,
            scale=scale,
            zero_point=zero_point,
        )
        registry.append(spec)
        self.tensors[idx] = spec
        return idx

    def add_input(
        self,
        name: str,
        shape: List[int],
        dtype: str = "int8",
        scale: Optional[float] = None,
        zero_point: Optional[int] = None,
    ) -> int:
        """
        Add an input tensor.

        Args:
            name: Tensor name.
            shape: Tensor shape.
            dtype: Data type.
            scale: Optional quantization scale.
            zero_point: Optional quantization zero point.

        Returns:
            The tensor index.
        """
        return self._add_io_tensor(
            self.inputs, name, shape, dtype, scale, zero_point
        )

    def add_output(
        self,
        name: str,
        shape: List[int],
        dtype: str = "int8",
        scale: Optional[float] = None,
        zero_point: Optional[int] = None,
    ) -> int:
        """
        Add an output tensor.

        Args:
            name: Tensor name.
            shape: Tensor shape.
            dtype: Data type.
            scale: Optional quantization scale.
            zero_point: Optional quantization zero point.

        Returns:
            The tensor index.
        """
        return self._add_io_tensor(
            self.outputs, name, shape, dtype, scale, zero_point
        )

    # ============ Layer Building ============

    def _add_weight_and_bias(
        self,
        prefix: str,
        weight_shape: List[int],
        bias_shape: List[int],
        weight: Optional[np.ndarray],
        bias: Optional[np.ndarray],
    ):
        """
        Register the int8 weight and int32 bias tensors of a layer.

        Missing data is generated randomly. The weight is named, generated
        and registered before the bias.

        Returns:
            A ``(weight_idx, bias_idx)`` pair.
        """
        weight_name = self._next_layer_name(f"{prefix}_weight")
        if weight is None:
            weight = self._random_weight(weight_shape)
        weight_idx = self.add_tensor(
            name=weight_name,
            shape=weight_shape,
            dtype="int8",
            weight=weight,
        )

        bias_name = self._next_layer_name(f"{prefix}_bias")
        if bias is None:
            bias = self._random_bias(bias_shape)
        bias_idx = self.add_tensor(
            name=bias_name,
            shape=bias_shape,
            dtype="int32",
            weight=bias,
        )
        return weight_idx, bias_idx

    def add_conv(
        self,
        input_idx: int,
        output_idx: int,
        kernel_size: int,
        channels_out: int,
        stride: int = 1,
        padding: str = "SAME",
        activation: str = "relu",
        weight: Optional[np.ndarray] = None,
        bias: Optional[np.ndarray] = None,
    ) -> int:
        """
        Add a Conv2D operation.

        Args:
            input_idx: Index of a previously registered input tensor.
            output_idx: Index of the output tensor.
            kernel_size: Square kernel size (used for height and width).
            channels_out: Number of output channels.
            stride: Stride (used for height and width).
            padding: Padding mode.
                NOTE(review): currently not applied - the output size is
                always computed with zero padding; confirm intended.
            activation: Activation name.
                NOTE(review): currently not recorded in the op params.
            weight: Optional weight data; random int8 data if omitted.
            bias: Optional bias data; random int32 data if omitted.

        Returns:
            The output tensor index (same as output_idx).

        Raises:
            ValueError: If the input tensor is unknown, or if kernel_size
                or stride is not positive.
        """
        input_spec = self.tensors.get(input_idx)
        if input_spec is None:
            raise ValueError(f"Input tensor {input_idx} not found")
        # Validate before registering any tensor so that a failing call
        # leaves the builder untouched.
        if kernel_size <= 0:
            raise ValueError(
                f"kernel_size must be positive, got {kernel_size}"
            )
        if stride <= 0:
            raise ValueError(f"stride must be positive, got {stride}")

        input_shape = input_spec.shape
        rank = len(input_shape)

        # The channel count is read from axis 3 of a 4D input; any other
        # rank falls back to a single input channel.
        channels_in = input_shape[3] if rank == 4 else 1

        # Padding is fixed to zero; the same values feed both the output
        # size formula and the emitted conv_params.
        pad_h = 0
        pad_w = 0

        input_h = input_shape[1] if rank >= 2 else 1
        input_w = input_shape[2] if rank >= 3 else 1
        input_c = input_shape[3] if rank >= 4 else channels_in

        # Calculate output shape (simplified)
        out_h = (
            (input_h + 2 * pad_h - kernel_size) // stride + 1
            if rank >= 2
            else 1
        )
        out_w = (
            (input_w + 2 * pad_w - kernel_size) // stride + 1
            if rank >= 3
            else 1
        )

        weight_idx, bias_idx = self._add_weight_and_bias(
            "conv",
            [kernel_size, kernel_size, channels_in, channels_out],
            [channels_out],
            weight,
            bias,
        )

        # Add Conv2D operation
        op = Op(
            op_name="CONV_2D",
            input_indices=[input_idx, weight_idx, bias_idx],
            output_indices=[output_idx],
            params={
                "data_input_idx": input_idx,
                "conv_params": {
                    "input_h": input_h,
                    "input_w": input_w,
                    "input_c": input_c,
                    "output_h": out_h,
                    "output_w": out_w,
                    "output_c": channels_out,
                    "kernel_h": kernel_size,
                    "kernel_w": kernel_size,
                    "stride_h": stride,
                    "stride_w": stride,
                    "padding_h": pad_h,
                    "padding_w": pad_w,
                }
            },
        )
        self.ops.append(op)

        return output_idx

    def add_fc(
        self,
        input_idx: int,
        output_idx: int,
        units: int,
        activation: str = "relu",
        weight: Optional[np.ndarray] = None,
        bias: Optional[np.ndarray] = None,
    ) -> int:
        """
        Add a Fully Connected (Dense) operation.

        Args:
            input_idx: Index of a previously registered input tensor.
            output_idx: Index of the output tensor.
            units: Number of output units.
            activation: Activation name, recorded in the op params.
            weight: Optional weight data; random int8 data if omitted.
            bias: Optional bias data; random int32 data if omitted.

        Returns:
            The output tensor index (same as output_idx).

        Raises:
            ValueError: If the input tensor is unknown.
        """
        input_spec = self.tensors.get(input_idx)
        if input_spec is None:
            raise ValueError(f"Input tensor {input_idx} not found")

        # The weight's first dimension is the product of all input dims.
        input_size = 1
        for dim in input_spec.shape:
            input_size *= dim

        weight_idx, bias_idx = self._add_weight_and_bias(
            "fc",
            [input_size, units],
            [units],
            weight,
            bias,
        )

        op = Op(
            op_name="FULLY_CONNECTED",
            input_indices=[input_idx, weight_idx, bias_idx],
            output_indices=[output_idx],
            params={
                "data_input_idx": input_idx,
                "fc_params": {
                    "units": units,
                    "activation": activation,
                }
            },
        )
        self.ops.append(op)

        return output_idx

    # ============ P2 New Ops (Detection / Segmentation) ============

    def add_upsample(
        self,
        input_idx: int,
        output_idx: int,
        scale_factor: int = 2,
        mode: str = "nearest",
    ) -> int:
        """
        Add an Upsample operation.

        Args:
            input_idx: Input tensor index.
            output_idx: Output tensor index.
            scale_factor: Upsampling factor (2, 4, 8).
            mode: Interpolation mode ("nearest" or "bilinear").

        Returns:
            The output tensor index (same as output_idx).
        """
        op = Op(
            op_name="UPSAMPLE_2D",
            input_indices=[input_idx],
            output_indices=[output_idx],
            params={
                "upsample_params": {
                    "scale_factor": scale_factor,
                    "mode": mode,
                }
            },
        )
        self.ops.append(op)
        return output_idx

    def add_concat(
        self,
        input_indices: List[int],
        output_idx: int,
        axis: int = -1,
    ) -> int:
        """
        Add a Concat operation.

        Args:
            input_indices: List of input tensor indices.
            output_idx: Output tensor index.
            axis: Concatenation axis.

        Returns:
            The output tensor index (same as output_idx).
        """
        op = Op(
            op_name="CONCAT",
            # Copy so later changes to the caller's list cannot alter the op.
            input_indices=list(input_indices),
            output_indices=[output_idx],
            params={
                "concat_params": {
                    "axis": axis,
                }
            },
        )
        self.ops.append(op)
        return output_idx

    def add_add(
        self,
        input_indices: List[int],
        output_idx: int,
    ) -> int:
        """
        Add an Add operation (element-wise addition).

        Args:
            input_indices: List of input tensor indices (usually 2).
            output_idx: Output tensor index.

        Returns:
            The output tensor index (same as output_idx).
        """
        op = Op(
            op_name="ADD",
            # Copy so later changes to the caller's list cannot alter the op.
            input_indices=list(input_indices),
            output_indices=[output_idx],
            params={},
        )
        self.ops.append(op)
        return output_idx

    def add_detection_head(
        self,
        input_idx: int,
        output_boxes_idx: int,
        output_classes_idx: int,
        num_anchors: int = 3,
        num_classes: int = 10,
    ) -> None:
        """
        Add a Detection Head.

        This is a multi-output head used for object detection.
        It produces:
        - Box predictions: [N, num_anchors, 4]
        - Class predictions: [N, num_anchors, num_classes]

        Args:
            input_idx: Input tensor index (feature map).
            output_boxes_idx: Output tensor index for boxes.
            output_classes_idx: Output tensor index for classes.
            num_anchors: Number of anchors per location.
            num_classes: Number of object classes.
        """
        # Detection head typically is a set of conv layers.
        # For now, we add a single placeholder op that captures the head;
        # no weight tensors are created for it.
        op = Op(
            op_name="DETECTION_HEAD",
            input_indices=[input_idx],
            output_indices=[output_boxes_idx, output_classes_idx],
            params={
                "detection_params": {
                    "num_anchors": num_anchors,
                    "num_classes": num_classes,
                }
            },
        )
        self.ops.append(op)

    # ============ Weight Initialization ============

    @staticmethod
    def _random_quantized(shape: List[int], dtype) -> np.ndarray:
        """
        Draw uniform values in [-0.5, 0.5), scale by 256, round and clip
        to [-128, 127], then cast to ``dtype``.

        Uses NumPy's global random state, so ``np.random.seed`` controls
        reproducibility.
        """
        values = np.random.uniform(-0.5, 0.5, size=shape)
        return np.clip(np.round(values * 256), -128, 127).astype(dtype)

    def _random_weight(self, shape: List[int]) -> np.ndarray:
        """Generate random int8 weight."""
        return self._random_quantized(shape, np.int8)

    def _random_bias(self, shape: List[int]) -> np.ndarray:
        """Generate random int32 bias (values limited to [-128, 127])."""
        return self._random_quantized(shape, np.int32)

    # ============ Build ============

    def build(self) -> ModelInfo:
        """
        Build and validate the final ModelInfo.

        Returns:
            The assembled ModelInfo.

        Raises:
            ValueError: If ``ModelInfo.validate()`` reports failure.
        """
        # Add index and set state to "generated" for all ops
        for i, op in enumerate(self.ops):
            op.index = i
            op.state = "generated"

        model_info = ModelInfo(
            inputs=self.inputs,
            outputs=self.outputs,
            ops=self.ops,
            tensors=self.tensors,
            weights=self.weights,
            quant_scales={},
        )

        # Compute model statistics from the dictionary form.
        model_dict = model_info.to_dict()
        macs = calculate_macs(model_dict)
        params = calculate_params(model_dict)
        peak_ram = calculate_peak_ram(model_dict)
        flash = calculate_flash(model_dict)

        # NOTE(review): the statistics are stored under quant_scales even
        # though they are not quantization scales - confirm that consumers
        # expect them there before renaming.
        model_info.quant_scales = {
            "macs": macs,
            "params": params,
            "peak_ram": peak_ram,
            "flash": flash,
        }

        if not model_info.validate():
            # TODO: add more detailed validation diagnostics
            raise ValueError("Model validation failed")

        return model_info
|