tinymlc 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- TinyMLC/ANG/__init__.py +0 -0
- TinyMLC/ANG/args.py +86 -0
- TinyMLC/ANG/estimator.py +103 -0
- TinyMLC/ANG/estimator_hal.py +184 -0
- TinyMLC/ANG/estimator_qemu.py +257 -0
- TinyMLC/ANG/estimator_software.py +130 -0
- TinyMLC/ANG/model_builder.py +508 -0
- TinyMLC/ANG/model_generator.py +439 -0
- TinyMLC/ANG/model_info.py +283 -0
- TinyMLC/ANG/utils.py +420 -0
- TinyMLC/__init__.py +0 -0
- TinyMLC/cli.py +126 -0
- TinyMLC/codegen.py +877 -0
- TinyMLC/converter/__init__.py +0 -0
- TinyMLC/converter/export_weights.py +382 -0
- TinyMLC/converter/parser_litert.py +757 -0
- TinyMLC/converter/parser_onnx.py +649 -0
- TinyMLC/generate_lut.py +97 -0
- TinyMLC/handlers.py +325 -0
- TinyMLC/ops.py +76 -0
- TinyMLC/templates/lut.c.tpl +23 -0
- TinyMLC/templates/lut.h.tpl +67 -0
- TinyMLC/templates/model.c.tpl +314 -0
- TinyMLC/templates/model.h.tpl +66 -0
- TinyMLC/transform/__init__.py +0 -0
- TinyMLC/transform/algebraic.py +286 -0
- TinyMLC/transform/base.py +58 -0
- TinyMLC/transform/constant_folding.py +260 -0
- TinyMLC/transform/cse.py +192 -0
- TinyMLC/transform/dce.py +182 -0
- TinyMLC/transform/fusion.py +723 -0
- TinyMLC/transform/memory.py +200 -0
- TinyMLC/transform/pass_manager.py +101 -0
- TinyMLC/transform/simplify.py +515 -0
- tinymlc-0.1.0.dist-info/METADATA +49 -0
- tinymlc-0.1.0.dist-info/RECORD +47 -0
- tinymlc-0.1.0.dist-info/WHEEL +4 -0
- tinymlc-0.1.0.dist-info/entry_points.txt +2 -0
- tinymlc-0.1.0.dist-info/licenses/LICENSE +201 -0
- utils/__init__.py +0 -0
- utils/arm-none-eabi-gcc.cmake +53 -0
- utils/dump.py +86 -0
- utils/generate_onnx_models.py +183 -0
- utils/generate_tflite_models.py +236 -0
- utils/pack_macos.sh +88 -0
- utils/path.py +31 -0
- utils/riscv-none-elf-gcc.cmake +50 -0
|
@@ -0,0 +1,283 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# TinyMLC - Tiny Machine Learning Compiler
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2026 Jia Liu & TinyMLC Contributors
|
|
5
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
6
|
+
#
|
|
7
|
+
# This file is part of TinyMLC.
|
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
# you may not use this file except in compliance with the License.
|
|
10
|
+
# You may obtain a copy of the License at:
|
|
11
|
+
#
|
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
+
#
|
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
+
# See the License for the specific language governing permissions and
|
|
18
|
+
# limitations under the License.
|
|
19
|
+
|
|
20
|
+
# Unified Intermediate Representation (IR) for TinyMLC.
|
|
21
|
+
# This structure is used as the contract between all frontends
|
|
22
|
+
# (LiteRT, ONNX, ANG) and the backend code generator.
|
|
23
|
+
|
|
24
|
+
from typing import Dict, List, Optional, Any
|
|
25
|
+
import numpy as np
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TensorSpec:
    """
    Description of one tensor in the model.

    Attributes:
        name: Unique identifier for the tensor.
        shape: List of dimensions (batch, height, width, channels, ...).
        dtype: Data type of tensor values (e.g., "int8", "int32", "float32").
        tensor_index: Optional integer index of the tensor (None if not given).
        scale: Quantization scale factor (optional, None for float models).
        zero_point: Quantization zero point (optional, None for float models).
    """

    def __init__(
        self,
        name: str,
        shape: List[int],
        dtype: str,
        tensor_index: Optional[int] = None,
        scale: Optional[float] = None,
        zero_point: Optional[int] = None,
    ):
        # Identity and layout of the tensor.
        self.name, self.shape, self.dtype = name, shape, dtype
        self.tensor_index = tensor_index
        # Quantization parameters; both stay None unless supplied.
        self.scale, self.zero_point = scale, zero_point

    def to_dict(self) -> Dict[str, Any]:
        """Return the attributes as a plain dictionary (values are not copied)."""
        field_names = (
            "name",
            "shape",
            "dtype",
            "tensor_index",
            "scale",
            "zero_point",
        )
        return {field: getattr(self, field) for field in field_names}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
class Op:
    """
    One operation (layer) of the model.

    Attributes:
        index: Unique index for this operation (None if not assigned).
        op_name: Type of operation (e.g., "CONV_2D", "FULLY_CONNECTED").
        input_indices: List of tensor indices that are inputs to this op.
        output_indices: List of tensor indices that are outputs of this op.
        params: Op-specific parameters (conv_params, fc_params, etc.).
        state: Operator state ("created", "translated", "generated").
        pass_flags: Dictionary of pass flags, empty on construction.
    """

    def __init__(
        self,
        op_name: str,
        input_indices: List[int],
        output_indices: List[int],
        params: Optional[Dict[str, Any]] = None,
        index: Optional[int] = None,
        state: str = "created",
    ):
        self.index = index
        self.op_name = op_name
        self.input_indices, self.output_indices = input_indices, output_indices
        # A missing or empty params mapping is replaced by a fresh dict.
        self.params = params if params else {}
        self.state = state
        self.pass_flags = {}

    def to_dict(self) -> Dict[str, Any]:
        """
        Return a flat dictionary describing the op.

        The entries of ``params`` are merged into the top level after the
        core fields, so a params key with the same name as a core field
        overrides that field's value.
        """
        core_fields = {
            "index": self.index,
            "op_name": self.op_name,
            "input_indices": self.input_indices,
            "output_indices": self.output_indices,
            "state": self.state,
            "pass_flags": self.pass_flags,
        }
        return {**core_fields, **self.params}
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class ModelInfo:
    """
    Unified Intermediate Representation for neural network models.

    This is the central data structure used throughout TinyMLC.
    All frontends (LiteRT, ONNX, ANG) produce this structure.
    The backend code generator consumes this structure.

    Attributes:
        inputs: List of input tensor specifications.
        outputs: List of output tensor specifications.
        ops: List of operations in execution order.
        tensors: Dictionary mapping tensor index to TensorSpec.
        weights: Dictionary mapping tensor index to numpy array data.
        quant_scales: Global quantization scales (if not per-tensor).
    """

    def __init__(
        self,
        inputs: "List[TensorSpec]",
        outputs: "List[TensorSpec]",
        ops: "List[Op]",
        tensors: "Dict[int, TensorSpec]",
        weights: Dict[int, np.ndarray],
        quant_scales: Optional[Dict[str, Any]] = None,
    ):
        self.inputs = inputs
        self.outputs = outputs
        self.ops = ops
        self.tensors = tensors
        self.weights = weights
        # A missing or empty mapping is replaced by a fresh dict.
        self.quant_scales = quant_scales or {}

    def to_dict(self) -> Dict[str, Any]:
        """
        Convert the entire ModelInfo to a dictionary.

        Tensor specs and ops are converted through their own ``to_dict``.
        Weight arrays are converted with ``tolist()`` and keyed by
        ``str(index)``; the "tensors" mapping keeps its original keys.
        """
        return {
            "input": [t.to_dict() for t in self.inputs],
            "output": [t.to_dict() for t in self.outputs],
            "ops": [op.to_dict() for op in self.ops],
            "tensors": {
                idx: spec.to_dict() for idx, spec in self.tensors.items()
            },
            "weights": {
                str(idx): weight.tolist()
                for idx, weight in self.weights.items()
            },
            "quant_scales": self.quant_scales,
        }

    def get_tensor(self, index: int) -> "Optional[TensorSpec]":
        """Get tensor specification by index, or None if it is unknown."""
        return self.tensors.get(index)

    def get_weight(self, index: int) -> Optional[np.ndarray]:
        """Get weight data by tensor index, or None if there is none."""
        return self.weights.get(index)

    def add_tensor(
        self,
        index: int,
        spec: "TensorSpec",
        weight: Optional[np.ndarray] = None,
    ) -> None:
        """
        Add a tensor to the model, replacing any existing entry at ``index``.

        Args:
            index: Unique index for the tensor.
            spec: TensorSpec describing the tensor.
            weight: Optional numpy array with weight data.
        """
        self.tensors[index] = spec
        if weight is not None:
            self.weights[index] = weight

    def add_op(self, op: "Op") -> None:
        """Append an operation to the end of the op list."""
        self.ops.append(op)

    def validate(self) -> bool:
        """
        Validate the model for consistency.

        Checks:
            - Input/output lists are not empty.
            - All indices in ops refer to valid tensors.
            - All weights belong to a known tensor and have the same shape
              as its spec; zero-dimensional (scalar) weights are exempt
              from the shape check.

        Returns:
            True if the model is valid, False otherwise.
        """
        if not self.inputs or not self.outputs:
            return False

        # Walk the two index sequences separately rather than concatenating
        # them, so a list/tuple mix does not raise TypeError.
        for op in self.ops:
            for indices in (op.input_indices, op.output_indices):
                for idx in indices:
                    if idx not in self.tensors:
                        return False

        for idx, weight in self.weights.items():
            if idx not in self.tensors:
                return False
            spec_shape = self.tensors[idx].shape
            # Compare as tuples so a spec shape stored as a tuple (or any
            # other sequence) still matches numpy's tuple-typed shape.
            expected = None if spec_shape is None else tuple(spec_shape)
            if tuple(weight.shape) != expected:
                # Allow broadcasting for scalar weights (shape mismatch is OK)
                if len(weight.shape) != 0:
                    return False

        return True
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def default_quant_scale() -> float:
    """
    Return the fallback quantization scale used for int8 models.

    The value is 1/256 (0.00390625), used when no specific scale is
    provided.

    Returns:
        Default scale value.
    """
    quantization_steps = 256.0
    return 1.0 / quantization_steps
|
|
240
|
+
|
|
241
|
+
|
|
242
|
+
def default_zero_point() -> int:
    """
    Return the fallback zero point used for int8 models.

    Returns:
        Default zero point (0 for symmetric quantization).
    """
    symmetric_zero_point = 0
    return symmetric_zero_point
|
|
250
|
+
|
|
251
|
+
|
|
252
|
+
def create_default_tensor_spec(
    name: str,
    shape: List[int],
    dtype: str = "int8",
) -> TensorSpec:
    """
    Build a TensorSpec, filling in default quantization parameters.

    A dtype whose name starts with "int" receives the default scale and
    zero point; every other dtype (including names such as "uint8" or
    "float32") gets ``None`` for both.

    Args:
        name: Tensor name.
        shape: Tensor shape.
        dtype: Data type (default: "int8").

    Returns:
        A TensorSpec with default scale and zero point if quantized.
    """
    is_quantized = dtype.startswith("int")
    scale = default_quant_scale() if is_quantized else None
    zero_point = default_zero_point() if is_quantized else None
    return TensorSpec(
        name=name,
        shape=shape,
        dtype=dtype,
        scale=scale,
        zero_point=zero_point,
    )
|
TinyMLC/ANG/utils.py
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# TinyMLC - Tiny Machine Learning Compiler
|
|
3
|
+
#
|
|
4
|
+
# Copyright (c) 2026 Jia Liu & TinyMLC Contributors
|
|
5
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
6
|
+
#
|
|
7
|
+
# This file is part of TinyMLC.
|
|
8
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
9
|
+
# you may not use this file except in compliance with the License.
|
|
10
|
+
# You may obtain a copy of the License at:
|
|
11
|
+
#
|
|
12
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
13
|
+
#
|
|
14
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
15
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
16
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
17
|
+
# See the License for the specific language governing permissions and
|
|
18
|
+
# limitations under the License.
|
|
19
|
+
|
|
20
|
+
# Common utility functions used across ANG.
|
|
21
|
+
|
|
22
|
+
import copy
|
|
23
|
+
import json
|
|
24
|
+
import hashlib
|
|
25
|
+
from typing import Dict, Any, List, Optional
|
|
26
|
+
import numpy as np
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def hash_structure(structure: Dict[str, Any]) -> str:
    """
    Compute a hash of a network structure (op sequence only, not weights).

    This is used for caching and duplicate detection. Only the "op_name"
    and "params" entries of each op take part in the hash; every other
    key (indices, state, ...) is ignored. The ops are hashed in the order
    they appear, so two networks containing the same ops in a different
    order get different hashes.

    Args:
        structure: Dictionary containing the network structure. Ops are
            read from its "ops" list; a missing list hashes like an
            empty one.
            NOTE(review): other helpers in this module read "layers"
            rather than "ops" -- confirm callers pass the expected key.

    Returns:
        SHA256 hash string (hex digest).

    Raises:
        TypeError: If a params value is not JSON serializable.
    """
    # Keep the ops in their original order: the list order is already
    # deterministic, and sorting by str(dict) would both discard the op
    # sequence and depend on the insertion order of the params keys.
    normalized = [
        {"op_name": op.get("op_name"), "params": op.get("params", {})}
        for op in structure.get("ops", [])
    ]

    # sort_keys makes the serialization independent of key insertion order.
    json_str = json.dumps(normalized, sort_keys=True)
    return hashlib.sha256(json_str.encode()).hexdigest()
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
def _first_tensor_index(op: Dict[str, Any], key: str) -> Any:
    """Return the first index stored under ``key`` (0 if the key is absent, None if the list is empty)."""
    indices = op.get(key, [0])
    return indices[0] if indices else None


def _lookup_tensor_shape(tensors: Dict[Any, Any], index: Any, default: List[int]) -> List[int]:
    """Return the "shape" of tensor ``index``, accepting int or string keys, else ``default``."""
    spec = tensors.get(index)
    if spec is None and index is not None:
        # Tensor keys become strings after a JSON round-trip.
        spec = tensors.get(str(index))
    if spec is None:
        return default
    return spec.get("shape", default)


def _conv_macs(
    op: Dict[str, Any],
    tensors: Dict[Any, Any],
    kernel_size: int,
    depthwise: bool = False,
) -> int:
    """Return H_out * W_out * C_in * [C_out *] K * K for a conv-like op with rank >= 4 shapes, else 0."""
    input_shape = _lookup_tensor_shape(
        tensors, _first_tensor_index(op, "input_indices"), [1, 1, 1, 1]
    )
    output_shape = _lookup_tensor_shape(
        tensors, _first_tensor_index(op, "output_indices"), [1, 1, 1, 1]
    )
    if len(input_shape) < 4 or len(output_shape) < 4:
        return 0

    # Shapes are read as [batch, height, width, channels]; a missing or
    # zero dimension is counted as 1.
    h = output_shape[1] or 1
    w = output_shape[2] or 1
    c_in = input_shape[3] or 1
    if depthwise:
        # One filter per input channel: no C_out multiplier.
        return h * w * c_in * kernel_size * kernel_size
    c_out = output_shape[3] or 1
    # Bias is not counted as a MAC.
    return h * w * c_in * c_out * kernel_size * kernel_size


def calculate_macs(model_info: Dict[str, Any]) -> int:
    """
    Calculate the total number of multiply-accumulate operations.

    Counted ops:
        - CONV_2D, CONV_TRANSPOSE, TRANSPOSED_CONV:
          H_out * W_out * C_in * C_out * K * K, with K read from
          ``conv_params["kernel_size"]`` (default 3).
        - DEPTHWISE_CONV_2D: H_out * W_out * C_in * K * K, with K read
          from ``dw_params["kernel_size"]`` (default 3).
        - FULLY_CONNECTED: product of all input dimensions times
          ``fc_params["units"]`` (default 64); ``None`` dimensions count
          as 1.

    Every other op (UPSAMPLE_2D, CONCAT, ADD, DETECTION_HEAD, ...) adds 0.
    Only the first input and first output tensor of an op are inspected.
    Tensors that cannot be found fall back to a [1, 1, 1, 1] shape for
    convolutions and to an empty shape for fully connected ops.

    NOTE(review): transposed convolutions use the *output* height/width,
    exactly like regular convolutions; confirm this is the intended cost
    model, since it scales with the upsampled resolution.

    Args:
        model_info: ModelInfo dictionary with "ops" and "tensors" entries.
            Tensor keys may be ints or their string form.

    Returns:
        Total MACs count.
    """
    total = 0
    ops = model_info.get("ops", [])
    tensors = model_info.get("tensors", {})

    for op in ops:
        op_name = op.get("op_name")

        if op_name in ("CONV_2D", "CONV_TRANSPOSE", "TRANSPOSED_CONV"):
            kernel_size = op.get("conv_params", {}).get("kernel_size", 3)
            total += _conv_macs(op, tensors, kernel_size)

        elif op_name == "DEPTHWISE_CONV_2D":
            kernel_size = op.get("dw_params", {}).get("kernel_size", 3)
            total += _conv_macs(op, tensors, kernel_size, depthwise=True)

        elif op_name == "FULLY_CONNECTED":
            output_size = op.get("fc_params", {}).get("units", 64)
            input_shape = _lookup_tensor_shape(
                tensors, _first_tensor_index(op, "input_indices"), []
            )
            input_size = 1
            for dim in input_shape:
                # An unknown (None) dimension counts as 1 instead of
                # raising TypeError.
                input_size *= 1 if dim is None else dim
            total += input_size * output_size

        # All remaining ops are pure data movement or element-wise work
        # and contribute no MACs.

    return total
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def _count_elements(values: Any) -> int:
    """Count the leaf elements of an arbitrarily nested list/tuple."""
    count = 0
    for item in values:
        if isinstance(item, (list, tuple)):
            count += _count_elements(item)
        else:
            count += 1
    return count


def calculate_params(model_info: Dict[str, Any]) -> int:
    """
    Calculate the total number of parameters.

    Each entry of ``model_info["weights"]`` contributes its element
    count: ``.size`` for array-like objects, and the number of leaf
    elements for (possibly nested) lists or tuples, such as those
    produced by ``ndarray.tolist()``. Values of any other type are
    ignored.

    Args:
        model_info: ModelInfo dictionary.

    Returns:
        Total parameter count (number of elements, not bytes).
    """
    total = 0
    weights = model_info.get("weights", {})

    for weight in weights.values():
        if hasattr(weight, "size"):
            total += int(weight.size)
        elif isinstance(weight, (list, tuple)):
            # len() would only count the outermost dimension of a nested
            # list, so count the leaves instead.
            total += _count_elements(weight)

    return total
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def calculate_peak_ram(model_info: Dict[str, Any]) -> int:
    """
    Estimate the peak RAM usage of the model.

    The ops are walked in order and every tensor referenced as an input
    or output becomes live. Tensors are never released, so the result is
    the total size of all distinct tensors touched by the ops, i.e. an
    upper bound on what must be held simultaneously. A precise figure
    would require liveness tracking.

    Each element is counted as 1 byte (int8), regardless of the tensor's
    dtype. Unknown (``None``) dimensions count as 1. Tensor keys and op
    indices are matched by their string form, so a dictionary that went
    through a JSON round-trip (string keys) works the same as one with
    integer keys. Indices without a tensor entry contribute 0.

    Args:
        model_info: ModelInfo dictionary with "ops" and "tensors" entries.

    Returns:
        Peak RAM usage in bytes.
    """
    ops = model_info.get("ops", [])
    tensors = model_info.get("tensors", {})

    # Element count per tensor, keyed by the string form of its index.
    tensor_sizes = {}
    for idx, spec in tensors.items():
        size = 1
        for dim in spec.get("shape", []):
            size *= 1 if dim is None else dim
        # Assume int8 = 1 byte per element
        tensor_sizes[str(idx)] = size

    live_tensors = set()
    current = 0
    peak = 0

    for op in ops:
        # Inputs and outputs are both live while the op executes.
        for key in ("input_indices", "output_indices"):
            for idx in op.get(key, []):
                tensor_key = str(idx)
                if tensor_key not in live_tensors:
                    live_tensors.add(tensor_key)
                    # Maintain the running total incrementally instead of
                    # re-summing the whole live set for every op.
                    current += tensor_sizes.get(tensor_key, 0)

        peak = max(peak, current)

    return peak
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
def calculate_flash(model_info: Dict[str, Any], code_size: int = 1024) -> int:
    """
    Estimate the Flash usage of the model.

    The estimate is the parameter count returned by ``calculate_params``
    plus an allowance for the generated code.

    NOTE(review): the parameter count is a number of elements, so it
    equals bytes only for 1-byte (int8) weights -- confirm whether wider
    dtypes need to be scaled.

    Args:
        model_info: ModelInfo dictionary.
        code_size: Allowance for the code size; defaults to the rough
            1 KB estimate and can be overridden when a more precise
            figure is known.

    Returns:
        Estimated Flash usage (parameters + code size).
    """
    params = calculate_params(model_info)
    return params + code_size
|
|
264
|
+
|
|
265
|
+
|
|
266
|
+
def _flatten_nested(values: Any) -> List[Any]:
    """Return the leaf elements of a nested list/tuple as one flat list."""
    flat = []
    for item in values:
        if isinstance(item, (list, tuple)):
            flat.extend(_flatten_nested(item))
        else:
            flat.append(item)
    return flat


def flatten_weights(weights: Dict[int, np.ndarray]) -> Dict[int, List[int]]:
    """
    Flatten all weight tensors into one-dimensional lists.

    Objects with a ``flatten`` method (numpy arrays) are flattened and
    converted with ``tolist()``. Lists and tuples, including nested ones
    such as the output of ``ndarray.tolist()``, are flattened into a new
    list. Any other value is wrapped in a single-element list.

    Args:
        weights: Dictionary of weight tensors (numpy arrays or lists).

    Returns:
        Dictionary with the same keys mapping to flattened weight lists.
    """
    result = {}
    for idx, weight in weights.items():
        if hasattr(weight, "flatten"):
            result[idx] = weight.flatten().tolist()
        elif isinstance(weight, (list, tuple)):
            # Always build a new flat list so nested inputs are really
            # flattened and the caller's list is not aliased.
            result[idx] = _flatten_nested(weight)
        else:
            result[idx] = [weight]
    return result
|
|
285
|
+
|
|
286
|
+
|
|
287
|
+
def generate_random_weights_from_structure(
    structure: Dict[str, Any],
    seed: Optional[int] = None,
) -> Dict[int, np.ndarray]:
    """
    Create random quantized weights and biases for a network structure.

    Tensor indices start at 1. Every "conv" and "fc" layer consumes two
    consecutive indices: an int8 weight followed by an int32 bias. All
    other layer types ("pool", "detection_head", ...) produce nothing.
    Values are drawn from U(-0.5, 0.5), multiplied by 256, rounded and
    clamped to [-128, 127].

    When a seed is given, numpy's global random state is re-seeded.

    NOTE(review): the shape used to size "fc" weights is the original
    ``input_shape``; it is not updated by preceding layers -- confirm
    this matches the shapes produced by the model builder.

    Args:
        structure: Structure dict with 'layers', 'input_shape', 'output_shape'
        seed: Random seed for reproducibility

    Returns:
        Dict mapping tensor index to weight array
    """

    def _quantized_uniform(shape, dtype):
        # Shared sampling recipe for weights and biases.
        samples = np.random.uniform(-0.5, 0.5, size=shape)
        return np.clip(np.round(samples * 256), -128, 127).astype(dtype)

    if seed is not None:
        np.random.seed(seed)

    layer_specs = structure.get("layers", [])
    input_shape = structure.get("input_shape", [1, 28, 28, 1])

    generated = {}
    slot = 1
    flat_dims = list(input_shape)
    in_channels = input_shape[-1] if len(input_shape) >= 2 else 1

    for layer_spec in layer_specs:
        kind = layer_spec.get("type")

        if kind == "conv":
            k = layer_spec.get("kernel", 3)
            out_channels = layer_spec.get("channels", 16)
            # Weight: [K, K, C_in, C_out], bias: [C_out]
            kernel_shape = [k, k, in_channels, out_channels]
            offset_shape = [out_channels]
            in_channels = out_channels
        elif kind == "fc":
            n_units = layer_spec.get("units", 64)
            # Product of all tracked dimensions gives the flattened size.
            n_inputs = 1
            for extent in flat_dims:
                n_inputs *= extent
            # Weight: [input_size, units], bias: [units]
            kernel_shape = [n_inputs, n_units]
            offset_shape = [n_units]
        else:
            # Weight-free layer (pool, detection head placeholder, ...).
            continue

        # Weight is drawn before the bias to keep the random stream order.
        generated[slot] = _quantized_uniform(kernel_shape, np.int8)
        generated[slot + 1] = _quantized_uniform(offset_shape, np.int32)
        slot += 2

    return generated
|
|
365
|
+
|
|
366
|
+
|
|
367
|
+
def _random_int8_weight(shape: List[int]) -> np.ndarray:
|
|
368
|
+
"""
|
|
369
|
+
Generate random int8 weight tensor.
|
|
370
|
+
|
|
371
|
+
Values are uniformly distributed in [-128, 127] range.
|
|
372
|
+
|
|
373
|
+
Args:
|
|
374
|
+
shape: Shape of the weight tensor.
|
|
375
|
+
|
|
376
|
+
Returns:
|
|
377
|
+
int8 numpy array.
|
|
378
|
+
"""
|
|
379
|
+
# Generate random values in [-128, 127] range
|
|
380
|
+
# Use uniform distribution centered at 0
|
|
381
|
+
weight = np.random.uniform(-0.5, 0.5, size=shape)
|
|
382
|
+
# Scale to int8 range: [-128, 127]
|
|
383
|
+
# 0.5 * 256 = 128, so this maps [-0.5, 0.5] to [-128, 127]
|
|
384
|
+
weight = np.round(weight * 256).astype(np.int8)
|
|
385
|
+
return weight
|
|
386
|
+
|
|
387
|
+
|
|
388
|
+
def _random_int32_bias(shape: List[int]) -> np.ndarray:
|
|
389
|
+
"""
|
|
390
|
+
Generate random int32 bias tensor.
|
|
391
|
+
|
|
392
|
+
Values are uniformly distributed in [-128, 127] range.
|
|
393
|
+
|
|
394
|
+
Args:
|
|
395
|
+
shape: Shape of the bias tensor.
|
|
396
|
+
|
|
397
|
+
Returns:
|
|
398
|
+
int32 numpy array.
|
|
399
|
+
"""
|
|
400
|
+
# Use same range as weights for consistency
|
|
401
|
+
bias = np.random.uniform(-0.5, 0.5, size=shape)
|
|
402
|
+
bias = np.round(bias * 256).astype(np.int32)
|
|
403
|
+
return bias
|
|
404
|
+
|
|
405
|
+
|
|
406
|
+
def fill_model_info_with_weights(
    model_info: Dict[str, Any],
    weights: Dict[int, np.ndarray],
) -> Dict[str, Any]:
    """
    Return a deep copy of ``model_info`` whose "weights" entry is rebuilt.

    The input dictionary is left untouched. Any existing "weights" entry
    in the copy is discarded and replaced by a mapping from ``str(index)``
    to the weight data: numpy arrays are converted with ``tolist()``,
    every other value is stored as given.

    Args:
        model_info: ModelInfo dictionary to copy.
        weights: Dictionary mapping tensor index to weight data.

    Returns:
        The copied dictionary with the new "weights" entry.
    """
    filled = copy.deepcopy(model_info)
    filled["weights"] = {
        str(tensor_idx): (
            data.tolist() if isinstance(data, np.ndarray) else data
        )
        for tensor_idx, data in weights.items()
    }
    return filled
|
TinyMLC/__init__.py
ADDED
|
File without changes
|