sparsepixels 0.2.1__tar.gz → 0.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {sparsepixels-0.2.1/sparsepixels.egg-info → sparsepixels-0.2.3}/PKG-INFO +43 -10
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/README.md +41 -8
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/notebook/utils.py +98 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/setup.cfg +2 -2
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/sparsepixels/layers.py +20 -2
- {sparsepixels-0.2.1 → sparsepixels-0.2.3/sparsepixels.egg-info}/PKG-INFO +43 -10
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/sparsepixels.egg-info/requires.txt +1 -1
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/sparsepixels.egg-info/top_level.txt +0 -2
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/tests/test_model.py +12 -10
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/LICENSE +0 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/pyproject.toml +0 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/setup.py +0 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/sparsepixels/__init__.py +0 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/sparsepixels/img/logo.png +0 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/sparsepixels.egg-info/SOURCES.txt +0 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/sparsepixels.egg-info/dependency_links.txt +0 -0
- {sparsepixels-0.2.1 → sparsepixels-0.2.3}/tests/__init__.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sparsepixels
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Efficient convolution for sparse data on FPGAs
|
|
5
5
|
Home-page: https://github.com/hftsoi/sparse-pixels
|
|
6
6
|
Author: Ho Fung Tsoi
|
|
@@ -13,7 +13,7 @@ Description-Content-Type: text/markdown
|
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
Requires-Dist: tensorflow
|
|
15
15
|
Requires-Dist: keras>=3.0
|
|
16
|
-
Requires-Dist: HGQ2
|
|
16
|
+
Requires-Dist: HGQ2>=0.1.8
|
|
17
17
|
Dynamic: license-file
|
|
18
18
|
|
|
19
19
|
<p align="center">
|
|
@@ -50,40 +50,73 @@ Import sparse layers and quantization library (HGQ2):
|
|
|
50
50
|
|
|
51
51
|
```python
|
|
52
52
|
import keras
|
|
53
|
-
from keras.layers import Flatten, Activation
|
|
53
|
+
from keras.layers import Flatten, Activation
|
|
54
54
|
from hgq.layers import QConv2D, QDense
|
|
55
55
|
from hgq.config import QuantizerConfigScope, LayerConfigScope
|
|
56
|
+
from hgq.quantizer.config import QuantizerConfig
|
|
56
57
|
from sparsepixels.layers import InputReduce, QConv2DSparse, AveragePooling2DSparse
|
|
57
58
|
```
|
|
58
59
|
|
|
59
|
-
Build an example sparse CNN within HGQ2 quantization scopes
|
|
60
|
+
Build an example sparse CNN within HGQ2 quantization scopes. A custom input quantizer
|
|
61
|
+
config with higher initial fractional bits (`f0=8`) is used to prevent the default (`f0=2`)
|
|
62
|
+
from zeroing out sparse signals in early training epochs:
|
|
60
63
|
|
|
61
64
|
```python
|
|
65
|
+
iq_conf = QuantizerConfig(place='datalane', q_type='kif', i0=4, f0=8, overflow_mode='WRAP')
|
|
66
|
+
|
|
62
67
|
with (
|
|
63
68
|
QuantizerConfigScope(place='all', default_q_type='kbi', overflow_mode='SAT_SYM'),
|
|
64
69
|
QuantizerConfigScope(place='datalane', default_q_type='kif', overflow_mode='WRAP'),
|
|
65
|
-
LayerConfigScope(enable_ebops=
|
|
70
|
+
LayerConfigScope(enable_ebops=True, enable_iq=True, beta0=1e-5),
|
|
66
71
|
):
|
|
67
|
-
x_in = keras.Input(shape=(
|
|
72
|
+
x_in = keras.Input(shape=(28, 28, 1), name='x_in')
|
|
68
73
|
|
|
69
74
|
# Sparse input reduction: retain up to n_max_pixels active pixels
|
|
70
75
|
x, keep_mask = InputReduce(n_max_pixels=20, threshold=0.1, name='input_reduce')(x_in)
|
|
71
76
|
|
|
72
77
|
# Sparse convolution
|
|
73
|
-
x = QConv2DSparse(filters=3, kernel_size=3, name='conv1', padding='same', strides=1
|
|
74
|
-
|
|
78
|
+
x = QConv2DSparse(filters=3, kernel_size=3, name='conv1', padding='same', strides=1,
|
|
79
|
+
activation='relu', iq_conf=iq_conf)([x, keep_mask])
|
|
75
80
|
|
|
76
81
|
# Sparse pooling
|
|
77
82
|
x, keep_mask = AveragePooling2DSparse(2, name='pool1')([x, keep_mask])
|
|
78
83
|
|
|
79
84
|
x = Flatten(name='flatten')(x)
|
|
80
|
-
x = QDense(10, name='dense1', activation='relu')(x)
|
|
85
|
+
x = QDense(10, name='dense1', activation='relu', iq_conf=iq_conf)(x)
|
|
81
86
|
x = Activation('softmax', name='softmax')(x)
|
|
82
87
|
|
|
83
88
|
model = keras.Model(x_in, x)
|
|
84
89
|
```
|
|
85
90
|
|
|
86
|
-
|
|
91
|
+
## Converting a trained model to HLS with hls4ml
|
|
92
|
+
|
|
93
|
+
> **Note:** A [PR](https://github.com/fastmachinelearning/hls4ml/pull/1468) adding `sparsepixels` support to the official [hls4ml](https://github.com/fastmachinelearning/hls4ml) repo has been submitted but is not yet merged. In the meantime you can install hls4ml from the PR branch on this fork to try the converter:
|
|
94
|
+
>
|
|
95
|
+
> ```bash
|
|
96
|
+
> pip install "git+https://github.com/hftsoi/hls4ml.git@sparsepixels"
|
|
97
|
+
> ```
|
|
98
|
+
|
|
99
|
+
Once installed, converting a trained sparsepixels model to HLS works as usual:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
import hls4ml
|
|
103
|
+
|
|
104
|
+
hls_config = hls4ml.utils.config_from_keras_model(model, granularity='name')
|
|
105
|
+
hls_config.setdefault('Model', {})['PipelineStyle'] = 'dataflow' # use "#pragma HLS DATAFLOW" (instead of the default "#pragma HLS PIPELINE" for io_parallel)
|
|
106
|
+
|
|
107
|
+
hls_model = hls4ml.converters.convert_from_keras_model(
|
|
108
|
+
model,
|
|
109
|
+
hls_config=hls_config,
|
|
110
|
+
output_dir='hls_proj/my_sparse_cnn',
|
|
111
|
+
backend='Vitis',
|
|
112
|
+
io_type='io_parallel', # io_stream is not supported yet
|
|
113
|
+
)
|
|
114
|
+
hls_model.write()
|
|
115
|
+
hls_model.compile()
|
|
116
|
+
y_hls = hls_model.predict(x_test)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
> **Note:** The converter currently supports only fully parallelized `io_parallel` HLS. We are working on expanding to partial parallelization and `io_stream` for greater flexibility.
|
|
87
120
|
|
|
88
121
|
## Documentation
|
|
89
122
|
|
|
@@ -32,40 +32,73 @@ Import sparse layers and quantization library (HGQ2):
|
|
|
32
32
|
|
|
33
33
|
```python
|
|
34
34
|
import keras
|
|
35
|
-
from keras.layers import Flatten, Activation
|
|
35
|
+
from keras.layers import Flatten, Activation
|
|
36
36
|
from hgq.layers import QConv2D, QDense
|
|
37
37
|
from hgq.config import QuantizerConfigScope, LayerConfigScope
|
|
38
|
+
from hgq.quantizer.config import QuantizerConfig
|
|
38
39
|
from sparsepixels.layers import InputReduce, QConv2DSparse, AveragePooling2DSparse
|
|
39
40
|
```
|
|
40
41
|
|
|
41
|
-
Build an example sparse CNN within HGQ2 quantization scopes
|
|
42
|
+
Build an example sparse CNN within HGQ2 quantization scopes. A custom input quantizer
|
|
43
|
+
config with higher initial fractional bits (`f0=8`) is used to prevent the default (`f0=2`)
|
|
44
|
+
from zeroing out sparse signals in early training epochs:
|
|
42
45
|
|
|
43
46
|
```python
|
|
47
|
+
iq_conf = QuantizerConfig(place='datalane', q_type='kif', i0=4, f0=8, overflow_mode='WRAP')
|
|
48
|
+
|
|
44
49
|
with (
|
|
45
50
|
QuantizerConfigScope(place='all', default_q_type='kbi', overflow_mode='SAT_SYM'),
|
|
46
51
|
QuantizerConfigScope(place='datalane', default_q_type='kif', overflow_mode='WRAP'),
|
|
47
|
-
LayerConfigScope(enable_ebops=
|
|
52
|
+
LayerConfigScope(enable_ebops=True, enable_iq=True, beta0=1e-5),
|
|
48
53
|
):
|
|
49
|
-
x_in = keras.Input(shape=(
|
|
54
|
+
x_in = keras.Input(shape=(28, 28, 1), name='x_in')
|
|
50
55
|
|
|
51
56
|
# Sparse input reduction: retain up to n_max_pixels active pixels
|
|
52
57
|
x, keep_mask = InputReduce(n_max_pixels=20, threshold=0.1, name='input_reduce')(x_in)
|
|
53
58
|
|
|
54
59
|
# Sparse convolution
|
|
55
|
-
x = QConv2DSparse(filters=3, kernel_size=3, name='conv1', padding='same', strides=1
|
|
56
|
-
|
|
60
|
+
x = QConv2DSparse(filters=3, kernel_size=3, name='conv1', padding='same', strides=1,
|
|
61
|
+
activation='relu', iq_conf=iq_conf)([x, keep_mask])
|
|
57
62
|
|
|
58
63
|
# Sparse pooling
|
|
59
64
|
x, keep_mask = AveragePooling2DSparse(2, name='pool1')([x, keep_mask])
|
|
60
65
|
|
|
61
66
|
x = Flatten(name='flatten')(x)
|
|
62
|
-
x = QDense(10, name='dense1', activation='relu')(x)
|
|
67
|
+
x = QDense(10, name='dense1', activation='relu', iq_conf=iq_conf)(x)
|
|
63
68
|
x = Activation('softmax', name='softmax')(x)
|
|
64
69
|
|
|
65
70
|
model = keras.Model(x_in, x)
|
|
66
71
|
```
|
|
67
72
|
|
|
68
|
-
|
|
73
|
+
## Converting a trained model to HLS with hls4ml
|
|
74
|
+
|
|
75
|
+
> **Note:** A [PR](https://github.com/fastmachinelearning/hls4ml/pull/1468) adding `sparsepixels` support to the official [hls4ml](https://github.com/fastmachinelearning/hls4ml) repo has been submitted but is not yet merged. In the meantime you can install hls4ml from the PR branch on this fork to try the converter:
|
|
76
|
+
>
|
|
77
|
+
> ```bash
|
|
78
|
+
> pip install "git+https://github.com/hftsoi/hls4ml.git@sparsepixels"
|
|
79
|
+
> ```
|
|
80
|
+
|
|
81
|
+
Once installed, converting a trained sparsepixels model to HLS works as usual:
|
|
82
|
+
|
|
83
|
+
```python
|
|
84
|
+
import hls4ml
|
|
85
|
+
|
|
86
|
+
hls_config = hls4ml.utils.config_from_keras_model(model, granularity='name')
|
|
87
|
+
hls_config.setdefault('Model', {})['PipelineStyle'] = 'dataflow' # use "#pragma HLS DATAFLOW" (instead of the default "#pragma HLS PIPELINE" for io_parallel)
|
|
88
|
+
|
|
89
|
+
hls_model = hls4ml.converters.convert_from_keras_model(
|
|
90
|
+
model,
|
|
91
|
+
hls_config=hls_config,
|
|
92
|
+
output_dir='hls_proj/my_sparse_cnn',
|
|
93
|
+
backend='Vitis',
|
|
94
|
+
io_type='io_parallel', # io_stream is not supported yet
|
|
95
|
+
)
|
|
96
|
+
hls_model.write()
|
|
97
|
+
hls_model.compile()
|
|
98
|
+
y_hls = hls_model.predict(x_test)
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
> **Note:** The converter currently supports only fully parallelized `io_parallel` HLS. We are working on expanding to partial parallelization and `io_stream` for greater flexibility.
|
|
69
102
|
|
|
70
103
|
## Documentation
|
|
71
104
|
|
|
@@ -1149,3 +1149,101 @@ def preview_patches_with_pooling(
|
|
|
1149
1149
|
plt.show()
|
|
1150
1150
|
|
|
1151
1151
|
|
|
1152
|
+
def _fmt_bits(bits_tensor):
    """Summarize a tensor of bitwidths as formatted ``(mean, min, max)`` strings.

    Args:
        bits_tensor: Anything ``np.asarray`` can convert (a scalar, a nested
            sequence, an array, or a tensor exposing ``__array__``).

    Returns:
        A 3-tuple of strings, each formatted with one decimal place.
        An input with no elements yields ``("-", "-", "-")``.
    """
    bits = np.asarray(bits_tensor).ravel()
    if bits.size == 0:
        # min()/max() raise ValueError on zero-size arrays, so report "no data"
        # with a dash placeholder instead of crashing.
        return "-", "-", "-"
    # A single-element tensor needs no special case: mean == min == max.
    return tuple(f"{float(stat):.1f}"
                 for stat in (bits.mean(), bits.min(), bits.max()))
|
|
1158
|
+
|
|
1159
|
+
def _get_layer_info(layer):
    """Collect quantizer handles and parameter counts for one layer.

    Two layer shapes are recognized:

    * a wrapper whose ``conv`` attribute holds the quantized layer (it has
      ``_kq``); the bias and bias quantizer are then read from the wrapper's
      ``sparse_bias`` and ``_bq`` attributes;
    * a layer that has ``_kq`` itself; the bias and bias quantizer are read
      from that layer.

    Args:
        layer: The layer object to inspect.

    Returns:
        ``None`` when the layer matches neither shape, otherwise a dict with
        keys ``name``, ``n_kernel``, ``n_bias``, ``kq``, ``bq``, ``iq`` and
        ``ebops``. Quantizers that are absent are ``None``; ``ebops`` is a
        ``float`` or ``None``; ``n_bias`` is ``0`` when there is no bias.
    """
    wrapped = getattr(layer, 'conv', None)
    if wrapped is not None and hasattr(wrapped, '_kq'):
        core = wrapped
        bias = getattr(layer, 'sparse_bias', None)
        bq = getattr(layer, '_bq', None)
    elif hasattr(layer, '_kq'):
        core = layer
        # Not every layer with a kernel quantizer is guaranteed to expose
        # ``use_bias``/``bias``; treat a missing attribute as "no bias"
        # rather than raising AttributeError.
        if getattr(core, 'use_bias', True):
            bias = getattr(core, 'bias', None)
        else:
            bias = None
        bq = getattr(core, '_bq', None)
    else:
        return None

    raw_ebops = getattr(core, '_ebops', None)
    return dict(
        name=layer.name,
        n_kernel=int(np.prod(core.kernel.shape)),
        n_bias=int(np.prod(bias.shape)) if bias is not None else 0,
        kq=getattr(core, '_kq', None),
        bq=bq,
        iq=getattr(core, '_iq', None),
        ebops=float(raw_ebops) if raw_ebops is not None else None,
    )
|
|
1178
|
+
|
|
1179
|
+
def print_quantization(model):
    """Print a per-layer table of quantizer bitwidths and eBOPs for ``model``.

    Layers for which ``_get_layer_info`` returns ``None`` are skipped. For
    each remaining layer the table shows the kernel and bias element counts,
    the mean/min/max of the kernel (K), bias (B) and input (I) quantizer
    ``bits``, and the layer's eBOPs. A missing quantizer or eBOPs value is
    shown as "-". The final line prints the sum of all available eBOPs.

    Args:
        model: Object exposing ``name`` and an iterable ``layers``.
    """
    infos = [info for layer in model.layers
             if (info := _get_layer_info(layer)) is not None]
    # Keep the 12-character name column; widen it only when a layer name is
    # longer, so the remaining columns stay aligned.
    name_w = max([12] + [len(info['name']) for info in infos])

    def bit_stats(quantizer):
        # Test for None explicitly: a present quantizer must be reported even
        # if the object happens to evaluate as falsy.
        if quantizer is None:
            return "-", "-", "-"
        return _fmt_bits(quantizer.bits)

    print(f"\nModel: {model.name}")
    h = (f"{'Layer':<{name_w}} {'#Kernel':>8} {'#Bias':>6}"
         f"  {'K mean':>6} {'min':>5} {'max':>5}"
         f"  {'B mean':>6} {'min':>5} {'max':>5}"
         f"  {'I mean':>6} {'min':>5} {'max':>5}"
         f"  {'eBOPs':>8}")
    print(h)
    print("-" * len(h))
    total_ebops = 0
    for info in infos:
        km, klo, khi = bit_stats(info['kq'])
        bm, blo, bhi = bit_stats(info['bq'])
        im, ilo, ihi = bit_stats(info['iq'])
        if info['ebops'] is not None:
            total_ebops += info['ebops']
            es = f"{info['ebops']:.0f}"
        else:
            es = "-"
        print(f"{info['name']:<{name_w}} {info['n_kernel']:>8} {info['n_bias']:>6}"
              f"  {km:>6} {klo:>5} {khi:>5}"
              f"  {bm:>6} {blo:>5} {bhi:>5}"
              f"  {im:>6} {ilo:>5} {ihi:>5}"
              f"  {es:>8}")
    print("-" * len(h))
    # Right-align the label so the total sits under the eBOPs column.
    print(f"{'Total eBOPs':>{len(h)-8}}{total_ebops:>8.0f}")
|
|
1208
|
+
|
|
1209
|
+
|
|
1210
|
+
def plot_quantization(models, figsize=(14, 3)):
    """Draw violin plots of kernel, bias and input bitwidths per layer.

    One row of three panels (kernel / bias / input bits) is drawn per model.
    Within a panel each layer that has the corresponding quantizer gets one
    horizontal violin of its flattened ``bits`` values; a panel with no such
    layer shows "N/A".

    Args:
        models: A sequence of models (objects exposing ``name`` and
            ``layers``). A single model is also accepted. An empty sequence
            draws nothing.
        figsize: ``(width, height_per_row)``; the figure height is
            ``height_per_row * len(models)``.
    """
    if hasattr(models, 'layers'):
        # A bare model was passed instead of a sequence of models.
        models = [models]
    models = list(models)
    if not models:
        # plt.subplots rejects a grid with zero rows; nothing to draw.
        return

    categories = [
        ('kq', 'Kernel bits'),
        ('bq', 'Bias bits'),
        ('iq', 'Input bits'),
    ]
    _, axes = plt.subplots(len(models), len(categories),
                           figsize=(figsize[0], figsize[1] * len(models)),
                           squeeze=False, constrained_layout=True)
    for row, model in enumerate(models):
        infos = [info for layer in model.layers
                 if (info := _get_layer_info(layer)) is not None]
        # One colour per reported layer, so a layer keeps its colour across
        # the three panels even when it is absent from one of them.
        colors = plt.cm.tab10(np.linspace(0, 1, max(len(infos), 1)))
        for col, (key, title) in enumerate(categories):
            ax = axes[row][col]
            ax.set_title(f'{model.name} — {title}')
            shown = [(info['name'], np.array(info[key].bits).flatten(), color)
                     for info, color in zip(infos, colors)
                     if info[key] is not None]
            if not shown:
                ax.text(0.5, 0.5, 'N/A', transform=ax.transAxes,
                        ha='center', va='center', fontsize=14, color='gray')
                continue
            names = [name for name, _, _ in shown]
            data = [values for _, values, _ in shown]
            parts = ax.violinplot(data, positions=range(len(data)), vert=False,
                                  showmedians=True, showextrema=False)
            for body, (_, _, color) in zip(parts['bodies'], shown):
                body.set_facecolor(color)
                body.set_alpha(0.7)
            parts['cmedians'].set_color('black')
            ax.set_yticks(range(len(names)))
            ax.set_yticklabels(names)
            ax.set_xlabel('Bitwidth')
            # Keep the default 0..6 window, but extend it when a quantizer
            # uses more bits so those violins are not cut off.
            widest = max(float(np.max(values)) for values in data)
            ax.set_xlim(-0.5, max(6.5, widest + 0.5))
    plt.show()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = sparsepixels
|
|
3
|
-
version = 0.2.
|
|
3
|
+
version = 0.2.3
|
|
4
4
|
description = Efficient convolution for sparse data on FPGAs
|
|
5
5
|
author = Ho Fung Tsoi
|
|
6
6
|
author_email = ho.fung.tsoi@cern.ch
|
|
@@ -19,7 +19,7 @@ python_requires = >=3.10
|
|
|
19
19
|
install_requires =
|
|
20
20
|
tensorflow
|
|
21
21
|
keras>=3.0
|
|
22
|
-
HGQ2
|
|
22
|
+
HGQ2>=0.1.8
|
|
23
23
|
include_package_data = True
|
|
24
24
|
|
|
25
25
|
[options.package_data]
|
|
@@ -64,13 +64,21 @@ class QConv2DSparse(keras.layers.Layer):
|
|
|
64
64
|
super().__init__(name=conv_kwargs.get("name", None))
|
|
65
65
|
self._use_bias = conv_kwargs.pop("use_bias", True)
|
|
66
66
|
self._bq_conf = conv_kwargs.pop("bq_conf", None) or QuantizerConfig("default", "bias")
|
|
67
|
+
self._activation = keras.activations.get(conv_kwargs.pop("activation", None))
|
|
67
68
|
|
|
68
69
|
conv_kwargs["use_bias"] = False
|
|
69
|
-
conv_kwargs
|
|
70
|
+
conv_kwargs["activation"] = None
|
|
70
71
|
self.conv = QConv2D(*conv_args, **conv_kwargs)
|
|
71
72
|
self.masker = RemoveDilatedPixels()
|
|
72
73
|
|
|
73
74
|
def build(self, input_shape):
|
|
75
|
+
# Build the wrapped conv here (eagerly, during layer build) instead of lazily inside call().
|
|
76
|
+
# Otherwise, when Keras traces call() symbolically to infer the output shape, the conv would
|
|
77
|
+
# build in graph mode and HGQ2 (>=0.1.9) runs a weight check there that evaluates a tensor as
|
|
78
|
+
# a Python bool -- which is not allowed in graph mode. See also compute_output_shape below.
|
|
79
|
+
x_shape = input_shape[0]
|
|
80
|
+
if not self.conv.built:
|
|
81
|
+
self.conv.build(x_shape)
|
|
74
82
|
if self._use_bias:
|
|
75
83
|
self.sparse_bias = self.add_weight(
|
|
76
84
|
name="sparse_bias",
|
|
@@ -82,6 +90,11 @@ class QConv2DSparse(keras.layers.Layer):
|
|
|
82
90
|
self._bq.build((self.conv.filters,))
|
|
83
91
|
super().build(input_shape)
|
|
84
92
|
|
|
93
|
+
def compute_output_shape(self, input_shape):
|
|
94
|
+
# Provide the output shape directly so Keras does not trace call() symbolically (masking
|
|
95
|
+
# preserves shape, so the output shape is the wrapped conv's output shape).
|
|
96
|
+
return self.conv.compute_output_shape(input_shape[0])
|
|
97
|
+
|
|
85
98
|
def call(self, inputs, **kwargs):
|
|
86
99
|
x, keep_mask = inputs
|
|
87
100
|
x = self.masker((x, keep_mask))
|
|
@@ -93,6 +106,9 @@ class QConv2DSparse(keras.layers.Layer):
|
|
|
93
106
|
non_zero = ops.cast(y != 0, y.dtype)
|
|
94
107
|
y = y + b * non_zero
|
|
95
108
|
|
|
109
|
+
if self._activation is not None:
|
|
110
|
+
y = self._activation(y)
|
|
111
|
+
|
|
96
112
|
y = self.masker((y, keep_mask))
|
|
97
113
|
return y
|
|
98
114
|
|
|
@@ -101,6 +117,7 @@ class QConv2DSparse(keras.layers.Layer):
|
|
|
101
117
|
cfg["conv_config"] = self.conv.get_config()
|
|
102
118
|
cfg["use_bias"] = self._use_bias
|
|
103
119
|
cfg["bq_conf"] = self._bq_conf
|
|
120
|
+
cfg["activation"] = keras.activations.serialize(self._activation)
|
|
104
121
|
return cfg
|
|
105
122
|
|
|
106
123
|
@classmethod
|
|
@@ -108,7 +125,8 @@ class QConv2DSparse(keras.layers.Layer):
|
|
|
108
125
|
conv_cfg = config.pop("conv_config")
|
|
109
126
|
use_bias = config.pop("use_bias", True)
|
|
110
127
|
bq_conf = config.pop("bq_conf", None)
|
|
111
|
-
|
|
128
|
+
activation = config.pop("activation", None)
|
|
129
|
+
return cls(**conv_cfg, use_bias=use_bias, bq_conf=bq_conf, activation=activation)
|
|
112
130
|
|
|
113
131
|
|
|
114
132
|
class AveragePooling2DSparse(keras.layers.Layer):
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: sparsepixels
|
|
3
|
-
Version: 0.2.
|
|
3
|
+
Version: 0.2.3
|
|
4
4
|
Summary: Efficient convolution for sparse data on FPGAs
|
|
5
5
|
Home-page: https://github.com/hftsoi/sparse-pixels
|
|
6
6
|
Author: Ho Fung Tsoi
|
|
@@ -13,7 +13,7 @@ Description-Content-Type: text/markdown
|
|
|
13
13
|
License-File: LICENSE
|
|
14
14
|
Requires-Dist: tensorflow
|
|
15
15
|
Requires-Dist: keras>=3.0
|
|
16
|
-
Requires-Dist: HGQ2
|
|
16
|
+
Requires-Dist: HGQ2>=0.1.8
|
|
17
17
|
Dynamic: license-file
|
|
18
18
|
|
|
19
19
|
<p align="center">
|
|
@@ -50,40 +50,73 @@ Import sparse layers and quantization library (HGQ2):
|
|
|
50
50
|
|
|
51
51
|
```python
|
|
52
52
|
import keras
|
|
53
|
-
from keras.layers import Flatten, Activation
|
|
53
|
+
from keras.layers import Flatten, Activation
|
|
54
54
|
from hgq.layers import QConv2D, QDense
|
|
55
55
|
from hgq.config import QuantizerConfigScope, LayerConfigScope
|
|
56
|
+
from hgq.quantizer.config import QuantizerConfig
|
|
56
57
|
from sparsepixels.layers import InputReduce, QConv2DSparse, AveragePooling2DSparse
|
|
57
58
|
```
|
|
58
59
|
|
|
59
|
-
Build an example sparse CNN within HGQ2 quantization scopes
|
|
60
|
+
Build an example sparse CNN within HGQ2 quantization scopes. A custom input quantizer
|
|
61
|
+
config with higher initial fractional bits (`f0=8`) is used to prevent the default (`f0=2`)
|
|
62
|
+
from zeroing out sparse signals in early training epochs:
|
|
60
63
|
|
|
61
64
|
```python
|
|
65
|
+
iq_conf = QuantizerConfig(place='datalane', q_type='kif', i0=4, f0=8, overflow_mode='WRAP')
|
|
66
|
+
|
|
62
67
|
with (
|
|
63
68
|
QuantizerConfigScope(place='all', default_q_type='kbi', overflow_mode='SAT_SYM'),
|
|
64
69
|
QuantizerConfigScope(place='datalane', default_q_type='kif', overflow_mode='WRAP'),
|
|
65
|
-
LayerConfigScope(enable_ebops=
|
|
70
|
+
LayerConfigScope(enable_ebops=True, enable_iq=True, beta0=1e-5),
|
|
66
71
|
):
|
|
67
|
-
x_in = keras.Input(shape=(
|
|
72
|
+
x_in = keras.Input(shape=(28, 28, 1), name='x_in')
|
|
68
73
|
|
|
69
74
|
# Sparse input reduction: retain up to n_max_pixels active pixels
|
|
70
75
|
x, keep_mask = InputReduce(n_max_pixels=20, threshold=0.1, name='input_reduce')(x_in)
|
|
71
76
|
|
|
72
77
|
# Sparse convolution
|
|
73
|
-
x = QConv2DSparse(filters=3, kernel_size=3, name='conv1', padding='same', strides=1
|
|
74
|
-
|
|
78
|
+
x = QConv2DSparse(filters=3, kernel_size=3, name='conv1', padding='same', strides=1,
|
|
79
|
+
activation='relu', iq_conf=iq_conf)([x, keep_mask])
|
|
75
80
|
|
|
76
81
|
# Sparse pooling
|
|
77
82
|
x, keep_mask = AveragePooling2DSparse(2, name='pool1')([x, keep_mask])
|
|
78
83
|
|
|
79
84
|
x = Flatten(name='flatten')(x)
|
|
80
|
-
x = QDense(10, name='dense1', activation='relu')(x)
|
|
85
|
+
x = QDense(10, name='dense1', activation='relu', iq_conf=iq_conf)(x)
|
|
81
86
|
x = Activation('softmax', name='softmax')(x)
|
|
82
87
|
|
|
83
88
|
model = keras.Model(x_in, x)
|
|
84
89
|
```
|
|
85
90
|
|
|
86
|
-
|
|
91
|
+
## Converting a trained model to HLS with hls4ml
|
|
92
|
+
|
|
93
|
+
> **Note:** A [PR](https://github.com/fastmachinelearning/hls4ml/pull/1468) adding `sparsepixels` support to the official [hls4ml](https://github.com/fastmachinelearning/hls4ml) repo has been submitted but is not yet merged. In the meantime you can install hls4ml from the PR branch on this fork to try the converter:
|
|
94
|
+
>
|
|
95
|
+
> ```bash
|
|
96
|
+
> pip install "git+https://github.com/hftsoi/hls4ml.git@sparsepixels"
|
|
97
|
+
> ```
|
|
98
|
+
|
|
99
|
+
Once installed, converting a trained sparsepixels model to HLS works as usual:
|
|
100
|
+
|
|
101
|
+
```python
|
|
102
|
+
import hls4ml
|
|
103
|
+
|
|
104
|
+
hls_config = hls4ml.utils.config_from_keras_model(model, granularity='name')
|
|
105
|
+
hls_config.setdefault('Model', {})['PipelineStyle'] = 'dataflow' # use "#pragma HLS DATAFLOW" (instead of the default "#pragma HLS PIPELINE" for io_parallel)
|
|
106
|
+
|
|
107
|
+
hls_model = hls4ml.converters.convert_from_keras_model(
|
|
108
|
+
model,
|
|
109
|
+
hls_config=hls_config,
|
|
110
|
+
output_dir='hls_proj/my_sparse_cnn',
|
|
111
|
+
backend='Vitis',
|
|
112
|
+
io_type='io_parallel', # io_stream is not supported yet
|
|
113
|
+
)
|
|
114
|
+
hls_model.write()
|
|
115
|
+
hls_model.compile()
|
|
116
|
+
y_hls = hls_model.predict(x_test)
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
> **Note:** The converter currently supports only fully parallelized `io_parallel` HLS. We are working on expanding to partial parallelization and `io_stream` for greater flexibility.
|
|
87
120
|
|
|
88
121
|
## Documentation
|
|
89
122
|
|
|
@@ -1,15 +1,17 @@
|
|
|
1
1
|
import keras
|
|
2
|
-
from keras.layers import Flatten, Activation, AveragePooling2D
|
|
2
|
+
from keras.layers import Flatten, Activation, AveragePooling2D
|
|
3
3
|
from hgq.layers import QConv2D, QDense
|
|
4
4
|
from hgq.config import QuantizerConfigScope, LayerConfigScope
|
|
5
|
+
from hgq.quantizer.config import QuantizerConfig
|
|
5
6
|
from sparsepixels.layers import InputReduce, QConv2DSparse, AveragePooling2DSparse
|
|
6
7
|
|
|
7
8
|
|
|
8
9
|
def build_cnn(is_sparse, n_max_pixels=None):
|
|
10
|
+
iq_conf = QuantizerConfig(place='datalane', q_type='kif', i0=4, f0=8, overflow_mode='WRAP')
|
|
9
11
|
with (
|
|
10
12
|
QuantizerConfigScope(place='all', default_q_type='kbi', overflow_mode='SAT_SYM'),
|
|
11
13
|
QuantizerConfigScope(place='datalane', default_q_type='kif', overflow_mode='WRAP'),
|
|
12
|
-
LayerConfigScope(enable_ebops=
|
|
14
|
+
LayerConfigScope(enable_ebops=True, enable_iq=True, beta0=1e-5),
|
|
13
15
|
):
|
|
14
16
|
x_in = keras.Input(shape=(32, 32, 1), name='x_in')
|
|
15
17
|
if is_sparse:
|
|
@@ -18,27 +20,27 @@ def build_cnn(is_sparse, n_max_pixels=None):
|
|
|
18
20
|
x = x_in
|
|
19
21
|
|
|
20
22
|
if is_sparse:
|
|
21
|
-
x = QConv2DSparse(filters=1, kernel_size=7, name='conv1', padding='same', strides=1
|
|
22
|
-
|
|
23
|
+
x = QConv2DSparse(filters=1, kernel_size=7, name='conv1', padding='same', strides=1,
|
|
24
|
+
activation='relu', iq_conf=iq_conf)([x, keep_mask])
|
|
23
25
|
x, keep_mask = AveragePooling2DSparse(4, name='pool1')([x, keep_mask])
|
|
24
26
|
|
|
25
|
-
x = QConv2DSparse(filters=3, kernel_size=5, name='conv2', padding='same', strides=1
|
|
26
|
-
|
|
27
|
+
x = QConv2DSparse(filters=3, kernel_size=5, name='conv2', padding='same', strides=1,
|
|
28
|
+
activation='relu', iq_conf=iq_conf)([x, keep_mask])
|
|
27
29
|
x, keep_mask = AveragePooling2DSparse(2, name='pool2')([x, keep_mask])
|
|
28
30
|
else:
|
|
29
31
|
x = QConv2D(filters=1, kernel_size=7, name='conv1', padding='same', strides=1,
|
|
30
|
-
activation='relu')(x)
|
|
32
|
+
activation='relu', iq_conf=iq_conf)(x)
|
|
31
33
|
x = AveragePooling2D(4, name='pool1')(x)
|
|
32
34
|
|
|
33
35
|
x = QConv2D(filters=3, kernel_size=5, name='conv2', padding='same', strides=1,
|
|
34
|
-
activation='relu')(x)
|
|
36
|
+
activation='relu', iq_conf=iq_conf)(x)
|
|
35
37
|
x = AveragePooling2D(2, name='pool2')(x)
|
|
36
38
|
|
|
37
39
|
x = Flatten(name='flatten')(x)
|
|
38
40
|
|
|
39
|
-
x = QDense(36, name='dense1', activation='relu')(x)
|
|
41
|
+
x = QDense(36, name='dense1', activation='relu', iq_conf=iq_conf)(x)
|
|
40
42
|
|
|
41
|
-
x = QDense(10, name='dense2')(x)
|
|
43
|
+
x = QDense(10, name='dense2', iq_conf=iq_conf)(x)
|
|
42
44
|
x = Activation('softmax', name='softmax')(x)
|
|
43
45
|
|
|
44
46
|
return keras.Model(x_in, x)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|