ultralytics-thop 0.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- thop/__init__.py +8 -0
- thop/__version__.py +1 -0
- thop/fx_profile.py +224 -0
- thop/onnx_profile.py +76 -0
- thop/profile.py +233 -0
- thop/rnn_hooks.py +195 -0
- thop/utils.py +56 -0
- thop/vision/__init__.py +0 -0
- thop/vision/basic_hooks.py +146 -0
- thop/vision/calc_func.py +118 -0
- thop/vision/efficientnet.py +9 -0
- thop/vision/onnx_counter.py +351 -0
- ultralytics_thop-0.0.1.dist-info/LICENSE +661 -0
- ultralytics_thop-0.0.1.dist-info/METADATA +846 -0
- ultralytics_thop-0.0.1.dist-info/RECORD +17 -0
- ultralytics_thop-0.0.1.dist-info/WHEEL +5 -0
- ultralytics_thop-0.0.1.dist-info/top_level.txt +1 -0
thop/rnn_hooks.py
ADDED
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
import torch
|
|
2
|
+
import torch.nn as nn
|
|
3
|
+
from torch.nn.utils.rnn import PackedSequence
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def _count_rnn_cell(input_size, hidden_size, bias=True):
|
|
7
|
+
# h' = \tanh(W_{ih} x + b_{ih} + W_{hh} h + b_{hh})
|
|
8
|
+
total_ops = hidden_size * (input_size + hidden_size) + hidden_size
|
|
9
|
+
if bias:
|
|
10
|
+
total_ops += hidden_size * 2
|
|
11
|
+
|
|
12
|
+
return total_ops
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def count_rnn_cell(m: nn.RNNCell, x: torch.Tensor, y: torch.Tensor):
    """Add the operation count of one ``nn.RNNCell`` call to ``m.total_ops``.

    ``x[0]`` is taken as the cell input and its first dimension as the batch
    size; ``y`` is unused.
    """
    per_sample_ops = _count_rnn_cell(m.input_size, m.hidden_size, m.bias)
    samples = x[0].size(0)
    m.total_ops += torch.DoubleTensor([int(per_sample_ops * samples)])
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _count_gru_cell(input_size, hidden_size, bias=True):
|
|
25
|
+
total_ops = 0
|
|
26
|
+
# r = \sigma(W_{ir} x + b_{ir} + W_{hr} h + b_{hr}) \\
|
|
27
|
+
# z = \sigma(W_{iz} x + b_{iz} + W_{hz} h + b_{hz}) \\
|
|
28
|
+
state_ops = (hidden_size + input_size) * hidden_size + hidden_size
|
|
29
|
+
if bias:
|
|
30
|
+
state_ops += hidden_size * 2
|
|
31
|
+
total_ops += state_ops * 2
|
|
32
|
+
|
|
33
|
+
# n = \tanh(W_{in} x + b_{in} + r * (W_{hn} h + b_{hn})) \\
|
|
34
|
+
total_ops += (hidden_size + input_size) * hidden_size + hidden_size
|
|
35
|
+
if bias:
|
|
36
|
+
total_ops += hidden_size * 2
|
|
37
|
+
# r hadamard : r * (~)
|
|
38
|
+
total_ops += hidden_size
|
|
39
|
+
|
|
40
|
+
# h' = (1 - z) * n + z * h
|
|
41
|
+
# hadamard hadamard add
|
|
42
|
+
total_ops += hidden_size * 3
|
|
43
|
+
|
|
44
|
+
return total_ops
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def count_gru_cell(m: nn.GRUCell, x: torch.Tensor, y: torch.Tensor):
    """Add the operation count of one ``nn.GRUCell`` call to ``m.total_ops``.

    ``x[0]`` is taken as the cell input and its first dimension as the batch
    size; ``y`` is unused.
    """
    per_sample_ops = _count_gru_cell(m.input_size, m.hidden_size, m.bias)
    samples = x[0].size(0)
    m.total_ops += torch.DoubleTensor([int(per_sample_ops * samples)])
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def _count_lstm_cell(input_size, hidden_size, bias=True):
|
|
57
|
+
total_ops = 0
|
|
58
|
+
|
|
59
|
+
# i = \sigma(W_{ii} x + b_{ii} + W_{hi} h + b_{hi}) \\
|
|
60
|
+
# f = \sigma(W_{if} x + b_{if} + W_{hf} h + b_{hf}) \\
|
|
61
|
+
# o = \sigma(W_{io} x + b_{io} + W_{ho} h + b_{ho}) \\
|
|
62
|
+
# g = \tanh(W_{ig} x + b_{ig} + W_{hg} h + b_{hg}) \\
|
|
63
|
+
state_ops = (input_size + hidden_size) * hidden_size + hidden_size
|
|
64
|
+
if bias:
|
|
65
|
+
state_ops += hidden_size * 2
|
|
66
|
+
total_ops += state_ops * 4
|
|
67
|
+
|
|
68
|
+
# c' = f * c + i * g \\
|
|
69
|
+
# hadamard hadamard add
|
|
70
|
+
total_ops += hidden_size * 3
|
|
71
|
+
|
|
72
|
+
# h' = o * \tanh(c') \\
|
|
73
|
+
total_ops += hidden_size
|
|
74
|
+
|
|
75
|
+
return total_ops
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def count_lstm_cell(m: nn.LSTMCell, x: torch.Tensor, y: torch.Tensor):
    """Add the operation count of one ``nn.LSTMCell`` call to ``m.total_ops``.

    ``x[0]`` is taken as the cell input and its first dimension as the batch
    size; ``y`` is unused.
    """
    per_sample_ops = _count_lstm_cell(m.input_size, m.hidden_size, m.bias)
    samples = x[0].size(0)
    m.total_ops += torch.DoubleTensor([int(per_sample_ops * samples)])
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def count_rnn(m: nn.RNN, x, y):
    """Add the operation count of a full ``nn.RNN`` pass to ``m.total_ops``.

    ``x[0]`` is the sequence input. For a ``PackedSequence`` the batch size is
    the largest entry of ``batch_sizes`` and the step count is its length;
    otherwise both are read from the first two dimensions according to
    ``m.batch_first``. ``y`` is unused.
    """
    seq = x[0]
    if isinstance(seq, PackedSequence):
        batch_size = torch.max(seq.batch_sizes)
        num_steps = seq.batch_sizes.size(0)
    else:
        batch_axis, time_axis = (0, 1) if m.batch_first else (1, 0)
        batch_size = seq.size(batch_axis)
        num_steps = seq.size(time_axis)

    directions = 2 if m.bidirectional else 1
    hidden = m.hidden_size

    # The first layer consumes the raw input features, once per direction.
    ops_per_step = _count_rnn_cell(m.input_size, hidden, m.bias) * directions
    # Each further layer consumes the previous layer's output, which is
    # ``hidden * directions`` wide.
    extra_layers = max(m.num_layers - 1, 0)
    ops_per_step += _count_rnn_cell(hidden * directions, hidden, m.bias) * directions * extra_layers

    # Unroll over time, then scale by the batch size.
    total = ops_per_step * num_steps * batch_size
    m.total_ops += torch.DoubleTensor([int(total)])
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def count_gru(m: nn.GRU, x, y):
    """Add the operation count of a full ``nn.GRU`` pass to ``m.total_ops``.

    ``x[0]`` is the sequence input. For a ``PackedSequence`` the batch size is
    the largest entry of ``batch_sizes`` and the step count is its length;
    otherwise both are read from the first two dimensions according to
    ``m.batch_first``. ``y`` is unused.
    """
    seq = x[0]
    if isinstance(seq, PackedSequence):
        batch_size = torch.max(seq.batch_sizes)
        num_steps = seq.batch_sizes.size(0)
    else:
        batch_axis, time_axis = (0, 1) if m.batch_first else (1, 0)
        batch_size = seq.size(batch_axis)
        num_steps = seq.size(time_axis)

    directions = 2 if m.bidirectional else 1
    hidden = m.hidden_size

    # The first layer consumes the raw input features, once per direction.
    ops_per_step = _count_gru_cell(m.input_size, hidden, m.bias) * directions
    # Each further layer consumes the previous layer's output, which is
    # ``hidden * directions`` wide.
    extra_layers = max(m.num_layers - 1, 0)
    ops_per_step += _count_gru_cell(hidden * directions, hidden, m.bias) * directions * extra_layers

    # Unroll over time, then scale by the batch size.
    total = ops_per_step * num_steps * batch_size
    m.total_ops += torch.DoubleTensor([int(total)])
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def count_lstm(m: nn.LSTM, x, y):
    """Add the operation count of a full ``nn.LSTM`` pass to ``m.total_ops``.

    ``x[0]`` is the sequence input. For a ``PackedSequence`` the batch size is
    the largest entry of ``batch_sizes`` and the step count is its length;
    otherwise both are read from the first two dimensions according to
    ``m.batch_first``. ``y`` is unused.
    """
    seq = x[0]
    if isinstance(seq, PackedSequence):
        batch_size = torch.max(seq.batch_sizes)
        num_steps = seq.batch_sizes.size(0)
    else:
        batch_axis, time_axis = (0, 1) if m.batch_first else (1, 0)
        batch_size = seq.size(batch_axis)
        num_steps = seq.size(time_axis)

    directions = 2 if m.bidirectional else 1
    hidden = m.hidden_size

    # The first layer consumes the raw input features, once per direction.
    ops_per_step = _count_lstm_cell(m.input_size, hidden, m.bias) * directions
    # Each further layer consumes the previous layer's output, which is
    # ``hidden * directions`` wide.
    extra_layers = max(m.num_layers - 1, 0)
    ops_per_step += _count_lstm_cell(hidden * directions, hidden, m.bias) * directions * extra_layers

    # Unroll over time, then scale by the batch size.
    total = ops_per_step * num_steps * batch_size
    m.total_ops += torch.DoubleTensor([int(total)])
|
thop/utils.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
from collections.abc import Iterable
|
|
2
|
+
|
|
3
|
+
# Colour-code suffixes; colorful_print prefixes each with "\033[" to form the escape.
COLOR_RED, COLOR_GREEN, COLOR_YELLOW = "91m", "92m", "93m"
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def colorful_print(fn_print, color=COLOR_RED):
    """Wrap ``fn_print`` so each call is bracketed by colour escape sequences.

    The returned callable prints the ``"\\033[" + color`` escape with the
    builtin ``print`` (no trailing newline), forwards all arguments to
    ``fn_print``, then prints the reset escape the same way.
    """
    start_escape = f"\033[{color}"
    reset_escape = "\033[00m"

    def actual_call(*args, **kwargs):
        print(start_escape, end="")
        fn_print(*args, **kwargs)
        print(reset_escape, end="")

    return actual_call
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
# Ready-made coloured variants of the builtin print.
prRed, prGreen, prYellow = (
    colorful_print(print, color=shade) for shade in (COLOR_RED, COLOR_GREEN, COLOR_YELLOW)
)
|
|
20
|
+
|
|
21
|
+
# def prRed(skk):
|
|
22
|
+
# print("\033[91m{}\033[00m".format(skk))
|
|
23
|
+
|
|
24
|
+
# def prGreen(skk):
|
|
25
|
+
# print("\033[92m{}\033[00m".format(skk))
|
|
26
|
+
|
|
27
|
+
# def prYellow(skk):
|
|
28
|
+
# print("\033[93m{}\033[00m".format(skk))
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def clever_format(nums, format="%.2f"):
    """Format numbers as short strings with a magnitude suffix.

    Args:
        nums: A single number or an iterable of numbers.
        format: printf-style format applied to each scaled value.

    Returns:
        A single string when exactly one number was given, otherwise a tuple
        of strings (empty tuple for an empty iterable). Suffixes are "T"
        (1e12), "G" (1e9), "M" (1e6) and "K" (1e3); smaller values get "B".

    Thresholds are inclusive, so exactly 1000 is rendered as "1.00K" rather
    than "1000.00B".
    """
    if not isinstance(nums, Iterable):
        nums = [nums]

    # Ordered from largest to smallest so the first match is the best unit.
    units = ((1e12, "T"), (1e9, "G"), (1e6, "M"), (1e3, "K"))

    clever_nums = []
    for num in nums:
        for scale, suffix in units:
            if num >= scale:
                clever_nums.append(format % (num / scale) + suffix)
                break
        else:
            clever_nums.append(format % num + "B")

    return clever_nums[0] if len(clever_nums) == 1 else tuple(clever_nums)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
if __name__ == "__main__":
    # Quick visual check of the three coloured printers.
    for coloured_print in (prRed, prGreen, prYellow):
        coloured_print("hello", "world")
|
thop/vision/__init__.py
ADDED
|
File without changes
|
|
@@ -0,0 +1,146 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
|
|
4
|
+
import torch
|
|
5
|
+
import torch.nn as nn
|
|
6
|
+
from torch.nn.modules.conv import _ConvNd
|
|
7
|
+
|
|
8
|
+
from .calc_func import *
|
|
9
|
+
|
|
10
|
+
# NOTE(review): not referenced by any hook visible in this file; presumably a
# legacy multiply-add scaling factor — confirm against other modules before removing.
multiply_adds = 1
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def count_parameters(m, x, y):
    """Store the total number of parameter elements of ``m`` in ``m.total_params[0]``.

    ``x`` and ``y`` are unused. The count is delegated to
    ``calculate_parameters``; the previous hand-written loop computed the
    same sum and then discarded it.
    """
    m.total_params[0] = calculate_parameters(m.parameters())
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def zero_ops(m, x, y):
    """Add the result of ``calculate_zero_ops()`` to ``m.total_ops``; ``x`` and ``y`` are unused."""
    nothing = calculate_zero_ops()
    m.total_ops += nothing
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def count_convNd(m: _ConvNd, x, y: torch.Tensor):
    """Add the operation count of a convolution module to ``m.total_ops``.

    Args:
        m: Convolution module; ``weight``, ``groups`` and ``bias`` are read.
        x: Inputs of the call; ``x[0]`` is the input tensor.
        y: Output tensor of the call.

    The count itself is delegated to ``calculate_conv2d_flops`` using the
    input, output and weight shapes.
    """
    inp = x[0]

    m.total_ops += calculate_conv2d_flops(
        input_size=list(inp.shape),
        output_size=list(y.shape),
        kernel_size=list(m.weight.shape),
        groups=m.groups,
        # The callee annotates ``bias`` as a bool, so pass a flag, not the tensor.
        bias=m.bias is not None,
    )
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def count_convNd_ver2(m: _ConvNd, x, y: torch.Tensor):
    """Add the operation count of a convolution module to ``m.total_ops``.

    Counts ``N x spatial x (weight elements + bias elements)``, where
    ``N x spatial`` is the output size without the channel dimension.

    Args:
        m: Convolution module; ``weight`` and ``bias`` are read.
        x: Inputs of the call (unused beyond the hook signature).
        y: Output tensor of the call.

    The previous body called ``calculate_conv`` with three of its five
    required arguments and dereferenced ``m.bias`` unconditionally, so it
    raised on every call.
    """
    # N x H x W (every output dimension except Cout), without allocating a tensor.
    output_size = 1
    for extent in (*y.shape[:1], *y.shape[2:]):
        output_size *= extent

    # Cout x Cin/groups x kernel elements, plus one op per bias element if present.
    kernel_ops = m.weight.nelement()
    if m.bias is not None:
        kernel_ops += m.bias.nelement()

    m.total_ops += torch.DoubleTensor([int(output_size * kernel_ops)])
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def count_normalization(m: nn.modules.batchnorm._BatchNorm, x, y):
    """Add the operation count of a normalization layer to ``m.total_ops``.

    Models y = (x - mean) / sqrt(eps + var) * weight + bias. The base cost is
    ``calculate_norm`` of the input element count, doubled when the module
    has a truthy ``affine`` or ``elementwise_affine`` attribute.
    """
    # TODO: add test cases
    # https://github.com/Lyken17/pytorch-OpCounter/issues/124
    inp = x[0]
    # bn is by default fused in inference
    ops = calculate_norm(inp.numel())
    has_affine = getattr(m, "affine", False) or getattr(m, "elementwise_affine", False)
    if has_affine:
        ops *= 2
    m.total_ops += ops
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
# def count_layer_norm(m, x, y):
|
|
75
|
+
# x = x[0]
|
|
76
|
+
# m.total_ops += calculate_norm(x.numel())
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
# def count_instance_norm(m, x, y):
|
|
80
|
+
# x = x[0]
|
|
81
|
+
# m.total_ops += calculate_norm(x.numel())
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def count_prelu(m, x, y):
    """Add ``calculate_relu`` of the input element count to ``m.total_ops``.

    Nothing is added while ``m.training`` is truthy. ``x[0]`` is the input
    tensor; ``y`` is unused.
    """
    element_count = x[0].numel()
    if m.training:
        return
    m.total_ops += calculate_relu(element_count)
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
def count_relu(m, x, y):
    """Add ``calculate_relu_flops`` of the input shape to ``m.total_ops``.

    ``x[0]`` is the input tensor; ``y`` is unused. The element count that was
    previously computed here was never used and has been dropped.
    """
    m.total_ops += calculate_relu_flops(list(x[0].shape))
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def count_softmax(m, x, y):
    """Add the operation count of a softmax module to ``m.total_ops``.

    Args:
        m: Softmax module; ``m.dim`` selects the normalized axis.
        x: Inputs of the call; ``x[0]`` is the input tensor.
        y: Output of the call (unused).

    The softmax axis length is the feature count, and every other element
    position counts as one "batch" row for ``calculate_softmax``.
    """
    inp = x[0]

    dim = m.dim
    if dim is None:
        # Softmax built without ``dim`` relies on PyTorch's legacy implicit
        # choice: axis 0 for 0-, 1- or 3-dimensional input, axis 1 otherwise.
        dim = 0 if inp.dim() in (0, 1, 3) else 1

    nfeatures = inp.size()[dim]
    # An empty softmax axis means an empty tensor; avoid dividing by zero.
    batch_size = inp.numel() // nfeatures if nfeatures else 0

    m.total_ops += calculate_softmax(batch_size, nfeatures)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def count_avgpool(m, x, y):
    """Add ``calculate_avgpool`` of the output element count to ``m.total_ops``; ``x`` is unused."""
    m.total_ops += calculate_avgpool(y.numel())
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def count_adap_avgpool(m, x, y):
    """Add the operation count of an adaptive average pooling call to ``m.total_ops``.

    The effective kernel extent per axis is the ratio of input to output
    size over the dimensions from index 2 onward; the product of those
    ratios is passed to ``calculate_adaptive_avg`` with the output element count.
    """
    in_extent = torch.DoubleTensor(list(x[0].shape[2:]))
    out_extent = torch.DoubleTensor(list(y.shape[2:]))
    adds_per_output = torch.prod(in_extent / out_extent)
    m.total_ops += calculate_adaptive_avg(adds_per_output, y.numel())
|
|
121
|
+
|
|
122
|
+
|
|
123
|
+
# TODO: verify the accuracy
|
|
124
|
+
def count_upsample(m, x, y):
    """Add the operation count of an upsampling module to ``m.total_ops``.

    Args:
        m: Upsample module; ``m.mode`` selects the interpolation kind.
        x: Inputs of the call (unused).
        y: Output tensor; its element count drives the estimate.

    Modes other than "nearest", "linear", "bilinear" and "bicubic" are logged
    as unimplemented and counted as zero operations.
    """
    supported_modes = ("nearest", "linear", "bilinear", "bicubic")  # "trilinear"
    if m.mode not in supported_modes:
        # Lazy logging arguments; the rendered message is unchanged.
        logging.warning("mode %s is not implemented yet, take it a zero op", m.mode)
        m.total_ops += 0
    else:
        m.total_ops += calculate_upsample(m.mode, y.nelement())
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
# nn.Linear
|
|
139
|
+
def count_linear(m, x, y):
    """Add the operation count of a linear layer to ``m.total_ops``.

    Each output element costs ``m.in_features`` multiplications; additions
    are not counted. ``x`` is unused.
    """
    muls_per_output = m.in_features
    m.total_ops += calculate_linear(muls_per_output, y.numel())
|
thop/vision/calc_func.py
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import warnings
|
|
2
|
+
|
|
3
|
+
import numpy as np
|
|
4
|
+
import torch
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def l_prod(in_list):
    """Return the product of all items in ``in_list`` (1 for an empty iterable)."""
    product = 1
    for factor in in_list:
        product *= factor
    return product
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def l_sum(in_list):
    """Return the sum of all items in ``in_list`` (0 for an empty iterable)."""
    total = 0
    for term in in_list:
        total += term
    return total
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def calculate_parameters(param_list):
    """Return the total element count of the tensors in ``param_list``.

    The result is a one-element double tensor, or the int 0 when
    ``param_list`` is empty.
    """
    running_total = 0
    for param in param_list:
        running_total = running_total + torch.DoubleTensor([param.nelement()])
    return running_total
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def calculate_zero_ops():
    """Return a one-element double tensor holding zero."""
    return torch.DoubleTensor([0])
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def calculate_conv2d_flops(input_size: list, output_size: list, kernel_size: list, groups: int, bias: bool = False):
    """Return the multiply count of a convolution from its shapes.

    Layouts: ``output_size`` is (n, out_c, oh, ow), ``input_size`` is
    (n, in_c, ih, iw) and ``kernel_size`` is (out_c, in_c, kh, kw). The
    result is ``prod(output_size) * (in_c // groups) * prod(kernel_size[2:])``.
    ``bias`` is accepted but does not affect the result.
    """
    channels_per_group = input_size[1] // groups
    output_elements = l_prod(output_size)
    kernel_elements = l_prod(kernel_size[2:])
    return output_elements * channels_per_group * kernel_elements
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def calculate_conv(bias, kernel_size, output_size, in_channel, group):
    """Return a convolution's operation count as a one-element double tensor.

    Inputs are all numbers! The result is
    ``output_size * (in_channel / group * kernel_size + bias)``.

    Deprecated: a warning is emitted on every call.
    """
    # stacklevel=2 attributes the warning to the caller instead of this helper.
    warnings.warn("This API is being deprecated.", stacklevel=2)
    return torch.DoubleTensor([output_size * (in_channel / group * kernel_size + bias)])
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def calculate_norm(input_size):
    """Return twice ``input_size`` as a one-element double tensor.

    ``input_size`` is a plain number, not an array or tensor.
    """
    doubled = 2 * input_size
    return torch.DoubleTensor([doubled])
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def calculate_relu_flops(input_size):
    """Return 0: ReLU (``x[x < 0] = 0``) is counted as free, whatever ``input_size`` is."""
    return 0
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def calculate_relu(input_size: torch.Tensor):
    """Return ``int(input_size)`` as a one-element double tensor.

    Deprecated: a warning is emitted on every call.
    """
    warnings.warn("This API is being deprecated")
    element_count = int(input_size)
    return torch.DoubleTensor([element_count])
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def calculate_softmax(batch_size, nfeatures):
    """Return the softmax operation count as a one-element double tensor.

    Each of the ``batch_size`` rows costs ``nfeatures`` exponentials,
    ``nfeatures - 1`` additions and ``nfeatures`` divisions.
    """
    ops_per_row = nfeatures + (nfeatures - 1) + nfeatures
    return torch.DoubleTensor([int(batch_size * ops_per_row)])
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def calculate_avgpool(input_size):
    """Return ``int(input_size)`` as a one-element double tensor."""
    element_count = int(input_size)
    return torch.DoubleTensor([element_count])
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def calculate_adaptive_avg(kernel_size, output_size):
    """Return the adaptive average pooling cost as a one-element double tensor.

    Each output element costs ``kernel_size`` additions plus one division.
    """
    ops_per_output = kernel_size + 1
    return torch.DoubleTensor([int(ops_per_output * output_size)])
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def calculate_upsample(mode: str, output_size):
    """Return the upsampling operation count as a one-element double tensor.

    ``output_size`` is multiplied by a per-element cost that depends on
    ``mode``; any mode not listed below (e.g. "nearest") costs one operation
    per output element.
    """
    ops_per_element = {
        "linear": 5,
        "bilinear": 11,
        # solve A: 128 muls + 96 adds; solve p: 16 muls + 12 adds + 4 muls + 3 adds
        "bicubic": 224 + 35,
        "trilinear": 13 * 2 + 5,
    }
    total_ops = output_size
    if mode in ops_per_element:
        total_ops *= ops_per_element[mode]
    return torch.DoubleTensor([int(total_ops)])
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
def calculate_linear(in_feature, num_elements):
    """Return ``in_feature * num_elements`` as a one-element double tensor."""
    total_muls = int(in_feature * num_elements)
    return torch.DoubleTensor([total_muls])
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
def counter_matmul(input_size, output_size):
    """Return the product of all ``input_size`` entries times the last ``output_size`` entry."""
    last_output_dim = np.array(output_size)[-1]
    input_elements = np.prod(np.array(input_size))
    return input_elements * last_output_dim
|
|
103
|
+
|
|
104
|
+
|
|
105
|
+
def counter_mul(input_size):
    """Return ``input_size`` unchanged: element-wise multiply costs one op per element."""
    return input_size
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def counter_pow(input_size):
    """Return ``input_size`` unchanged: element-wise power costs one op per element."""
    return input_size
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
def counter_sqrt(input_size):
    """Return ``input_size`` unchanged: element-wise square root costs one op per element."""
    return input_size
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def counter_div(input_size):
    """Return ``input_size`` unchanged: element-wise division costs one op per element."""
    return input_size
|