xttmp 2.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xttmp/__init__.py +1 -0
- xttmp/api/__init__.py +5 -0
- xttmp/api/evaluate.py +163 -0
- xttmp/api/get_visualize_handle.py +29 -0
- xttmp/api/instancing_model.py +35 -0
- xttmp/core/__init__.py +0 -0
- xttmp/core/apgstmd_core.py +188 -0
- xttmp/core/apgstmdv2_core.py +79 -0
- xttmp/core/base_core.py +36 -0
- xttmp/core/dstmd_core.py +213 -0
- xttmp/core/estmd_backbone.py +110 -0
- xttmp/core/estmd_core.py +356 -0
- xttmp/core/feedbackstmd_core.py +61 -0
- xttmp/core/fracstmd_core.py +98 -0
- xttmp/core/fstmd_core.py +15 -0
- xttmp/core/fstmdv2_core.py +42 -0
- xttmp/core/haarstmd_core.py +140 -0
- xttmp/core/math_operator.py +307 -0
- xttmp/core/stfeedbackstmd_core.py +233 -0
- xttmp/core/stmdplus_core.py +187 -0
- xttmp/core/stmdplusv2_core.py +82 -0
- xttmp/core/vstmd_core.py +420 -0
- xttmp/demo/evaluate_model.py +92 -0
- xttmp/demo/inference_gui.py +148 -0
- xttmp/demo/inference_gui_single_process.py +134 -0
- xttmp/demo/inference_image_stream.py +67 -0
- xttmp/demo/inference_video.py +66 -0
- xttmp/main.py +14 -0
- xttmp/model/__init__.py +13 -0
- xttmp/model/backbone.py +514 -0
- xttmp/model/facilitated_model.py +230 -0
- xttmp/model/feedback_model.py +271 -0
- xttmp/model/haarstmd.py +61 -0
- xttmp/model/vstmd.py +457 -0
- xttmp/util/__init__.py +0 -0
- xttmp/util/compute_module.py +402 -0
- xttmp/util/create_kernel.py +363 -0
- xttmp/util/evaluate_module.py +697 -0
- xttmp/util/iostream.py +660 -0
- xttmp-2.3.0.dist-info/METADATA +85 -0
- xttmp-2.3.0.dist-info/RECORD +45 -0
- xttmp-2.3.0.dist-info/WHEEL +5 -0
- xttmp-2.3.0.dist-info/entry_points.txt +2 -0
- xttmp-2.3.0.dist-info/licenses/LICENSE +201 -0
- xttmp-2.3.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,402 @@
|
|
|
1
|
+
from copy import deepcopy
|
|
2
|
+
|
|
3
|
+
import cv2
|
|
4
|
+
import numpy as np
|
|
5
|
+
import torch
|
|
6
|
+
import torch.nn.functional as F
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def compute_temporal_conv(iptCell, kernel, pointer=None):
    """
    Compute a temporal convolution over a buffer of frames.

    Starting at ``pointer`` and walking backwards through ``iptCell`` (with
    wrap-around), the frame ``t`` steps back is weighted by ``kernel[t]`` and
    all weighted frames are accumulated.

    Parameters:
    - iptCell: Sequence of frames (NumPy arrays or torch tensors). Entries
      that are None are skipped.
    - kernel: Convolution weights. Must be a vector once singleton
      dimensions are removed; a single weight is accepted as a
      one-element vector.
    - pointer: Index of the newest frame in iptCell. Defaults to the last
      element.

    Returns:
    - optMatrix: The accumulated result, allocated with ``zeros_like`` from
      iptCell[pointer], or None when iptCell[pointer] is None.

    Raises:
    - ValueError: If the kernel has more than one non-singleton dimension.
    - TypeError: If iptCell[pointer] is neither a NumPy array nor a tensor.
    """
    # Default to the last slot of the buffer
    if pointer is None:
        pointer = len(iptCell) - 1

    # Nothing to convolve until the newest frame has been written
    newest = iptCell[pointer]
    if newest is None:
        return None

    # Ensure kernel is a vector
    kernel = np.squeeze(kernel)
    if np.ndim(kernel) == 0:
        # squeeze() collapses a one-element kernel to a scalar; restore the
        # vector form so it can be indexed and measured like any other kernel.
        kernel = kernel.reshape(1)
    if np.ndim(kernel) != 1:
        raise ValueError('The kernel must be a vector.')

    # Allocate the accumulator with the same backend as the frames
    if isinstance(newest, np.ndarray):
        optMatrix = np.zeros_like(newest)
    elif isinstance(newest, torch.Tensor):
        optMatrix = torch.zeros_like(newest)
    else:
        raise TypeError(
            'iptCell elements must be numpy.ndarray or torch.Tensor, got '
            + type(newest).__name__
        )

    # Only as many taps as there are frames (or kernel weights) are used
    k1 = len(iptCell)
    length = min(k1, len(kernel))

    # Perform temporal convolution
    for t in range(length):
        frame = iptCell[(pointer - t) % k1]
        weight = kernel[t]
        # Skip empty slots and weights that are numerically zero
        if frame is not None and abs(weight) > 1e-16:
            optMatrix += frame * weight

    return optMatrix
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def compute_circularlist_conv(circularCell, temporalKernel):
    """
    Convolve a circular frame buffer with a temporal kernel.

    Thin wrapper that forwards the buffer, the kernel and the buffer's own
    ``pointer`` attribute to ``compute_temporal_conv``.

    Args:
    - circularCell: Buffer object passed through as the frame sequence; it
      must expose a ``pointer`` attribute.
    - temporalKernel: The temporal kernel weights.

    Returns:
    - Whatever ``compute_temporal_conv`` returns for these arguments.
    """
    head = circularCell.pointer
    return compute_temporal_conv(circularCell, temporalKernel, head)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def compute_response(ipt):
    """
    Reduce a stack of responses to its maximum along dimension 1.

    Parameters:
    - ipt: Tensor whose dimension 1 indexes the responses to compare.

    Returns:
    - response: Tensor with the maximum taken over dimension 1; that
      dimension is kept with size 1.
    """
    strongest = torch.amax(ipt, dim=1, keepdim=True)
    return strongest
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def compute_direction(ipt):
    """
    Compute the dominant direction from a set of directional responses.

    Channel ``c`` of ``ipt`` is treated as the response along the angle
    ``2 * pi * c / C``. The responses are summed as vectors and the angle of
    the resulting vector is returned for every pixel.

    Parameters:
    - ipt: Tensor of shape (B, C, H, W), one channel per direction.

    Returns:
    - direction_opt: Tensor of shape (B, 1, H, W) holding the angle in
      radians, shifted from atan2's (-pi, pi] range into non-negative
      values. Pixels whose cosine and sine sums are both exactly zero are
      set to NaN.
    """
    _, C, _, _ = ipt.shape
    device = ipt.device  # build the weights on the same device as the input

    # 1. Angle of the unit vector for each channel: 0, 1/C * 2pi, 2/C * 2pi, ...
    angles = torch.linspace(0, 2 * torch.pi, steps=C + 1, device=device)[:-1]

    # 2. Cosine / sine weights reshaped to (1, C, 1, 1) so they broadcast
    #    against the (B, C, H, W) input.
    cos_weight = torch.cos(angles).view(1, C, 1, 1)
    sin_weight = torch.sin(angles).view(1, C, 1, 1)

    # 3. Weighted sum over the channel axis. keepdim=True keeps the result
    #    at (B, 1, H, W) for every batch size; the previous unsqueeze(0)
    #    produced (1, B, H, W), which was only correct for B == 1.
    output_cos = torch.sum(ipt * cos_weight, dim=1, keepdim=True)
    output_sin = torch.sum(ipt * sin_weight, dim=1, keepdim=True)

    # 4. Angle of the summed vector, in (-pi, pi]
    direction_opt = torch.atan2(output_sin, output_cos)

    # 5. Shift negative angles up by 2*pi so the result is non-negative
    direction_opt = torch.where(
        direction_opt < 0, direction_opt + 2 * torch.pi, direction_opt
    )

    # 6. Mark pixels without any directional evidence. The comparison is an
    #    exact-zero test, so sums that are merely tiny keep their angle.
    undefined = (output_sin == 0) & (output_cos == 0)
    direction_opt[undefined] = float('nan')

    return direction_opt
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def slice_matrix_holding_size(iptMatrix, shiftX, shiftY):
    """
    Shift a 2-D matrix without wrap-around, keeping its size.

    The matrix is rolled by the (rounded) shifts and the rows/columns that
    wrapped around are zeroed, so content pushed past an edge is discarded
    and the vacated area is filled with zeros.

    Parameters:
    - iptMatrix: 2-D input matrix.
    - shiftX: Shift along the x-axis (columns); positive moves content to
      higher column indices.
    - shiftY: Shift along the y-axis (rows); positive moves content to
      higher row indices.

    Returns:
    - Opt: New matrix of the original size. All zeros when a shift is at
      least as large as the corresponding dimension.
    """
    # Round shift values to integers
    shiftX = int(round(shiftX))
    shiftY = int(round(shiftY))

    # Get the size of the input matrix
    m, n = iptMatrix.shape

    # A shift as large as the matrix pushes everything out of view
    if abs(shiftX) >= n or abs(shiftY) >= m:
        return np.zeros_like(iptMatrix)

    # Circular shift (np.roll returns a copy, the input is not modified)
    Opt = np.roll(iptMatrix, (shiftX, shiftY), axis=(1, 0))

    # Zero the wrapped-around columns. A zero shift must leave the matrix
    # untouched: Opt[:, 0:] would otherwise select (and erase) every column.
    if shiftX > 0:
        Opt[:, :shiftX] = 0
    elif shiftX < 0:
        Opt[:, shiftX:] = 0

    # Same for the wrapped-around rows
    if shiftY > 0:
        Opt[:shiftY, :] = 0
    elif shiftY < 0:
        Opt[shiftY:, :] = 0

    return Opt
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
def matrix_to_sparse_list(matrix):
    """
    List the non-zero entries of a 2-D matrix as [col, row, value] triples.

    Parameters:
    - matrix: 2-D array-like; it is copied into a NumPy array first.

    Returns:
    - list: One ``[x, y, value]`` list per non-zero element, where ``x`` is
      the column index and ``y`` the row index (both Python ints) and
      ``value`` is a Python float. Entries follow the order produced by
      ``np.nonzero``.
    """
    dense = np.array(matrix)

    # Locate the non-zero cells and read their values
    row_idx, col_idx = np.nonzero(dense)
    entries = dense[row_idx, col_idx]

    # Plain Python numbers, so the result carries no NumPy scalar types
    ys = row_idx.astype(int).tolist()
    xs = col_idx.astype(int).tolist()
    vals = entries.astype(float).tolist()

    # Column index first: the output is (x, y, value), not (row, col, value)
    return [[x, y, v] for x, y, v in zip(xs, ys, vals)]
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class AreaNMS:
    """
    Area-based Non-Maximum Suppression (NMS).

    Keeps only the values that equal the maximum of their square
    neighbourhood, found with a stride-1 2-D max pooling.
    """

    def __init__(self, radio=8):
        """
        Args:
            radio (int): Neighbourhood radius. Window size = 2 * radio + 1.
        """
        self.radio = radio
        self.ksize = 2 * self.radio + 1

    def __call__(self, matrix):
        """
        Apply NMS to the input matrix.

        Args:
            matrix (torch.Tensor): Input heatmap or score map in a layout
                accepted by ``F.max_pool2d``, e.g. (B, C, H, W).

        Returns:
            torch.Tensor: ``matrix`` multiplied by the mask of positions that
            equal their neighbourhood maximum, so all other positions are
            zeroed.
        """
        # Stride 1 with padding equal to the radius keeps the spatial size,
        # so the pooled map lines up element-wise with the input.
        neighbourhood_peak = F.max_pool2d(
            matrix,
            kernel_size=self.ksize,
            padding=self.radio,
            stride=1,
        )

        is_peak = matrix == neighbourhood_peak
        return matrix * is_peak
|
|
234
|
+
|
|
235
|
+
|
|
236
|
+
def get_top_k_torch(response_tensor, direction_tensor=None, k=1000):
    """
    Select the k strongest positive responses of a tensor.

    Input:
        response_tensor: Tensor of shape (..., H, W). Coordinates are derived
            from the flattened index, so leading dimensions are expected to
            have size 1.
        direction_tensor: Optional tensor with the same number of elements
            as response_tensor; sampled at the selected positions. When it
            is None or empty the direction column is NaN.
        k: Maximum number of rows to return (capped at H * W).
    Output:
        torch.Tensor of shape (M, 4) with M <= k, sorted by descending
        response, each row being [x, y, response, direction]. Only
        responses > 0 are kept; an empty (0, 4) tensor on the input's device
        is returned when there are none.
    """
    # 1. Dimensions
    H, W = response_tensor.shape[-2:]
    k = min(k, H * W)

    # 2. Flatten. reshape() is a view when possible and falls back to a copy
    #    for non-contiguous input, where view() would raise.
    flat_response = response_tensor.reshape(-1)

    # 3. Top-k values and their flat indices
    top_vals, top_indices = torch.topk(flat_response, k=k)

    # 4. Keep only strictly positive responses
    mask = top_vals > 0

    # Nothing positive: return an empty result on the same device as the
    # input, matching where a non-empty result would live.
    if not mask.any():
        return torch.empty((0, 4), device=response_tensor.device)

    top_vals = top_vals[mask]
    top_indices = top_indices[mask]

    # 5. Flat index -> (x, y), computed only for the surviving entries
    top_y = top_indices.div(W, rounding_mode='floor').float()
    top_x = (top_indices % W).float()

    # 6. Direction at the selected positions (uses the filtered indices)
    if direction_tensor is not None and direction_tensor.numel() > 0:
        flat_direction = direction_tensor.reshape(-1)
        top_dirs = flat_direction[top_indices]
    else:
        top_dirs = torch.full_like(top_vals, float('nan'))

    # 7. Stack the columns -> (M, 4)
    result_tensor = torch.stack([top_x, top_y, top_vals, top_dirs], dim=1)

    return result_tensor
|
|
286
|
+
|
|
287
|
+
|
|
288
|
+
def get_top_k_numpy(response_array, direction_array=None, k=1000):
    """
    Select the k strongest positive responses of an array.

    Input:
        response_array: numpy.ndarray of shape (..., H, W). Flat indices are
            unravelled against (H, W), so leading dimensions are expected to
            have size 1.
        direction_array: Optional array with the same number of elements as
            response_array; sampled at the selected positions. When it is
            None or empty the direction column is NaN.
        k: Maximum number of rows to return. Values <= 0 yield an empty
            result.
    Output:
        numpy.ndarray of shape (M, 4), dtype float32, with M <= k, sorted by
        descending response; each row is [x, y, response, direction]. Only
        responses > 0 are kept.
    """
    # 1. Dimensions
    H, W = response_array.shape[-2:]

    # Flatten (no copy when the array is contiguous)
    flat_response = response_array.ravel()
    k = min(k, flat_response.size)

    # With k == 0 the slice [-0:] below would select *every* index, and an
    # empty array makes argpartition raise, so handle both up front.
    if k <= 0:
        return np.empty((0, 4), dtype=np.float32)

    # 2. Partial selection: the k largest values, in no particular order
    unsorted_top_indices = np.argpartition(flat_response, -k)[-k:]
    unsorted_top_vals = flat_response[unsorted_top_indices]

    # 3. Sort just those k values; argsort is ascending, [::-1] makes it
    #    descending
    sort_idx = np.argsort(unsorted_top_vals)[::-1]
    top_indices = unsorted_top_indices[sort_idx]
    top_vals = unsorted_top_vals[sort_idx]

    # 4. Keep only strictly positive responses
    mask = top_vals > 0
    if not np.any(mask):
        return np.empty((0, 4), dtype=np.float32)

    top_vals = top_vals[mask]
    top_indices = top_indices[mask]

    # Actual number of rows
    M = top_vals.size

    # 5. Flat index -> (y, x), computed only for the surviving entries
    top_y, top_x = np.unravel_index(top_indices, (H, W))

    # 6. Direction at the selected positions
    if direction_array is not None and direction_array.size > 0:
        flat_direction = direction_array.ravel()
        top_dirs = flat_direction[top_indices]
    else:
        top_dirs = np.full(M, np.nan, dtype=np.float32)

    # 7. Assemble the (M, 4) result
    result = np.empty((M, 4), dtype=np.float32)
    result[:, 0] = top_x     # x
    result[:, 1] = top_y     # y
    result[:, 2] = top_vals  # response
    result[:, 3] = top_dirs  # direction

    return result
|
|
356
|
+
|
|
357
|
+
|
|
358
|
+
class PostProcessing:
    """
    Post-processing step that turns a response map into a list of detections.

    Either returns the single strongest pixel (``get_top_num == 1``) or runs
    AreaNMS followed by top-k selection and score normalisation.
    """

    def __init__(self, device='cpu', nms_radio = 8, get_top_num=1000):
        """
        Args:
            device (str): Computing device ('cpu' or 'cuda'). Stored on the
                instance; not read by the methods of this class.
            nms_radio (int): Neighbourhood radius handed to AreaNMS.
            get_top_num (int): Maximum number of detections to return.
        """
        self.device = device
        self.area_nms = AreaNMS(radio=nms_radio)
        self.get_top_num = get_top_num

    def __call__(self, response, direction=None):
        """
        Extract detections from a response map.

        Args:
            response (torch.Tensor): Response map; the single-detection path
                indexes it as [0, 0, y, x].
            direction (torch.Tensor, optional): Direction map indexed the
                same way as ``response``.

        Returns:
            torch.Tensor: Rows of [x, y, response, direction].
        """
        if self.get_top_num != 1:
            return self.process(response, direction)

        # Single detection: take the global maximum directly, without NMS.
        # NOTE(review): unlike process(), this path neither normalises the
        # score nor drops non-positive maxima, and the result is created on
        # the default device - confirm that callers rely on this.
        idx = torch.argmax(response)
        y, x = divmod(idx.item(), response.shape[-1])
        response_value = response[0, 0, y, x].item()
        if direction is not None:
            direction_value = direction[0, 0, y, x].item()
        else:
            direction_value = float('nan')
        return torch.tensor([[x, y, response_value, direction_value]])

    def process(self, response, direction=None):
        """
        Apply AreaNMS, keep the top detections and normalise their scores.

        Args:
            response (torch.Tensor): Response map passed to AreaNMS.
            direction (torch.Tensor, optional): Direction map forwarded to
                ``get_top_k_torch``.

        Returns:
            torch.Tensor: (M, 4) rows of [x, y, score, direction] with the
            scores divided by the score of the first row, or an empty (0, 4)
            tensor on ``response.device`` when nothing was selected.
        """
        nms_response = self.area_nms(response)

        res = get_top_k_torch(nms_response,
                              direction,
                              k=self.get_top_num)
        if res.shape[0] == 0:
            # Re-create the empty result on the device of the input
            return torch.empty((0, 4), device=response.device)

        # Snapshot the first score before dividing: res[0, 2] is a view into
        # the column that is modified in place below. clone() is used instead
        # of deepcopy(), which raises for non-leaf tensors that require grad.
        max_score = res[0, 2].clone()
        if max_score > 0:
            res[:, 2] /= max_score

        return res
|