PyPI - xttmp - Versions diffs - 2.3.0.1__tar.gz → 2.3.0.3__tar.gz - Mend

xttmp 2.3.0.1.tar.gz → 2.3.0.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51) hide show

{xttmp-2.3.0.1/src/xttmp.egg-info → xttmp-2.3.0.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xttmp
-Version: 2.3.0.1
+Version: 2.3.0.3
 Summary: eXtremely Tiny Target - Motion Perception
 Author-email: Shawn MX <mingshuoxu@hotmail.com>
 Project-URL: Homepage, https://github.com/MingshuoXu/Small-Target-Motion-Detectors
@@ -16,15 +16,16 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Classifier: Topic :: Scientific/Engineering :: Image Recognition
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: matplotlib
 Requires-Dist: opencv-python
 Requires-Dist: scipy
-Requires-Dist: torch>=2.5.0
-Requires-Dist: torchvision>=0.20.0
+Provides-Extra: torch
+Requires-Dist: torch>=2.5.0; extra == "torch"
+Requires-Dist: torchvision>=0.20.0; extra == "torch"
 Dynamic: license-file
 # Small Target Motion Detectors, Version 2.3 (XTT-MP: Extremely Tiny Target - Motion Perception)
@@ -66,8 +67,18 @@ Built with modularity and extensibility in mind, XTT-MP provides a robust suite
 - After `pip install xttmp`, use the installed code and bring your own input data, or run from a repository checkout to access the bundled examples.
 ### Via PyPI
+#### CPU
+```bash
+pip install xttmp[torch]
+```
+#### NVIDIA GPU (CUDA 12.6)
+```bash
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126
+```
+### Running the GUI Demo
 ```bash
-pip install xttmp
 xttmp_gui
 ```

{xttmp-2.3.0.1 → xttmp-2.3.0.3}/README.md RENAMED Viewed

@@ -37,8 +37,18 @@ Built with modularity and extensibility in mind, XTT-MP provides a robust suite
 - After `pip install xttmp`, use the installed code and bring your own input data, or run from a repository checkout to access the bundled examples.
 ### Via PyPI
+#### CPU
+```bash
+pip install xttmp[torch]
+```
+#### NVIDIA GPU (CUDA 12.6)
+```bash
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126
+```
+### Running the GUI Demo
 ```bash
-pip install xttmp
 xttmp_gui
 ```

{xttmp-2.3.0.1 → xttmp-2.3.0.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "xttmp"
-version = "2.3.0.1"
+version = "2.3.0.3"
 description = "eXtremely Tiny Target - Motion Perception"
 readme = "README.md"
 requires-python = ">=3.8"
@@ -20,14 +20,12 @@ classifiers = [
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Programming Language :: Python :: 3.12",
-    "Topic :: Scientific/Engineering :: Image Recognition",
+    "Topic :: Scientific/Engineering :: Artificial Intelligence",
 ]
 dependencies = [
     "matplotlib",
     "opencv-python",
     "scipy",
-    "torch>=2.5.0",
-    "torchvision>=0.20.0",
 ]
 [project.urls]
@@ -47,6 +45,12 @@ where = ["src"]
 [tool.setuptools.package-data]
 "xttmp" = ["util/*.ico"]
+[project.optional-dependencies]
+torch = [
+    "torch>=2.5.0",
+    "torchvision>=0.20.0"
+]
 [dependency-groups]
 dev = []

{xttmp-2.3.0.1 → xttmp-2.3.0.3}/src/xttmp/demo/inference_gui.py RENAMED Viewed

@@ -12,19 +12,14 @@ file_path = os.path.realpath(__file__)
 py_pkg_path = os.path.dirname(os.path.dirname(os.path.dirname(file_path)))
 sys.path.append(py_pkg_path)
-try:
-    from xttmp.util.iostream import ( # type: ignore
-                    ModelAndInputSelectorGUI,
-                    FrameIterator,
-                    FrameVisualizer,
-                )
-    from xttmp.util.compute_module import PostProcessing # type: ignore
-    from xttmp.api import ( # type: ignore
-        instancing_model,
-    )
-except ImportError as e:
-    raise ImportError("Failed to import required modules. "
-                      "Ensure that the 'xttmp' package is correctly installed.") from e
+from xttmp.util.iostream import ( # type: ignore
+                XTTMP_GUI,
+                FrameIterator,
+                FrameVisualizer,
+            )
+from xttmp.api import instancing_model
 # configure logging
 logging.basicConfig(level=logging.INFO,
@@ -32,18 +27,15 @@ logging.basicConfig(level=logging.INFO,
 logger = logging.getLogger(__name__)
 class StmdGui:
-    def __init__(self, device='cpu', show_threshold: float = 0.8, get_top_num: int = 1):
+    def __init__(self):
         """ Initialize STMD GUI """
-        self.device = device
-        self.show_threshold = show_threshold
-        self.get_top_num = get_top_num
-        self.ModelAndInputSelectorGUI = ModelAndInputSelectorGUI
+        self.device = None
+        self.ModelAndInputSelectorGUI = XTTMP_GUI
         self.FrameIterator = FrameIterator
         self.FrameVisualizer = FrameVisualizer
-        self.PostProcessing = PostProcessing
+        self.post_processor = None
         self.instancing_model = instancing_model
     def _get_user_input(self) -> tuple:
         """ get user input """
         root = tk.Tk()
@@ -93,21 +85,17 @@ class StmdGui:
                 logger.info("User cancelled input.")
                 return
-            model_name, opt1, opt2, is_stepping = user_input
+            model_name, opt1, opt2, is_stepping, device, post_processor, show_threshold = user_input
+            self.post_processor = post_processor
+            self.device = device
             reader = self._create_frame_reader(opt1, opt2)
-            model = self.instancing_model(model_name, device=self.device)
-            post_processor = self.PostProcessing(
-                device=self.device,
-                nms_radio=8,
-                get_top_num=self.get_top_num,
-            )
+            model = self.instancing_model(model_name, device)
             visualizer = self.FrameVisualizer(
                 window_name=model_name,
-                result_index_type="dots",
                 win_width=reader.img_width,
                 win_height=reader.img_height,
-                conf_threshold=self.show_threshold,
+                conf_threshold=show_threshold,
             )
             if is_stepping:
                 visualizer.paused = True
@@ -117,16 +105,17 @@ class StmdGui:
                 if not is_valid:
                     break
-                if self.device != 'cpu' and torch.cuda.is_available():
+                if self.device == 'cuda':
                     torch.cuda.synchronize()
                 time_start = time.perf_counter()
                 result = model(gray_tensor)
-                if self.device != 'cpu' and torch.cuda.is_available():
+                if self.device == 'cuda':
                     torch.cuda.synchronize()
                 run_time = time.perf_counter() - time_start
-                dots = post_processor(result['response'], result.get('direction'))
-                if not visualizer.update(color_img, result=dots, process_time=run_time):
+                post_res = post_processor(result['response'], result.get('direction'))
+                show_str = f'{self.device} : {run_time*1000:.1f} ms'
+                if not visualizer.update(color_img, result=post_res, show_str=show_str):
                     break
         except Exception as e:
@@ -139,10 +128,7 @@ class StmdGui:
                 reader.release()
             logger.info("Shutdown completed")
-def main(show_threshold: float = 0, get_top_num: int = 10):
-    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
-    obj = StmdGui(DEVICE, show_threshold = show_threshold, get_top_num = get_top_num)
-    obj.run()
 if __name__ == "__main__":
-    main(get_top_num = 20)
+    obj = StmdGui()
+    obj.run()

{xttmp-2.3.0.1 → xttmp-2.3.0.3}/src/xttmp/main.py RENAMED Viewed

@@ -2,6 +2,13 @@ from pathlib import Path
 import subprocess
 import sys
+try:
+    import torch
+except ImportError:
+    raise ImportError(
+        "Please install PyTorch first. "
+        "See https://pytorch.org/get-started/locally/"
+    )
 def main():
 	script_path = Path(__file__).resolve().parent / 'demo' / 'inference_gui.py'

{xttmp-2.3.0.1 → xttmp-2.3.0.3}/src/xttmp/util/compute_module.py RENAMED Viewed

@@ -6,67 +6,6 @@ import torch
 import torch.nn.functional as F
-def compute_temporal_conv(iptCell, kernel, pointer=None):
-    """
-    Computes temporal convolution.
-    Parameters:
-    - iptCell: A list of arrays where each element has the same dimension.
-    - kernel: A vector representing the convolution kernel.
-    - headPointer: Head pointer of the input cell array (optional).
-    Returns:
-    - optMatrix: The result of the temporal convolution.
-    """
-    # Default value for headPointer
-    if pointer is None:
-        pointer = len(iptCell) - 1
-    # Initialize output matrix
-    if iptCell[pointer] is None:
-        return None
-    # Ensure kernel is a vector
-    kernel = np.squeeze(kernel)
-    if not np.ndim(kernel) == 1:
-        raise ValueError('The kernel must be a vector.')
-    # Determine the lengths of input cell array and kernel
-    k1 = len(iptCell)
-    k2 = len(kernel)
-    length = min(k1, k2)
-    if isinstance(iptCell[pointer], np.ndarray):
-        optMatrix = np.zeros_like(iptCell[pointer])
-    elif isinstance(iptCell[pointer], torch.Tensor):
-        optMatrix = torch.zeros_like(iptCell[pointer])
-    # Perform temporal convolution
-    for t in range(length):
-        j = (pointer - t) % k1
-        if abs(kernel[t]) > 1e-16 and iptCell[j] is not None:
-            optMatrix += iptCell[j] * kernel[t]
-    return optMatrix
-def compute_circularlist_conv(circularCell, temporalKernel):
-    """
-    Compute the convolution of a circular cell with a temporal kernel.
-    Args:
-    - circularCell: The circular cell data.
-    - temporalKernel: The temporal kernel data.
-    Returns:
-    - opt_matrix: The result of the convolution.
-    """
-    optMatrix = compute_temporal_conv(circularCell,
-                                      temporalKernel,
-                                      circularCell.pointer )
-    return optMatrix
 def compute_response(ipt):
     """
     Computes the maximum response from multiple inputs.
@@ -233,139 +172,17 @@ class AreaNMS:
         return matrix * (matrix == local_max)
-def get_top_k_torch(response_tensor, direction_tensor, k=1000):
-    """
-    输入:
-        response_tensor: (..., H, W) 任意维度的 Tensor
-        direction_tensor: (..., H, W) 形状需与 response 匹配 (可选)
-    输出:
-        torch.Tensor: shape=(M, 4), dtype=float32, 其中 M <= k
-        格式: [[x, y, response, direction], ...]
-    """
-    # 1. 获取维度
-    H, W = response_tensor.shape[-2:]
-    k = min(k, H * W)
-    # 2. 展平 (Flatten)
-    # view(-1) 零拷贝，极快
-    flat_response = response_tensor.view(-1)
-    # 3. TopK (GPU 上极速排序)
-    top_vals, top_indices = torch.topk(flat_response, k=k)
-    # 4. 过滤掉 <= 0 的值 ---
-    # 创建掩码：只保留大于 0 的值
-    mask = top_vals > 0
-    # 如果全都是 0，直接返回空数组，避免后续报错
-    if not mask.any():
-        return torch.empty((0, 4))
-    # 应用掩码，缩减 tensor 长度
-    top_vals = top_vals[mask]
-    top_indices = top_indices[mask]
-    # ------------------------------------
-    # 5. 计算坐标 (x, y)
-    # 此时计算量已经减少，只计算非零点
-    top_y = top_indices.div(W, rounding_mode='floor').float()
-    top_x = (top_indices % W).float()
-    # 6. 获取 Direction
-    if direction_tensor is not None and direction_tensor.numel() > 0:
-        flat_direction = direction_tensor.view(-1)
-        # 注意：这里使用过滤后的 top_indices
-        top_dirs = flat_direction[top_indices]
-    else:
-        top_dirs = torch.empty_like(top_vals).fill_(float('nan'))
-    # 7. 堆叠 (Stack) -> (M, 4)
-    result_tensor = torch.stack([top_x, top_y, top_vals, top_dirs], dim=1)
-    return result_tensor
-def get_top_k_numpy(response_array, direction_array=None, k=1000):
-    """
-    输入:
-        response_array: (..., H, W) numpy.ndarray
-        direction_array: (..., H, W) (可选)
-    输出:
-        numpy.ndarray: shape=(M, 4), dtype=float32, 其中 M <= k
-        格式: [[x, y, response, direction], ...]
-    """
-    # 1. 获取维度
-    shape = response_array.shape
-    H, W = shape[-2:]
-    # 零拷贝展平
-    flat_response = response_array.ravel()
-    k = min(k, flat_response.size)
-    # 2. TopK 核心优化 (O(N))
-    # argpartition 找出最大的 k 个 (无序)
-    unsorted_top_indices = np.argpartition(flat_response, -k)[-k:]
-    unsorted_top_vals = flat_response[unsorted_top_indices]
-    # 3. 局部排序 (O(k log k))
-    # argsort 默认升序，[::-1] 翻转为降序
-    sort_idx = np.argsort(unsorted_top_vals)[::-1]
-    # 获取排序后的 Top K 索引和值
-    top_indices = unsorted_top_indices[sort_idx]
-    top_vals = unsorted_top_vals[sort_idx]
-    # --- [关键修改] 4. 过滤掉 <= 0 的值 ---
-    # 创建掩码
-    mask = top_vals > 0
-    # 极速判断：如果没有有效值，直接返回空数组
-    # np.any() 很快
-    if not np.any(mask):
-        return np.empty((0, 4), dtype=np.float32)
-    # 应用掩码 (切片操作，只保留有效值)
-    # 因为 k 通常不大 (比如 1000)，这里的拷贝开销可忽略不计
-    top_vals = top_vals[mask]
-    top_indices = top_indices[mask]
-    # 更新实际数量 M
-    M = top_vals.size
-    # ------------------------------------
-    # 5. 计算坐标 (x, y)
-    # 只对过滤后的索引计算，节省算力
-    top_y, top_x = np.unravel_index(top_indices, (H, W))
-    # 6. 获取 Direction
-    if direction_array is not None and direction_array.size > 0:
-        flat_direction = direction_array.ravel()
-        top_dirs = flat_direction[top_indices]
-    else:
-        top_dirs = np.full(M, np.nan, dtype=np.float32)
-    # 7. 堆叠结果
-    # 分配恰好大小为 M 的内存
-    result = np.empty((M, 4), dtype=np.float32)
-    result[:, 0] = top_x       # x
-    result[:, 1] = top_y       # y
-    result[:, 2] = top_vals    # response
-    result[:, 3] = top_dirs    # direction
-    return result
 class PostProcessing:
     """
     Post-processing class to apply AreaNMS, get top K, and return list format.
     """
-    def __init__(self, device='cpu', nms_radio = 8, get_top_num=1000):
+    def __init__(self, nms_radio = 8, get_top_num=1000):
         """
         Args:
-            device (str): Computing device ('cpu' or 'cuda').
+            nms_radio (int): Radius for AreaNMS.
+            get_top_num (int): Number of top points to extract.
         """
-        self.device = device
         self.area_nms = AreaNMS(radio=nms_radio)
         self.get_top_num = get_top_num
@@ -389,7 +206,7 @@ class PostProcessing:
         """
         nms_response = self.area_nms(response)
-        res = get_top_k_torch(nms_response,
+        res, _ = get_top_k_torch(nms_response,
                                 direction,
                                 k=self.get_top_num)
         if res.shape[0] == 0:
@@ -400,3 +217,144 @@ class PostProcessing:
                 res[:, 2] /= max_score
         return res
+@torch.no_grad()
+def gen_bboxes_around_points(results, box_size=16, shift_ratio=0.3):
+    """Generate initial bboxes around detected motion points.
+    Args:
+        results: (N, 4) -> [x, y, response, direction]
+        box_size: int, box size
+    Returns:
+        [[x1, y1, x2, y2], ... ] (N, 4) tensors
+    """
+    N = results.shape[0]
+    if N == 0:
+        return torch.empty((0, 4), device=results.device, dtype=torch.int)
+    rear_x, rear_y, direction = results[:, 0], results[:, 1], results[:, 3]
+    radius = box_size * 0.5
+    shift_mag = box_size * shift_ratio
+    # 1. 计算偏移量 (dx, dy)，根据方向和预设的 shift_mag
+    dx = torch.cos(direction) * shift_mag
+    dy = -torch.sin(direction) * shift_mag
+    # 2. 一次性将 NaN 偏移量替换为 0.0
+    dx = torch.nan_to_num(dx, nan=0.0)
+    dy = torch.nan_to_num(dy, nan=0.0)
+    # 3. 计算中心点
+    center_x = rear_x + dx
+    center_y = rear_y + dy
+    x1 = (center_x - radius)
+    y1 = (center_y - radius)
+    x2 = (center_x + radius)
+    y2 = (center_y + radius)
+    # Stack as (N, 4) -> [x1, y1, x2, y2]
+    return torch.stack([x1, y1, x2, y2], dim=1)
+@torch.no_grad()
+def get_top_k_torch(response_tensor, direction_tensor=None, k=100):
+    """
+    Extract the top-k points with highest responses from feature maps, filtering out non-positive values.
+    Args:
+        response_tensor (torch.Tensor): The response map tensor of shape (B, 1, H, W) or (B, H, W).
+        direction_tensor (torch.Tensor, optional): The corresponding direction map tensor of
+            shape (B, 1, H, W) or (B, H, W). Must match response_tensor's shape. Defaults to None.
+        k (int, optional): The maximum number of top points to extract per batch. Defaults to 100.
+    Returns:
+        Tuple[torch.Tensor, torch.Tensor]:
+            - results (torch.Tensor): A tensor of shape (M, 4) containing the valid extracted points
+            across the entire batch. M <= B * k. Each row is formatted as [x, y, response, direction].
+            - batch_ids (torch.Tensor): A 1D tensor of shape (M,) containing the corresponding
+            batch index (from 0 to B-1) for each point in `results`. dtype is torch.long.
+    """
+    B, _, H, W = response_tensor.shape
+    k = min(k, H * W)
+    device = response_tensor.device
+    # 1. Flatten -> (B, H*W)
+    flat_response = response_tensor.reshape(B, -1)
+    # 2. TopK -> top_vals and top_indices are both (B, k)
+    top_vals, top_indices = torch.topk(flat_response, k=k, dim=-1)
+    # 3. Get Direction -> (B, k)
+    if direction_tensor is not None and direction_tensor.numel() > 0:
+        flat_direction = direction_tensor.reshape(B, -1)
+        top_dirs = torch.gather(flat_direction, dim=-1, index=top_indices)
+    else:
+        top_dirs = torch.full_like(top_vals, float('nan'))
+    # 4. Calculate coordinates (x, y) -> (B, k)
+    top_y = top_indices.div(W, rounding_mode='floor').float()
+    top_x = (top_indices % W).float()
+    # 5. Stack -> merge on the last dimension, shape becomes (B, k, 4)
+    stacked = torch.stack([top_x, top_y, top_vals, top_dirs], dim=-1)
+    # 6. Generate Mask -> (B, k)
+    mask = top_vals > 0
+    # 7. Split and filter by Batch
+    result_list = []
+    batch_id_list = []
+    for i in range(B):
+        batch_mask = mask[i] # Get the mask for the i-th batch
+        # Apply mask: [k, 4] -> [M_i, 4]
+        valid_stacked = stacked[i][batch_mask]
+        result_list.append(valid_stacked)
+        # Create a batch index tensor of shape (M_i,) filled with the current batch index 'i'
+        batch_id_list.append(torch.full((valid_stacked.shape[0],), i, device=device, dtype=torch.long))
+    # Concatenate all valid items into continuous tensors
+    return torch.cat(result_list, dim=0), torch.cat(batch_id_list, dim=0)
+@torch.no_grad()
+def get_STMD_region_proposal(response_tensor, direction_tensor=None, top_k=1, box_size=16, spatial_scale=1.0, shift_ratio=0.3):
+    nms_win = int(box_size * spatial_scale) | 1  # 确保是奇数
+    score_mask = F.max_pool2d(response_tensor, kernel_size=nms_win, stride=1, padding=nms_win//2)
+    nms_response_tensor = torch.where(response_tensor == score_mask, response_tensor, 0.0)
+    vSTMD_res, batch_id = get_top_k_torch(nms_response_tensor, direction_tensor, k=top_k)
+    if spatial_scale > 1:
+        vSTMD_res[:, :2] *= spatial_scale   # 将坐标放大回原图尺度
+    bboxes = gen_bboxes_around_points(vSTMD_res, box_size, shift_ratio)
+    return vSTMD_res, bboxes, batch_id
+@torch.no_grad()
+def bbox_post_processing(top_k=1, box_size=16, spatial_scale=1.0, shift_ratio=0.3):
+    def post_process_func(
+        response_tensor,
+        direction_tensor=None
+    ):
+        vSTMD_res, bboxes, _ =  get_STMD_region_proposal(response_tensor,
+                                                        direction_tensor,
+                                                        top_k=top_k,
+                                                        box_size=box_size,
+                                                        spatial_scale=spatial_scale,
+                                                        shift_ratio=shift_ratio )
+        return torch.cat([bboxes, vSTMD_res[..., 2:3]], dim=1)
+    return post_process_func

{xttmp-2.3.0.1 → xttmp-2.3.0.3}/src/xttmp/util/iostream.py RENAMED Viewed

@@ -3,6 +3,7 @@ import re
 from pathlib import Path
 import logging
 from typing import Optional, List, Union, Tuple, Any
+from functools import partial
 import cv2
 import numpy as np
@@ -12,6 +13,7 @@ import torch
 from .. import model
+from .compute_module import PostProcessing, bbox_post_processing
 # Get the full path of this file
@@ -215,7 +217,6 @@ class FrameIterator:
 class FrameVisualizer:
     def __init__(self, window_name="Visualizer",
-                 result_index_type="matrix",
                  win_width=None, win_height=None,
                  is_headless=False,
                  conf_threshold=0.8 # 阈值参数
@@ -225,7 +226,6 @@ class FrameVisualizer:
         :param conf_threshold: 可视化过滤的相对阈值 (0.0 ~ 1.0)
         """
         self.window_name = window_name
-        self.result_index_type = result_index_type  # "matrix", "dots", "bbox"
         self.win_width = win_width or 800
         self.win_height = win_height or 600
         self.is_headless = is_headless
@@ -260,24 +260,26 @@ class FrameVisualizer:
         self.save_output = True
         print(f">>> Video writer initialized: {output_path}")
-    def update(self, frame, result=None, direction=None, annotation=None, process_time=None) -> bool:
+    def update(self, frame, result=None, direction=None, annotation=None, show_str=None) -> bool:
         if frame is None:
             return False
         # --- 绘制逻辑 ---
         # 即使 result 是空的，只要不为 None 也可以处理
         if result is not None:
-            if self.result_index_type == "matrix":
+            if result.dim() == 4:
                 self._draw_matrix(frame, result, direction, self.conf_threshold)
-            elif self.result_index_type == "dots":
+            elif result.shape[1] == 4:
                 result = result.cpu().numpy() if isinstance(result, torch.Tensor) else result
                 self._draw_dots(frame, result, self.conf_threshold)
-            elif self.result_index_type == "bbox":
+            elif result.shape[1] == 5:
                 self._draw_bbox(frame, result, self.conf_threshold, annotation)
+            device_str = f'{result.device}'
+        else:
+            device_str = 'Time'
         # --- 信息显示 ---
-        if process_time is not None:
-            cv2.putText(frame, f'Time: {process_time*1000:.1f} ms',
+        if show_str is not None and show_str != '':
+            cv2.putText(frame, str(show_str),
                         (10, 30), cv2.FONT_HERSHEY_SIMPLEX, 0.8,
                         (0, 255, 0), 2, cv2.LINE_AA)
@@ -344,10 +346,12 @@ class FrameVisualizer:
     @staticmethod
     def _draw_matrix(frame, matrix, direction_map, threshold):
         """处理 Matrix 格式 (Heatmap)"""
-        if np.max(matrix) <= 0: return
+        if torch.max(matrix) <= 0: return
         # np.where 返回 (rows, cols) 即 (y, x)
-        rows, cols = np.where(matrix > threshold)
+        _, _, rows, cols = torch.where(matrix > threshold)
+        rows = rows.cpu().numpy()
+        cols = cols.cpu().numpy()
         # 画点
         for r, c in zip(rows, cols):
@@ -358,7 +362,7 @@ class FrameVisualizer:
         # 画箭头
         if direction_map is not None and len(rows) > 0:
             # 确保 direction_map 维度匹配，这里假设是同样大小的矩阵
-            valid_dirs = direction_map[rows, cols]
+            valid_dirs = direction_map[0, 0, rows, cols]
             # 过滤 NaN
             valid_mask = ~np.isnan(valid_dirs)
@@ -450,9 +454,9 @@ class ModelSelectorGUI:
         self.modelLabel = ttk.Label(self.root, text="Select a model:", width = 15)
         self.modelLabel.grid(row=0, column=0, padx=10, pady=10)
-        self.modelCombobox = ttk.Combobox(self.root, values=modelList, width = 30)
+        self.modelCombobox = ttk.Combobox(self.root, values=modelList, width = 25)
         self.modelCombobox.current(11)
-        self.modelCombobox.grid(row=0, column=1, columnspan=2, padx=10, pady=10)
+        self.modelCombobox.grid(row=0, column=1, columnspan=2, pady=10, sticky='w')
 class InputSelectorGUI:
@@ -476,25 +480,24 @@ class InputSelectorGUI:
         self.endImgName = None
     def create_gui(self):
-        self.inputTypeLabel = ttk.Label(self.root, text="Select input from:", width = 15)
-        self.inputTypeLabel.grid(row=1, column=0, padx=10, pady=10)
+        self.inputTypeLabel = ttk.Label(self.root, text="Input Type:", width = 15)
+        self.inputTypeLabel.grid(row=1, column=0, padx=10, pady=10, sticky='w')
         self.selectedOption = tk.IntVar(value=0)
-        self.vidLabel = ttk.Radiobutton(self.root,
-                                        text='Video stream',
-                                        variable=self.selectedOption,
-                                        value=1,
-                                        command=self.select_vidstream)
-        self.vidLabel.grid(row=1, column=2, padx=10, pady=10)
         self.imgLabel = ttk.Radiobutton(self.root,
-                                        text='Image stream',
+                                        text='Image Sequence',
                                         variable=self.selectedOption,
                                         value=2,
                                         command=self.select_imgstream)
-        self.imgLabel.grid(row=1, column=1, padx=10, pady=10)
+        self.imgLabel.grid(row=1, column=2, padx=10, pady=10, sticky="w")
+        self.vidLabel = ttk.Radiobutton(self.root,
+                                        text='Video',
+                                        variable=self.selectedOption,
+                                        value=1,
+                                        command=self.select_vidstream)
+        self.vidLabel.grid(row=1, column=1, padx=10, pady=10, sticky="w")
     def select_vidstream(self):
         self.imgSelectFolder = None
@@ -503,16 +506,16 @@ class InputSelectorGUI:
         for element in self.imgElement.values():
             element.destroy()
-        self.vidElement['lblVidIndicate'] = ttk.Label(self.root, text= 'Video\'s path:',  width = 15)
-        self.vidElement['lblVidIndicate'].grid(row=2, column=0, padx=10, pady=30)
+        self.vidElement['lblVidIndicate'] = ttk.Label(self.root, text= 'Video\'s path:')
+        self.vidElement['lblVidIndicate'].grid(row=2, column=1, padx=10, pady=30, sticky='w')
         self.vidElement['lblVidPath'] = ttk.Label(self.root,
                                            text="Waiting for the selection",
                                            wraplength=220
                                            )
-        self.vidElement['lblVidPath'].grid(row=2, column=1, columnspan=2, padx=10, pady=10)
+        self.vidElement['lblVidPath'].grid(row=2, column=2, padx=10, pady=10, sticky='w')
         self.vidElement['btn'] = ttk.Button(self.root, text="Select a video", command=self._clicked_vid)
-        self.vidElement['btn'].grid(row=3, column=2, padx=10, pady=10)
+        self.vidElement['btn'].grid(row=3, column=2, padx=10, pady=10, sticky='w')
     def _clicked_vid(self):
         self.vidName = filedialog.askopenfilenames(initialdir=VID_DEFAULT_FOLDER)
@@ -524,15 +527,20 @@ class InputSelectorGUI:
         for element in self.vidElement.values():
             element.destroy()
-        self.imgElement['lblFolder'] = ttk.Label(self.root, text="Image's folder: ",  width = 15)
-        self.imgElement['lblFolder'].grid(row=2, column=0, padx=10, pady=10)
+        self.imgElement['lblFolder'] = ttk.Label(self.root, text="Image's folder: ")
+        self.imgElement['lblFolder'].grid(row=2, column=1, padx=10, pady=10, sticky='w')
         self.imgElement['lblFolderName'] = ttk.Label(self.root, text="Waiting for the selection", wraplength=220)
-        self.imgElement['lblFolderName'].grid(row=2, column=1, columnspan=2, padx=10, pady=30)
+        self.imgElement['lblFolderName'].grid(row=2, column=2, padx=10, pady=30, sticky='w')
         self.imgElement['btnStart'] = ttk.Button(self.root, text="Select start frame", command=self._clicked_start_img)
-        self.imgElement['btnStart'].grid(row=3, column=1,  padx=10, pady=10)
+        self.imgElement['btnStart'].grid(row=3, column=1,  padx=10, pady=10, sticky='w')
+        self.imgElement['lblStartImg'] = ttk.Label(self.root, text=self.startImgName)
+        self.imgElement['lblStartImg'].grid(row=3, column=2, padx=10, pady=10, sticky='w')
         self.imgElement['btnEnd'] = ttk.Button(self.root, text="Select end frame", command=self._clicked_end_img)
-        self.imgElement['btnEnd'].grid(row=4, column=1,  padx=10, pady=10)
+        self.imgElement['btnEnd'].grid(row=4, column=1,  padx=10, pady=10, sticky='w')
+        self.imgElement['lblEndImg'] = ttk.Label(self.root, text=self.endImgName)
+        self.imgElement['lblEndImg'].grid(row=4, column=2, padx=10, pady=10, sticky='w')
     def _clicked_start_img(self):
         startImgFullPath = filedialog.askopenfilenames(
@@ -551,8 +559,7 @@ class InputSelectorGUI:
         self.imgSelectFolder = self.startFolder
         self.imgElement['lblFolderName'].config(text=self.imgSelectFolder)
-        self.imgElement['lblStartImg'] = ttk.Label(self.root, text=self.startImgName)
-        self.imgElement['lblStartImg'].grid(row=3, column=2, padx=10, pady=10)
+        self.imgElement['lblStartImg'].config(text=self.startImgName)
     def _clicked_end_img(self):
         endImgFullPath = filedialog.askopenfilenames(
@@ -573,16 +580,127 @@ class InputSelectorGUI:
         self.imgSelectFolder = self.endFolder
         self.imgElement['lblFolderName'].config(text=self.imgSelectFolder)
-        self.imgElement['lblEndImg'] = ttk.Label(self.root, text=self.endImgName)
-        self.imgElement['lblEndImg'].grid(row=4, column=2, padx=10, pady=10)
+        self.imgElement['lblEndImg'].config(text=self.endImgName)
+class PostProcessingSelectorGUI:
+    def __init__(self, root):
+        self.root = root
+        self.output_type = "dot"
+        self.show_threshold = 0.0
+        self.top_num = 1
+        self.outputTypeLabel = ttk.Label(self.root, text="Output Type:", width = 15)
+        self.outputTypeLabel.grid(row=5, column=0, padx=10, pady=10)
+        self.selectedOption = tk.IntVar(value=2)
+        self.dotLabel = ttk.Radiobutton(self.root,
+                                        text='dot output',
+                                        variable=self.selectedOption,
+                                        value=2,
+                                        command=self.select_dot)
+        self.dotLabel.grid(row=5, column=1, padx=10, pady=10, sticky="w")
+        self.bboxLabel = ttk.Radiobutton(self.root,
+                                        text='bbox output',
+                                        variable=self.selectedOption,
+                                        value=1,
+                                        command=self.select_bbox)
+        self.bboxLabel.grid(row=5, column=2, padx=10, pady=10, sticky="w")
+        self.showThresholdLabel = ttk.Label(self.root, text="Threshold:", width=10)
+        self.showThresholdLabel.grid(row=6, column=1, padx=10, pady=10, sticky='w')
+        self.showThresholdVar = tk.StringVar(value="0")
+        self.showThresholdVar.trace_add('write', self.update_show_threshold)
+        self.showThresholdEntry = ttk.Entry(self.root, textvariable=self.showThresholdVar, width=5)
+        self.showThresholdEntry.grid(row=6, column=2, padx=10, pady=10, sticky='w')
-class ModelAndInputSelectorGUI:
+        self.getTopNumLabel = ttk.Label(self.root, text="Top Num:", width=10)
+        self.getTopNumLabel.grid(row=7, column=1, padx=10, pady=10, sticky='w')
+        self.getTopNumVar = tk.StringVar(value="1")
+        self.getTopNumVar.trace_add('write', self.update_top_num)
+        self.getTopNumEntry = ttk.Entry(self.root, textvariable=self.getTopNumVar, width=5)
+        self.getTopNumEntry.grid(row=7, column=2, padx=10, pady=10, sticky='w')
+        self.select_dot()
+    def select_dot(self):
+        self.selectedOption.set(2)
+        self.output_type = "dot"
+    def select_bbox(self):
+        self.selectedOption.set(1)
+        self.output_type = "bbox"
+    def get_post_processing(self):
+        if self.output_type == "dot":
+            return PostProcessing(get_top_num = self.top_num)
+        elif self.output_type == "bbox":
+            return bbox_post_processing(self.top_num)
+        else:
+            raise ValueError(f"Unknown output type: {self.output_type}")
+    def update_show_threshold(self, *args):
+        value = float(self.showThresholdVar.get())
+        self.show_threshold = min(max(value, 0.0), 1.0)  # clamp to the [0.0, 1.0] range
+    def update_top_num(self, *args):
+        value = int(self.getTopNumVar.get())
+        self.top_num = max(value, 1)  # ensure top_num is at least 1
+class DeviceSelectorGUI:
     def __init__(self, root):
         self.root = root
+        self.device = "cpu"
-        windowHeight = 350
-        windowWidth = 400
+        self.deviceLabel = ttk.Label(self.root, text="Select Device:", width=15)
+        self.deviceLabel.grid(row=8, column=0, padx=10, pady=10)
+        self.selectedOption = tk.IntVar(value=1)
+        if torch.cuda.is_available():
+            self.selectedOption.set(2)
+            self.device = "cuda"
+        self.cpuLabel = ttk.Radiobutton(self.root,
+                                        text='CPU',
+                                        variable=self.selectedOption,
+                                        value=1,
+                                        command=self.select_cpu)
+        self.cpuLabel.grid(row=8, column=1, padx=10, pady=10, sticky="w")
+        self.gpuLabel = ttk.Radiobutton(self.root,
+                                        text='GPU',
+                                        variable=self.selectedOption,
+                                        value=2,
+                                        command=self.select_gpu)
+        self.gpuLabel.grid(row=8, column=2, padx=10, pady=10, sticky="w")
+    def select_cpu(self):
+        self.selectedOption.set(1)
+        self.device = "cpu"
+    def select_gpu(self):
+        if torch.cuda.is_available():
+            self.selectedOption.set(2)
+            self.device = "cuda"
+        else:
+            messagebox.showinfo("Message title", "CUDA is not available. Please select CPU.")
+            self.select_cpu()
+class XTTMP_GUI:
+    def __init__(self, root):
+        self.root = root
+        windowHeight = 550
+        windowWidth = 510
         startHeight = (root.winfo_screenheight() - windowHeight) // 2
         startWidth = (root.winfo_screenwidth() - windowWidth) // 2
@@ -592,13 +710,16 @@ class ModelAndInputSelectorGUI:
         self._set_window_icon()
         self.objModelSelector = ModelSelectorGUI(root)
+        self.objPostProcessingSelector = PostProcessingSelectorGUI(root)
         self.objInputSelector = InputSelectorGUI(root)
+        self.objDeviceSelector = DeviceSelectorGUI(root)
-        self.btnRun = ttk.Button(self.root, text="Run", command=self._run)
-        self.btnRun.place(x = 20, y=300)
         self.btnStepping = ttk.Button(self.root, text="Stepping", command=self._stepping)
-        self.btnStepping.place(x = 20, y=270)
         self.isStepping = False
+        self.btnStepping.grid(row=9, column=2, padx=10, pady=10, sticky='e')
+        self.btnRun = ttk.Button(self.root, text="Run", command=self._run)
+        self.btnRun.grid(row=10, column=2, padx=10, pady=10, sticky='e')
     def create_gui(self):
         self.objModelSelector.create_gui(ALL_MODEL)
@@ -607,9 +728,13 @@ class ModelAndInputSelectorGUI:
         self.root.mainloop()
         if self.objInputSelector.selectedOption.get() == 1:
-            return self.modelName, self.vidName, None, self.isStepping
+            return (self.modelName, self.vidName, None, self.isStepping,
+                    self.objDeviceSelector.device, self.objPostProcessingSelector.get_post_processing(),
+                    self.objPostProcessingSelector.show_threshold)
         elif self.objInputSelector.selectedOption.get() == 2:
-            return self.modelName, self.startImgName, self.endImgName, self.isStepping
+            return (self.modelName, self.startImgName, self.endImgName, self.isStepping,
+                     self.objDeviceSelector.device, self.objPostProcessingSelector.get_post_processing(),
+                    self.objPostProcessingSelector.show_threshold)
     def _run(self):
         self.modelName = self.objModelSelector.modelCombobox.get()

{xttmp-2.3.0.1 → xttmp-2.3.0.3/src/xttmp.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: xttmp
-Version: 2.3.0.1
+Version: 2.3.0.3
 Summary: eXtremely Tiny Target - Motion Perception
 Author-email: Shawn MX <mingshuoxu@hotmail.com>
 Project-URL: Homepage, https://github.com/MingshuoXu/Small-Target-Motion-Detectors
@@ -16,15 +16,16 @@ Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
-Classifier: Topic :: Scientific/Engineering :: Image Recognition
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: matplotlib
 Requires-Dist: opencv-python
 Requires-Dist: scipy
-Requires-Dist: torch>=2.5.0
-Requires-Dist: torchvision>=0.20.0
+Provides-Extra: torch
+Requires-Dist: torch>=2.5.0; extra == "torch"
+Requires-Dist: torchvision>=0.20.0; extra == "torch"
 Dynamic: license-file
 # Small Target Motion Detectors, Version 2.3 (XTT-MP: Extremely Tiny Target - Motion Perception)
@@ -66,8 +67,18 @@ Built with modularity and extensibility in mind, XTT-MP provides a robust suite
 - After `pip install xttmp`, use the installed code and bring your own input data, or run from a repository checkout to access the bundled examples.
 ### Via PyPI
+#### CPU
+```bash
+pip install xttmp[torch]
+```
+#### NVIDIA GPU (CUDA 12.6)
+```bash
+pip install torch torchvision --index-url https://download.pytorch.org/whl/cu126
+```
+### Running the GUI Demo
 ```bash
-pip install xttmp
 xttmp_gui
 ```