stabilo 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- stabilo/__init__.py +4 -0
- stabilo/cfg/default.yaml +57 -0
- stabilo/stabilo.py +595 -0
- stabilo/thresholds/models/AKAZE/model_mask_False_clahe_False.txt +2 -0
- stabilo/thresholds/models/AKAZE/model_mask_False_clahe_True.txt +2 -0
- stabilo/thresholds/models/AKAZE/model_mask_True_clahe_False.txt +2 -0
- stabilo/thresholds/models/AKAZE/model_mask_True_clahe_True.txt +2 -0
- stabilo/thresholds/models/BRISK/model_mask_False_clahe_False.txt +2 -0
- stabilo/thresholds/models/BRISK/model_mask_False_clahe_True.txt +2 -0
- stabilo/thresholds/models/BRISK/model_mask_True_clahe_False.txt +2 -0
- stabilo/thresholds/models/BRISK/model_mask_True_clahe_True.txt +2 -0
- stabilo/thresholds/models/KAZE/model_mask_False_clahe_False.txt +2 -0
- stabilo/thresholds/models/KAZE/model_mask_False_clahe_True.txt +2 -0
- stabilo/thresholds/models/KAZE/model_mask_True_clahe_False.txt +2 -0
- stabilo/thresholds/models/KAZE/model_mask_True_clahe_True.txt +2 -0
- stabilo/utils.py +112 -0
- stabilo-1.0.0.dist-info/LICENSE +21 -0
- stabilo-1.0.0.dist-info/METADATA +209 -0
- stabilo-1.0.0.dist-info/RECORD +21 -0
- stabilo-1.0.0.dist-info/WHEEL +5 -0
- stabilo-1.0.0.dist-info/top_level.txt +1 -0
stabilo/__init__.py
ADDED
stabilo/cfg/default.yaml
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
# This is the default configuration file (default.yaml) containing optimized parameters
|
|
2
|
+
# for the ORB detector, brute force (BF) matcher, ratio test, and projective transformation.
|
|
3
|
+
# This template serves as a starting point for users to customize according to their specific needs.
|
|
4
|
+
#
|
|
5
|
+
# The goal of this optimization was to balance high accuracy without compromising on speed,
|
|
6
|
+
# specifically tailored for high-altitude bird's-eye view (BeV) drone imagery analysis.
|
|
7
|
+
#
|
|
8
|
+
# The optimization involved a set of BeV drone scenes with associated vehicle masks captured
|
|
9
|
+
# from an altitude of approximately 150 meters. With the projective transformation fixed,
|
|
10
|
+
# we then optimized all the remaining parameters in a hierarchical manner.
|
|
11
|
+
#
|
|
12
|
+
# Details of the optimization process will be available soon at:
|
|
13
|
+
# https://github.com/rfonod/stabilo-optimize
|
|
14
|
+
#
|
|
15
|
+
# The detailed description of each parameter is explained in the stabilo.py module.
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# Image pre-processing related
|
|
19
|
+
clahe: false # Contrast Limited Adaptive Histogram Equalization (CLAHE); [true, false]
|
|
20
|
+
downsample_ratio: 0.5 # Downsample ratio for image resizing; (0.0, 1.0]
|
|
21
|
+
|
|
22
|
+
# Feature detectors and descriptors related
|
|
23
|
+
detector_name: 'orb' # Feature detector name; ['orb', 'sift', 'rsift', 'brisk', 'kaze', 'akaze']
|
|
24
|
+
max_features: 2000 # Maximum number of features to detect; [0, inf)
|
|
25
|
+
ref_multiplier: 2.0 # Reference multiplier for the number of features; (0.0, inf)
|
|
26
|
+
|
|
27
|
+
# Feature matching and filtering related
|
|
28
|
+
matcher_name: 'bf' # Feature matcher name; ['bf', 'flann']
|
|
29
|
+
filter_type: 'ratio' # Filter type; ['none', 'ratio', 'distance']
|
|
30
|
+
filter_ratio: 0.9 # Ratio test threshold; (0.0, 1.0]
|
|
31
|
+
|
|
32
|
+
# Transformation matrix calculation related
|
|
33
|
+
transformation_type: 'projective' # Transformation type; ['projective', 'affine']
|
|
34
|
+
ransac_method: 38 # RANSAC method; [4: LMEDS, 8: RANSAC, 16: RHO, 32: DEGENSAC, 33: DEGENSAC (with different parameters), 35: LO-RANSAC, 36: GC-RANSAC, 37: PROSAC, 38: MAGSAC++]
|
|
35
|
+
ransac_epipolar_threshold: 2.0 # Reprojection-error threshold for RANSAC inliers (passed to OpenCV as ransacReprojThreshold); (0.0, inf)
|
|
36
|
+
ransac_max_iter: 5000 # Maximum number of iterations for RANSAC; (0, inf]
|
|
37
|
+
ransac_confidence: 0.999999 # Confidence level for RANSAC; (0.0, 1.0]
|
|
38
|
+
|
|
39
|
+
# Mask related
|
|
40
|
+
mask_use: true # Use exclusion mask (built from bounding boxes) during feature detection; [true, false]
|
|
41
|
+
mask_margin_ratio: 0.15 # Margin ratio for mask generation; [0.0, 1.0]
|
|
42
|
+
|
|
43
|
+
# Default thresholds for BRISK, KAZE, and AKAZE (if threshold analysis not performed)
|
|
44
|
+
brisk_threshold: 130 # BRISK threshold; (0, 255]
|
|
45
|
+
kaze_threshold: 0.01 # KAZE threshold; (0.0, inf]
|
|
46
|
+
akaze_threshold: 0.01 # AKAZE threshold; (0.0, inf]
|
|
47
|
+
|
|
48
|
+
# Computation related
|
|
49
|
+
gpu: false # Use GPU for computation; [true, false]
|
|
50
|
+
|
|
51
|
+
# Miscellaneous
|
|
52
|
+
viz: false # Store features and matches for visualization; [true, false]
|
|
53
|
+
benchmark: false # Benchmark mode; [true, false]
|
|
54
|
+
|
|
55
|
+
# Debug related
|
|
56
|
+
min_good_match_count_warning: 20 # Minimum good match count warning; [0, inf]
|
|
57
|
+
min_inliers_match_count_warning: 10 # Minimum inliers match count warning; [0, inf]
|
stabilo/stabilo.py
ADDED
|
@@ -0,0 +1,595 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Author: Robert Fonod (robert.fonod@ieee.org)
|
|
3
|
+
|
|
4
|
+
"""
|
|
5
|
+
stabilo.py - Reference frame video stabilization with optional user-provided exclusion masks
|
|
6
|
+
|
|
7
|
+
This module provides the Stabilizer class for video or track stabilization using feature matching
|
|
8
|
+
and transformation estimation. It leverages OpenCV for core functionalities.
|
|
9
|
+
|
|
10
|
+
The class supports various feature detectors, matchers, filtering methods, and transformation types.
|
|
11
|
+
Fine-tuning these parameters allows customization for specific video stabilization needs.
|
|
12
|
+
|
|
13
|
+
The parameters for the feature detectors, matchers, filtering methods, and transformations can be
|
|
14
|
+
fine-tuned to suit specific requirements, see https://github.com/rfonod/stabilo-optimize.
|
|
15
|
+
|
|
16
|
+
Key Features:
|
|
17
|
+
- Video or bounding box (tracks) stabilization with respect to a reference frame.
|
|
18
|
+
- Fine-tunable parameters for feature detectors, matchers, filtering methods, and transformations.
|
|
19
|
+
- Support for various feature detectors (e.g., ORB, SIFT) and matchers (e.g., BF, FLANN).
|
|
20
|
+
- Projective or affine transformations for frame stabilization.
|
|
21
|
+
- RANSAC-based algorithms for robust transformation matrix estimation.
|
|
22
|
+
- CLAHE and pre-processing options for contrast enhancement.
|
|
23
|
+
- Visualization and debugging features for keypoints, descriptors, and masks.
|
|
24
|
+
- GPU acceleration for improved performance (not implemented yet).
|
|
25
|
+
|
|
26
|
+
Usage:
|
|
27
|
+
1. Create an instance of the 'Stabilizer' class with desired parameter configurations.
|
|
28
|
+
2. Set a reference frame using the 'set_ref_frame' method.
|
|
29
|
+
3. Stabilize any preceding or subsequent frames using the 'stabilize' method.
|
|
30
|
+
4. Access stabilized frames, bounding boxes, and transformation matrices using specific methods.
|
|
31
|
+
"""
|
|
32
|
+
|
|
33
|
+
import sys
|
|
34
|
+
from pathlib import Path
|
|
35
|
+
from typing import Union
|
|
36
|
+
|
|
37
|
+
import cv2
|
|
38
|
+
import numpy as np
|
|
39
|
+
|
|
40
|
+
from .utils import four2xywh, load_config, setup_logger, timer, xywh2four
|
|
41
|
+
|
|
42
|
+
# Module-wide logger
logger = setup_logger(__name__)

# Directory that contains this module; bundled resources are resolved relative to it
ROOT = Path(__file__).resolve().parent

# Default parameters, loaded once at import time from the bundled configuration file
cfg = load_config(ROOT / "cfg" / "default.yaml", logger)

# Flag handed to the `timer` decorator applied to several Stabilizer methods
PROFILING = False
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
class Stabilizer:
|
|
56
|
+
"""
|
|
57
|
+
This class implements a video stabilizer. It uses feature matching to find the transformation
|
|
58
|
+
between the reference frame and the current frame, allowing stabilization of subsequent frames.
|
|
59
|
+
The transformation matrix can be used to transform the current frame to the reference frame or
|
|
60
|
+
to transform points from the current frame to the reference frame.
|
|
61
|
+
"""
|
|
62
|
+
|
|
63
|
+
VALID_DETECTORS = ['orb', 'sift', 'rsift', 'brisk', 'kaze', 'akaze']
|
|
64
|
+
VALID_MATCHERS = ['bf', 'flann']
|
|
65
|
+
VALID_FILTER_TYPES = ['none', 'ratio', 'distance']
|
|
66
|
+
VALID_TRANSFORMATION_TYPES = ['projective', 'affine']
|
|
67
|
+
VALID_RANSAC_METHODS_DICT = {
|
|
68
|
+
'cv2.LMEDS': cv2.LMEDS, # 4 - LMEDS
|
|
69
|
+
'cv2.RANSAC': cv2.RANSAC, # 8 - RANSAC
|
|
70
|
+
'cv2.RHO': cv2.RHO, # 16 - RHO
|
|
71
|
+
'cv2.USAC_DEFAULT': cv2.USAC_DEFAULT, # 32 - DEGENSAC
|
|
72
|
+
'cv2.USAC_PARALLEL': cv2.USAC_PARALLEL, # 33 - DEGENSAC (with different parameters)
|
|
73
|
+
'cv2.USAC_FAST': cv2.USAC_FAST, # 35 - LO-RANSAC
|
|
74
|
+
'cv2.USAC_ACCURATE': cv2.USAC_ACCURATE, # 36 - GC-RANSAC
|
|
75
|
+
'cv2.USAC_PROSAC': cv2.USAC_PROSAC, # 37 - PROSAC
|
|
76
|
+
'cv2.USAC_MAGSAC': cv2.USAC_MAGSAC # 38 - MAGSAC++
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
def __init__(self, **kwargs):
    """
    Build a Stabilizer from keyword overrides of the default configuration.

    Every keyword is optional; a keyword that is omitted takes its value from the
    default configuration.

    Arguments:
    - detector_name: str - feature detector to use (orb, sift, rsift, brisk, kaze, akaze)
    - matcher_name: str - feature matcher to use (bf, flann)
    - filter_type: str - filter type for the feature matcher (none, ratio, distance)
    - transformation_type: str - transformation for stabilization (projective, affine)
    - clahe: bool - use CLAHE for contrast enhancement
    - downsample_ratio: float - down-sampling ratio for the frames (e.g., 0.5 for half the size)
    - max_features: int - max number of features to detect (for BRISK, KAZE, and AKAZE this is an approximation)
    - ref_multiplier: float - multiplier for max features in reference frame (ref_multiplier x max_features)
    - mask_use: bool - use mask for feature detection
    - filter_ratio: float - filtering ratio; Lowe's ratio for 'ratio' filter, distance threshold ratio for 'distance' filter
    - ransac_method: int - method for RANSAC algorithm (one of the values of VALID_RANSAC_METHODS_DICT)
    - ransac_epipolar_threshold: float - threshold for RANSAC (e.g., 1.0)
    - ransac_max_iter: int - max iterations for RANSAC (e.g., 2000)
    - ransac_confidence: float - confidence for RANSAC (e.g., 0.999)
    - brisk_threshold: int - threshold for BRISK detector (used only if 'max_features -> threshold' model is unavailable)
    - kaze_threshold: float - threshold for KAZE detector (used only if 'max_features -> threshold' model is unavailable)
    - akaze_threshold: float - threshold for AKAZE detector (used only if 'max_features -> threshold' model is unavailable)
    - gpu: bool - use GPU acceleration (not fully implemented/tested yet)
    - viz: bool - save some features for visualization (e.g., keypoints, descriptors, masks)
    - benchmark: bool - different behavior for benchmarking purposes (e.g., re-use the last transformation if the current is None)
    - min_good_match_count_warning: int - min number of good matches to trigger a warning
    - min_inliers_match_count_warning: int - min number of inliers to trigger a warning
    """
    # Configuration must be in place before anything else can be validated or built.
    self._load_configuration(kwargs)

    # The remaining set-up steps take no arguments and run in this fixed order.
    setup_steps = (
        self._validate_arguments,
        self._initialize_variables,
        self._create_feature_detectors,
        self._create_matcher,
        self._create_transformer,
        self._create_helpers,
    )
    for run_step in setup_steps:
        run_step()
|
|
114
|
+
|
|
115
|
+
def _load_configuration(self, kwargs):
    """
    Populate the instance with one attribute per configuration key.

    For every key of the module-level default configuration `cfg`, the attribute of
    the same name is set to the value found in `kwargs` when present, otherwise to
    the default value.

    Keys in `kwargs` that are not configuration keys are not applied. A warning is
    logged for them so that a misspelled option does not go unnoticed.

    Arguments:
    - kwargs: dict - user-provided overrides (the keyword arguments of __init__)
    """
    unknown_keys = sorted(set(kwargs) - set(cfg))
    if unknown_keys:
        logger.warning(f"Ignoring unrecognized configuration argument(s): {unknown_keys}")

    for key, default in cfg.items():
        setattr(self, key, kwargs.get(key, default))
|
|
121
|
+
|
|
122
|
+
def _initialize_variables(self):
    """
    Reset the per-frame state of the Stabilizer: every attribute listed below is set to None.
    """
    state_attributes = (
        'ref_frame', 'cur_frame',
        'ref_frame_gray', 'cur_frame_gray',
        'ref_boxes', 'cur_boxes',
        'ref_mask', 'cur_mask',
        'ref_kpts', 'cur_kpts',
        'ref_desc', 'cur_desc',
        'ref_pts', 'cur_pts',
        'cur_trans_matrix', 'trans_matrix_last_known',
        'cur_inliers', 'cur_inliers_count',
        'h', 'w',
    )
    for attribute in state_attributes:
        setattr(self, attribute, None)
|
|
136
|
+
|
|
137
|
+
def _create_feature_detectors(self):
    """
    Instantiate the detector pair for the configured detector and record the matching norm type.

    Sets `detector_cur` (current frames), `detector_ref` (reference frame) and `norm_type`.
    """
    detector_pair = self._create_detector(self.detector_name)
    self.detector_cur, self.detector_ref = detector_pair
    self.norm_type = self._get_norm_type()
|
|
143
|
+
|
|
144
|
+
def _create_detector(self, detector_name: str):
    """
    Dispatch to the factory method for `detector_name` and return its result.

    Returns whatever the factory returns (a pair: detector for current frames,
    detector for the reference frame), or None when the name is not recognised.
    """
    factory_names = {
        "orb": "_create_orb_detectors",
        "sift": "_create_sift_detectors",
        "rsift": "_create_sift_detectors",  # 'rsift' is built on the same SIFT detectors
        "brisk": "_create_brisk_detectors",
        "kaze": "_create_kaze_detectors",
        "akaze": "_create_akaze_detectors",
    }
    factory_name = factory_names.get(detector_name)
    if factory_name is None:
        return None
    return getattr(self, factory_name)()
|
|
158
|
+
|
|
159
|
+
def _create_orb_detectors(self):
    """
    Build the ORB detector pair.

    Returns a tuple (detector for current frames, detector for the reference frame).
    The reference detector is created with round(ref_multiplier * max_features) features.
    """
    make_orb = cv2.cuda.ORB_create if self.gpu else cv2.ORB_create
    detector_cur = make_orb(self.max_features)
    detector_ref = make_orb(round(self.ref_multiplier * self.max_features))
    return detector_cur, detector_ref
|
|
167
|
+
|
|
168
|
+
def _create_sift_detectors(self):
    """
    Build the SIFT detector pair.

    Returns a tuple (detector for current frames, detector for the reference frame).
    The reference detector is created with round(ref_multiplier * max_features) features.
    """
    make_sift = cv2.cuda.SIFT_create if self.gpu else cv2.SIFT_create
    detector_cur = make_sift(self.max_features)  # (enable_precise_upscale=True)
    detector_ref = make_sift(round(self.ref_multiplier * self.max_features))
    return detector_cur, detector_ref
|
|
176
|
+
|
|
177
|
+
def _create_brisk_detectors(self):
    """
    Build the BRISK detector pair.

    Returns a tuple (detector for current frames, detector for the reference frame).
    The thresholds come from _get_thresholds() and are rounded before being passed as `thresh`.
    """
    threshold_cur, threshold_ref = self._get_thresholds()
    make_brisk = cv2.cuda.BRISK.create if self.gpu else cv2.BRISK_create
    detector_cur = make_brisk(thresh=round(threshold_cur))
    detector_ref = make_brisk(thresh=round(threshold_ref))
    return detector_cur, detector_ref
|
|
186
|
+
|
|
187
|
+
def _create_kaze_detectors(self):
    """
    Build the KAZE detector pair.

    Returns a tuple (detector for current frames, detector for the reference frame),
    each created with its own threshold from _get_thresholds().
    """
    threshold_cur, threshold_ref = self._get_thresholds()
    make_kaze = cv2.cuda.KAZE_create if self.gpu else cv2.KAZE_create
    detector_cur = make_kaze(threshold=threshold_cur)
    detector_ref = make_kaze(threshold=threshold_ref)
    return detector_cur, detector_ref
|
|
196
|
+
|
|
197
|
+
def _create_akaze_detectors(self):
    """
    Build the AKAZE detector pair.

    Returns a tuple (detector for current frames, detector for the reference frame),
    each created with its own threshold from _get_thresholds().
    """
    threshold_cur, threshold_ref = self._get_thresholds()
    make_akaze = cv2.cuda.AKAZE_create if self.gpu else cv2.AKAZE_create
    detector_cur = make_akaze(threshold=threshold_cur)
    detector_ref = make_akaze(threshold=threshold_ref)
    return detector_cur, detector_ref
|
|
206
|
+
|
|
207
|
+
def _get_thresholds(self):
    """
    Return the detector thresholds (current frame, reference frame) for BRISK, KAZE, or AKAZE.

    If a precomputed model file exists for the configured detector / mask / CLAHE
    combination, both thresholds are evaluated from its two coefficients as
    model[1] + model[0] * n_features, with n_features = max_features for the current
    frame and max_features * ref_multiplier for the reference frame.
    Otherwise the configured default threshold of the detector is used for both.
    """
    name = self.detector_name.upper()
    model_file = ROOT / 'thresholds' / 'models' / name / f'model_mask_{self.mask_use}_clahe_{self.clahe}.txt'

    if not model_file.exists():
        # No model available: fall back to the per-detector default from the configuration.
        if name == 'BRISK':
            fallback = self.brisk_threshold
        elif name == 'KAZE':
            fallback = self.kaze_threshold
        else:
            fallback = self.akaze_threshold
        if not self.benchmark:
            logger.warning(f"No threshold analysis for {name}. Using default threshold.")
        return fallback, fallback

    model = np.loadtxt(str(model_file))
    threshold_cur = model[1] + model[0] * self.max_features
    threshold_ref = model[1] + model[0] * self.max_features * self.ref_multiplier
    if not self.benchmark:
        logger.info(f"Using {name} with threshold {threshold_ref} for the reference frame and {threshold_cur} for the current frame.")
    return threshold_cur, threshold_ref
|
|
225
|
+
|
|
226
|
+
def _get_norm_type(self):
    """
    Return the OpenCV norm constant used to compare descriptors of the configured detector.

    NORM_L2 for sift / rsift / kaze, NORM_HAMMING for orb / brisk / akaze,
    None for any other detector name.
    """
    name = self.detector_name
    if name in ("sift", "rsift", "kaze"):
        return cv2.NORM_L2
    if name in ("orb", "brisk", "akaze"):
        # N.B.: if ORB is using WTA_K == 3 or 4, cv.NORM_HAMMING2 should be used
        return cv2.NORM_HAMMING
    return None
|
|
234
|
+
|
|
235
|
+
def _create_matcher(self):
    """
    Instantiate the configured descriptor matcher and store it in `self.matcher`.

    'bf' selects the brute-force matcher and 'flann' the FLANN-based one; for any
    other name `self.matcher` is left untouched.
    """
    name = self.matcher_name
    if name == "flann":
        self.matcher = self._create_flann_matcher()
    elif name == "bf":
        self.matcher = self._create_brute_force_matcher()
|
|
243
|
+
|
|
244
|
+
def _create_brute_force_matcher(self):
    """
    Return a brute-force descriptor matcher for the configured norm type.

    Cross-checking is enabled for every filter type except 'ratio'.
    """
    # The 'ratio' filter asks for two nearest neighbours per descriptor (see get_matches),
    # so cross-checking is switched off for it.
    cross_check = self.filter_type != 'ratio'
    if self.gpu:
        return cv2.cuda.DescriptorMatcher_createBFMatcher(self.norm_type, crossCheck=cross_check)
    return cv2.BFMatcher(self.norm_type, crossCheck=cross_check)
|
|
249
|
+
|
|
250
|
+
def _create_flann_matcher(self):
    """
    Return a FLANN-based descriptor matcher whose index parameters depend on the norm type.

    Hamming norms (binary descriptors) use algorithm 6 with LSH-style parameters;
    the L2 norm uses algorithm 0 with `trees=5`. Search always uses 100 checks.
    """
    if self.norm_type in [cv2.NORM_HAMMING, cv2.NORM_HAMMING2]:
        index_params = {'algorithm': 6, 'table_number': 6, 'key_size': 12, 'multi_probe_level': 1}
    elif self.norm_type == cv2.NORM_L2:
        # NOTE(review): in OpenCV's FLANN algorithm enum 0 appears to be the linear index and
        # 1 the KD-tree; `trees=5` suggests the KD-tree was intended - confirm before changing.
        index_params = {'algorithm': 0, 'trees': 5}
    search_params = {'checks': 100}

    if self.gpu:
        return cv2.cuda.DescriptorMatcher_createFlannBasedMatcher(index_params, search_params)
    return cv2.FlannBasedMatcher(index_params, search_params)
|
|
260
|
+
|
|
261
|
+
def _create_transformer(self):
    """
    Select the OpenCV estimator matching the configured transformation type.

    'projective' -> cv2.findHomography, 'affine' -> cv2.estimateAffinePartial2D.
    For any other type `self.transformer` is left untouched.
    """
    kind = self.transformation_type
    if kind == 'affine':
        self.transformer = cv2.estimateAffinePartial2D
    elif kind == 'projective':
        self.transformer = cv2.findHomography
|
|
269
|
+
|
|
270
|
+
def _create_helpers(self):
    """
    Bind the grayscale converter, CLAHE object and resize function, using the
    cv2.cuda variants when `self.gpu` is set and the plain cv2 variants otherwise.
    """
    if self.gpu:
        self.grayscale_converter = cv2.cuda.cvtColor
        self.claher = cv2.cuda.createCLAHE(clipLimit=1.0, tileGridSize=(8, 8))
        self.resizer = cv2.cuda.resize
    else:
        self.grayscale_converter = cv2.cvtColor
        self.claher = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(8, 8))
        self.resizer = cv2.resize
|
|
277
|
+
|
|
278
|
+
@timer(PROFILING)
|
|
279
|
+
def set_ref_frame(self, frame: np.ndarray, boxes: np.ndarray = None, box_format: str = 'xywh') -> None:
    """
    Register `frame` (and optionally its bounding boxes) as the reference.

    Delegates to process_frame with is_reference=True, which stores the frame and
    computes its keypoints and descriptors.
    """
    self.process_frame(frame, boxes=boxes, box_format=box_format, is_reference=True)
|
|
285
|
+
|
|
286
|
+
@timer(PROFILING)
|
|
287
|
+
def stabilize(self, frame: np.ndarray, boxes: np.ndarray = None, box_format: str = 'xywh') -> None:
    """
    Estimate the transformation that maps an un-stabilized frame onto the reference frame.

    The frame is processed as the current frame, its descriptors are matched against
    those of the reference frame, the matched point pairs are stored in `ref_pts` /
    `cur_pts`, and calculate_transformation_matrix() is invoked. Nothing is returned;
    the matrix can afterwards be used to warp the frame or transform boxes.
    """
    if not self.process_frame(frame, boxes, box_format, is_reference=False):
        return

    matches = self.get_matches(self.ref_desc, self.cur_desc)
    if matches and self.ref_kpts is not None and self.cur_kpts is not None:
        # Reference descriptors were the query set, current descriptors the train set.
        ref_xy = [self.ref_kpts[match.queryIdx].pt for match in matches]
        cur_xy = [self.cur_kpts[match.trainIdx].pt for match in matches]
        self.ref_pts = np.float32(ref_xy).reshape(-1, 2)
        self.cur_pts = np.float32(cur_xy).reshape(-1, 2)
    else:
        self.ref_pts, self.cur_pts = [], []

    self.calculate_transformation_matrix()
|
|
303
|
+
|
|
304
|
+
def process_frame(self, frame: np.ndarray, boxes: np.ndarray = None, box_format: str = 'xywh', is_reference: bool = False) -> bool:
    """
    Store a frame (reference or current) together with its boxes, and compute its features.

    Arguments:
    - frame: the image to process; the process exits with status 1 if it is None
    - boxes: bounding boxes used to build the exclusion mask; ignored when mask_use is off
    - box_format: format of `boxes` ('xywh' or 'four')
    - is_reference: store the results in the ref_* attributes instead of the cur_* ones

    Returns True once the frame has been processed.
    """
    if frame is None:
        logger.error(f'{"Reference" if is_reference else "Current"} frame is invalid.')
        sys.exit(1)

    if not self.mask_use:
        boxes = None  # if mask is not to be used, ignore the bounding boxes even if provided
    elif boxes is None:
        logger.warning(f'Mask is set to be used, but no bounding boxes were provided for the {"reference" if is_reference else "current"} frame.')

    prefix = 'ref' if is_reference else 'cur'

    # The reference frame defines the working image size (used when building masks and warping).
    if is_reference:
        self.h, self.w = frame.shape[:2]
    setattr(self, f'{prefix}_frame', frame)
    setattr(self, f'{prefix}_boxes', boxes)
    setattr(self, f'{prefix}_box_format', box_format)

    mask = self.create_binary_mask(boxes, box_format) if boxes is not None else None
    kpts, desc, frame_gray = self.get_features_and_descriptors(frame, mask, is_reference)
    setattr(self, f'{prefix}_kpts', kpts)
    setattr(self, f'{prefix}_desc', desc)
    setattr(self, f'{prefix}_frame_gray', frame_gray)

    # Extra data kept only for visualization.
    if self.viz:
        setattr(self, f'{prefix}_mask', mask)
        if is_reference:
            if self.ref_kpts is not None:
                self.ref_pts = np.array([ref_kpt.pt for ref_kpt in self.ref_kpts], dtype=np.float32).reshape(-1, 2)
            else:
                self.ref_pts = []

    return True
|
|
340
|
+
|
|
341
|
+
@timer(PROFILING)
|
|
342
|
+
def get_features_and_descriptors(self, frame: np.ndarray, mask: np.ndarray = None, ref_frame: bool = False) -> tuple:
    """
    Detect keypoints and compute descriptors for the given frame.

    The frame is converted to grayscale, optionally CLAHE-enhanced, optionally
    down-sampled (together with the mask) and passed to the reference or current
    detector. Keypoint coordinates are scaled back to the original resolution.

    Arguments:
    - frame: BGR image; if None, (None, None, None) is returned
    - mask: optional mask forwarded to the detector (resized with the frame)
    - ref_frame: use the reference-frame detector instead of the current-frame one

    Returns a tuple (kpts, desc, frame_gray). `frame_gray` is the full-resolution
    grayscale (and CLAHE-enhanced, if enabled) frame when `viz` is on, else None.
    (None, None, None) is returned when the detector raises cv2.error.
    """
    if frame is None:
        return None, None, None

    if self.gpu:
        frame = cv2.cuda_GpuMat(frame)
        mask = cv2.cuda_GpuMat(mask) if mask is not None else None

    frame = self.grayscale_converter(frame, cv2.COLOR_BGR2GRAY)
    if self.clahe:
        frame = self.claher.apply(frame)

    # Keep the full-resolution grayscale frame only when it is needed for visualization.
    frame_gray = frame if self.viz else None

    if self.downsample_ratio != 1.0:
        frame = self.resizer(frame, (0, 0), fx=self.downsample_ratio, fy=self.downsample_ratio)
        mask = self.resizer(mask, (0, 0), fx=self.downsample_ratio, fy=self.downsample_ratio) if mask is not None else None

    try:
        kpts, desc = (self.detector_ref if ref_frame else self.detector_cur).detectAndCompute(frame, mask)
    except cv2.error as e:
        logger.warning(f"Features and descriptors couldn't be found. \n Error: {e}")
        return None, None, None

    # RootSIFT: L1-normalise each descriptor, then take the element-wise square root.
    # The detector can hand back no descriptors (None) when it finds no keypoints,
    # in which case there is nothing to normalise.
    if self.detector_name == 'rsift' and desc is not None:
        desc = np.sqrt(desc / (desc.sum(axis=1, keepdims=True) + 1e-8))

    # Map keypoint coordinates from the down-sampled image back to the original frame.
    if self.downsample_ratio != 1.0:
        for kpt in kpts:
            kpt.pt = (kpt.pt[0] / self.downsample_ratio, kpt.pt[1] / self.downsample_ratio)

    if self.gpu:
        # NOTE(review): the GPU path is described as not fully implemented/tested in the class docs.
        kpts = self.detector_cur.convert(kpts)
        desc = self.detector_cur.convert(desc)
        frame_gray = frame.download(frame_gray)

    return kpts, desc, frame_gray
|
|
383
|
+
|
|
384
|
+
@timer(PROFILING)
|
|
385
|
+
def get_matches(self, desc1: np.ndarray, desc2: np.ndarray) -> list:
    """
    Match two descriptor sets and keep the matches that pass the configured filter.

    - 'none': every match returned by the matcher is kept.
    - 'distance': matches are sorted by distance and kept when their distance is at most
      min_dist + (max_dist - min_dist) * filter_ratio.
    - 'ratio': the two nearest neighbours are retrieved for each descriptor and the best
      one is kept when its distance is below filter_ratio times the second-best distance.

    Arguments:
    - desc1: query descriptors (stabilize() passes the reference descriptors)
    - desc2: train descriptors (stabilize() passes the current descriptors)

    Returns the list of retained matches; an empty list if a descriptor set is None,
    if the matcher produced no matches, or if the matcher raised cv2.error.
    """
    if desc1 is None or desc2 is None:
        logger.warning("One of the descriptors is invalid.")
        return []

    try:
        if self.filter_type == 'none':
            good_matches = self.matcher.match(desc1, desc2, None)
        elif self.filter_type == 'distance':
            matches = sorted(self.matcher.match(desc1, desc2, None), key=lambda x: x.distance)
            if matches:
                min_dist, max_dist = matches[0].distance, matches[-1].distance
                good_thresh = min_dist + (max_dist - min_dist) * self.filter_ratio
                good_matches = [m for m in matches if m.distance <= good_thresh]
            else:
                # The matcher can legitimately return nothing (e.g. no mutual matches);
                # there is no distance range to derive a threshold from.
                good_matches = []
        elif self.filter_type == 'ratio':
            matches = self.matcher.knnMatch(desc1, desc2, k=2)
            good_matches = []
            for pair in matches:
                # Fewer than two neighbours may be returned; the ratio test needs both.
                if len(pair) == 2:
                    m, n = pair
                    if m.distance < self.filter_ratio*n.distance:
                        good_matches.append(m)
    except cv2.error as e:
        logger.error(f"Matches couldn't be found. \n Error: {e}")
        return []

    if len(good_matches) <= self.min_good_match_count_warning:
        logger.warning(f'Only {len(good_matches)} good matches were found.')

    return list(good_matches)
|
|
418
|
+
|
|
419
|
+
@timer(PROFILING)
|
|
420
|
+
def calculate_transformation_matrix(self) -> None:
    """
    Estimate the transformation matrix that maps the current points onto the reference points.

    The configured estimator (`self.transformer`) is called with `cur_pts` as source and
    `ref_pts` as destination, using the configured RANSAC settings. Estimation is attempted
    only when both point sets hold at least 4 points.

    On success `cur_trans_matrix` and `trans_matrix_last_known` are updated. When there are
    too few points, the estimator raises cv2.error, or it returns None, a fallback matrix is
    used instead (see _fall_back_to_known_transformation).

    When `viz` is on, the inlier mask and inlier count ('N/A' on fallback) are stored in
    `cur_inliers` / `cur_inliers_count`.
    """
    enough_points = (
        self.ref_pts is not None and self.cur_pts is not None
        and len(self.ref_pts) >= 4 and len(self.cur_pts) >= 4
    )

    if not enough_points:
        logger.warning('Not enough points to estimate the transformation matrix.')
        inliers, inliers_count = self._fall_back_to_known_transformation()
    else:
        try:
            self.cur_trans_matrix, inliers = self.transformer(
                self.cur_pts, self.ref_pts,
                maxIters=self.ransac_max_iter,
                method=self.ransac_method,
                confidence=self.ransac_confidence,
                ransacReprojThreshold=self.ransac_epipolar_threshold,
            )
        except cv2.error as e:
            logger.exception(f"Transformation matrix couldn't be calculated.\n Error: {e}")
            inliers, inliers_count = self._fall_back_to_known_transformation()
        else:
            if self.cur_trans_matrix is not None:
                self.trans_matrix_last_known = self.cur_trans_matrix
                inliers_count = sum(inliers.ravel().tolist())
                if inliers_count <= self.min_inliers_match_count_warning:
                    logger.warning(f'Only {inliers_count} inliers points were used to estimate the transformation matrix.')
            else:
                logger.warning('Transformation matrix is None.')
                inliers, inliers_count = self._fall_back_to_known_transformation()

    if self.viz:
        self.cur_inliers = inliers
        self.cur_inliers_count = inliers_count

def _fall_back_to_known_transformation(self):
    """
    Set `cur_trans_matrix` to a fallback and return the matching (inliers, inliers_count).

    In benchmark mode the fallback is an identity matrix shaped for the configured
    transformation type (3x3 for 'projective', 2x3 otherwise, as expected by cv2.warpAffine).
    Otherwise the last successfully estimated matrix (possibly None) is re-used and a
    warning is logged.

    Returns an all-False inlier mask with one row per current point (no rows if there
    are no current points) and the count placeholder 'N/A'.
    """
    if self.benchmark:
        if self.transformation_type == 'projective':
            self.cur_trans_matrix = np.eye(3)
        else:
            self.cur_trans_matrix = np.eye(2, 3)
    else:
        self.cur_trans_matrix = self.trans_matrix_last_known
        logger.warning("Re-using the last known transformation matrix.")

    # cur_pts is None until the first frame has been stabilized.
    n_points = 0 if self.cur_pts is None else len(self.cur_pts)
    return np.full((n_points, 1), False, dtype=bool), 'N/A'
|
|
459
|
+
|
|
460
|
+
@timer(PROFILING)
|
|
461
|
+
def create_binary_mask(self, boxes: np.ndarray, box_format: str) -> np.ndarray:
    """
    Build a detection mask of the reference-frame size from bounding boxes.

    The mask is 255 everywhere except inside the boxes, which are set to 0. Each box is
    first enlarged by `mask_margin_ratio` of its width and height.

    Arguments:
    - boxes: iterable of boxes; in 'xywh' format each box is (x_center, y_center, width, height)
    - box_format: 'four' boxes are converted with four2xywh first; anything else is used as 'xywh'

    Returns a uint8 array of shape (h, w). The process exits with status 1 if the
    reference frame (and therefore h and w) has not been set.
    """
    if self.h is None or self.w is None:
        logger.error("Reference frame is not set.")
        sys.exit(1)

    if box_format == 'four':
        boxes = four2xywh(boxes)

    mask = np.full((self.h, self.w), 255, dtype=np.uint8)
    for box in boxes:
        xc, yc, wb, hb = box
        wb += wb * self.mask_margin_ratio
        hb += hb * self.mask_margin_ratio
        x1, y1, x2, y2 = int(xc - wb / 2), int(yc - hb / 2), int(xc + wb / 2), int(yc + hb / 2)

        # Clamp BOTH ends of each range to the image. A negative stop index would otherwise
        # be interpreted by NumPy as "counted from the end" and a box lying above or left of
        # the frame would blank out most of the image.
        x1, x2 = max(0, x1), max(0, min(self.w, x2))
        y1, y2 = max(0, y1), max(0, min(self.h, y2))
        mask[y1:y2, x1:x2] = 0

    return mask
|
|
481
|
+
|
|
482
|
+
@timer(PROFILING)
|
|
483
|
+
def warp_cur_frame(self) -> Union[np.ndarray, None]:
    """
    Return the stored current frame warped with the current transformation matrix.

    Shorthand for warp_frame(self.cur_frame).
    """
    current = self.cur_frame
    return self.warp_frame(current)
|
|
488
|
+
|
|
489
|
+
def warp_frame(self, frame: np.ndarray) -> Union[np.ndarray, None]:
    """
    Warp `frame` into the reference frame using the current transformation matrix.

    Returns None for a None frame, the unchanged frame when no transformation matrix is
    available, and otherwise the warped image of size (w, h) of the reference frame.
    The process exits with status 1 if the reference frame has not been set.
    """
    if frame is None:
        return None

    if self.w is None or self.h is None:
        logger.error("Reference frame is not set.")
        sys.exit(1)

    matrix = self.cur_trans_matrix
    if matrix is None:
        logger.warning("Transformation matrix is None.")
        return frame

    output_size = (self.w, self.h)
    if self.transformation_type == 'affine':
        return cv2.warpAffine(frame, matrix, output_size)
    if self.transformation_type == 'projective':
        return cv2.warpPerspective(frame, matrix, output_size)
    return None
|
|
505
|
+
|
|
506
|
+
def transform_cur_boxes(self, out_box_format: str = 'xywh') -> Union[np.ndarray, None]:
    """
    Return the stored current bounding boxes mapped into the reference frame.

    Shorthand for transform_boxes() applied to the current boxes, the current
    transformation matrix and the format the current boxes were supplied in.
    """
    return self.transform_boxes(
        self.cur_boxes,
        self.cur_trans_matrix,
        self.cur_box_format,
        out_box_format,
    )
|
|
511
|
+
|
|
512
|
+
def transform_boxes(self, boxes: np.ndarray, trans_matrix: np.ndarray, in_box_format: str = 'xywh', out_box_format: str = 'xywh') -> Union[np.ndarray, None]:
    """
    Transform bounding boxes with the given transformation matrix.

    Arguments:
    - boxes: boxes to transform; None yields None
    - trans_matrix: transformation matrix; if None the boxes are not moved, but are still
      converted to `out_box_format` when it differs from `in_box_format`
    - in_box_format: 'xywh' boxes are expanded to corner points with xywh2four; any other
      value is treated as already being corner points ('four')
    - out_box_format: 'xywh' or 'four' (one row of 8 corner coordinates per box)

    Returns the transformed boxes in `out_box_format`, or None for an unknown output format.
    """
    if boxes is None:
        return None

    if trans_matrix is None:
        # Nothing to apply, but the caller must still receive the format that was asked for.
        if in_box_format == 'xywh' and out_box_format == 'four':
            return np.array([xywh2four(boxes)]).reshape(-1, 8)
        if in_box_format != 'xywh' and out_box_format == 'xywh':
            return four2xywh(boxes)
        return boxes

    if in_box_format == 'xywh':
        boxes = xywh2four(boxes)

    # One (x, y) point per row, in the (N, 1, 2) layout used by the OpenCV point transforms.
    boxes = np.array([boxes]).reshape(-1, 1, 2)
    if self.transformation_type == 'projective':
        # cv2.perspectiveTransform only accepts floating-point points.
        if boxes.dtype not in (np.float32, np.float64):
            boxes = boxes.astype(np.float64)
        boxes = cv2.perspectiveTransform(boxes, trans_matrix)
    elif self.transformation_type == 'affine':
        boxes = cv2.transform(boxes, trans_matrix)

    if out_box_format == 'xywh':
        return four2xywh(boxes.reshape(-1, 8))
    elif out_box_format == 'four':
        return boxes.reshape(-1, 8)
    return None
|
|
534
|
+
|
|
535
|
+
def get_cur_frame(self) -> Union[np.ndarray, None]:
    """
    Return the frame currently stored on the instance (``self.cur_frame``).
    """
    frame = self.cur_frame
    return frame
|
|
540
|
+
|
|
541
|
+
def get_cur_boxes(self) -> Union[np.ndarray, None]:
    """
    Return the bounding boxes currently stored on the instance (``self.cur_boxes``).
    """
    boxes = self.cur_boxes
    return boxes
|
|
546
|
+
|
|
547
|
+
def get_cur_trans_matrix(self) -> Union[np.ndarray, None]:
    """
    Return the transformation matrix currently stored on the instance
    (``self.cur_trans_matrix``).
    """
    matrix = self.cur_trans_matrix
    return matrix
|
|
552
|
+
|
|
553
|
+
def get_basic_info(self) -> dict:
    """
    Summarise the main configuration of the Stabilizer as a dictionary.

    The keys are the attribute names themselves, in a fixed order.
    """
    reported = (
        'detector_name',
        'matcher_name',
        'filter_type',
        'transformation_type',
        'clahe',
        'mask_use',
    )
    return {field: getattr(self, field) for field in reported}
|
|
565
|
+
|
|
566
|
+
def _validate_arguments(self):
    """
    Validate the arguments provided during the initialization of the Stabilizer class.

    Checks run in a fixed order and stop at the first failure.

    Raises:
        ValueError: If a name is not in its ``VALID_*`` collection, a numeric
            setting is outside its allowed range, ``max_features`` or
            ``ransac_max_iter`` is not a positive integer, or ``gpu`` is
            requested while OpenCV reports no CUDA-enabled device.
    """
    if self.detector_name not in self.VALID_DETECTORS:
        raise ValueError(f"Invalid detector: {self.detector_name}. Choose from {self.VALID_DETECTORS}")
    if self.matcher_name not in self.VALID_MATCHERS:
        raise ValueError(f"Invalid matcher: {self.matcher_name}. Choose from {self.VALID_MATCHERS}")
    if self.filter_type not in self.VALID_FILTER_TYPES:
        raise ValueError(f"Invalid filter type: {self.filter_type}. Choose from {self.VALID_FILTER_TYPES}")
    if self.transformation_type not in self.VALID_TRANSFORMATION_TYPES:
        raise ValueError(f"Invalid transformation type: {self.transformation_type}. Choose from {self.VALID_TRANSFORMATION_TYPES}")
    # ransac_method is compared against the dict's values; the message lists the keys.
    if self.ransac_method not in self.VALID_RANSAC_METHODS_DICT.values():
        raise ValueError(f"Invalid RANSAC method: {self.ransac_method}. Choose from {self.VALID_RANSAC_METHODS_DICT.keys()}")
    if not (0.0 < self.downsample_ratio <= 1.0):
        raise ValueError("Invalid downsample_ratio. It should be in the range (0.0, 1.0]")
    # The type test must sit inside the negation: `not (0 < x) and isinstance(x, int)`
    # only rejected non-positive ints and let floats such as 0.5 or -1.5 through.
    # Testing the type first also avoids comparing non-numeric values with 0.
    if not (isinstance(self.max_features, (int, np.integer)) and self.max_features > 0):
        raise ValueError("Invalid max_features. It should be greater than 0 and an integer")
    if not (1 <= self.ref_multiplier):
        raise ValueError("Invalid ref_multiplier. It should be greater than or equal to 1")
    if not (0.0 < self.filter_ratio <= 1.0):
        raise ValueError("Invalid filter_ratio. It should be in the range (0.0, 1.0]")
    if not (isinstance(self.ransac_max_iter, (int, np.integer)) and self.ransac_max_iter > 0):
        raise ValueError("Invalid ransac_max_iter. It should be greater than 0 and an integer")
    if not (0.0 < self.ransac_epipolar_threshold):
        raise ValueError("Invalid ransac_epipolar_threshold. It should be greater than 0")
    if not (0.0 < self.ransac_confidence <= 1.0):
        raise ValueError("Invalid ransac_confidence. It should be in the range (0.0, 1.0]")
    # The CUDA query only runs when GPU use was requested.
    if self.gpu and not cv2.cuda.getCudaEnabledDeviceCount():
        raise ValueError("GPU is enabled but no CUDA-enabled device was found")
|
stabilo/utils.py
ADDED
|
@@ -0,0 +1,112 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
# Author: Robert Fonod (robert.fonod@ieee.org)
|
|
3
|
+
|
|
4
|
+
import logging
|
|
5
|
+
import sys
|
|
6
|
+
import time
|
|
7
|
+
|
|
8
|
+
import cv2
|
|
9
|
+
import numpy as np
|
|
10
|
+
import yaml
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def setup_logger(name: str, log_file: str = None, level: int = logging.INFO) -> logging.Logger:
    """
    Create (or reconfigure) the logger called ``name``.

    The logger always gets a console handler and, when ``log_file`` is given,
    a file handler for that path. Both use the same timestamped format.
    Calling this function again for the same name does not stack duplicate
    handlers, so records are not emitted several times.

    Args:
        name: Name passed to ``logging.getLogger``.
        log_file: Optional path of a log file to write to as well.
        level: Level set on the logger.

    Returns:
        The configured ``logging.Logger``.
    """
    import os  # local import: only needed to normalise the log file path

    formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    logger = logging.getLogger(name)
    logger.setLevel(level)

    if log_file:
        # FileHandler stores the absolute path in ``baseFilename``; compare like with like.
        target = os.path.abspath(os.fspath(log_file))
        has_file_handler = any(
            isinstance(handler, logging.FileHandler) and handler.baseFilename == target
            for handler in logger.handlers
        )
        if not has_file_handler:
            file_handler = logging.FileHandler(log_file)
            file_handler.setFormatter(formatter)
            logger.addHandler(file_handler)

    # FileHandler subclasses StreamHandler, so exclude it when looking for a console handler.
    has_console_handler = any(
        isinstance(handler, logging.StreamHandler) and not isinstance(handler, logging.FileHandler)
        for handler in logger.handlers
    )
    if not has_console_handler:
        console_handler = logging.StreamHandler()
        console_handler.setFormatter(formatter)
        logger.addHandler(console_handler)

    return logger
|
|
31
|
+
|
|
32
|
+
def timer(profiling: bool = False):
    """
    Decorator factory that optionally reports a function's execution time.

    Args:
        profiling: When False the decorated function is simply called. When
            True the elapsed time in milliseconds is printed after each call.

    Returns:
        A decorator. The wrapper keeps the wrapped function's metadata
        (``__name__``, ``__doc__``, ...) and returns its result unchanged.
    """
    from functools import wraps  # local import keeps this block self-contained

    def decorator(func):
        @wraps(func)  # without this, every decorated function would be named 'wrapper'
        def wrapper(*args, **kwargs):
            if not profiling:
                return func(*args, **kwargs)
            # perf_counter is monotonic, unlike time.time() which follows wall-clock adjustments.
            start_time = time.perf_counter()
            result = func(*args, **kwargs)
            print(f"{func.__name__:<35} execution time: {1000*(time.perf_counter() - start_time):>10.2f} ms")
            return result
        return wrapper
    return decorator
|
|
46
|
+
|
|
47
|
+
def load_config(cfg_filepath: str, logger: logging.Logger = None) -> dict:
    """
    Read a YAML configuration file and return its parsed content.

    If the file does not exist, the problem is reported through ``logger``
    (when one is given) and the process exits with status 1.
    """
    try:
        with open(cfg_filepath, 'r') as stream:
            return yaml.safe_load(stream)
    except FileNotFoundError:
        if logger is not None:
            logger.error(f"Configuration file {cfg_filepath} not found.")
        sys.exit(1)
|
|
59
|
+
|
|
60
|
+
def xywh2four(boxes: np.ndarray) -> np.ndarray:
    """
    Expand centre-format boxes [xc, yc, w, h] into their four corner points.

    Each output row is [x1, y1, x2, y2, x3, y3, x4, y4], listing the corners
    in the order (min x, min y), (max x, min y), (max x, max y), (min x, max y)
    for non-negative widths and heights.
    """
    centre_x, centre_y, width, height = boxes.T

    half_w = 0.5 * width
    half_h = 0.5 * height
    left, right = centre_x - half_w, centre_x + half_w
    top, bottom = centre_y - half_h, centre_y + half_h

    return np.column_stack((left, top, right, top, right, bottom, left, bottom))
|
|
76
|
+
|
|
77
|
+
def four2xywh(boxes: np.ndarray) -> np.ndarray:
    """
    Collapse four-point boxes [x1, y1, x2, y2, x3, y3, x4, y4] into
    [xc, yc, w, h] rows.

    The result is the axis-aligned bounding rectangle of the four points,
    so the order of the points does not matter.
    """
    corners = boxes.reshape(-1, 4, 2)

    # Per-box extremes over the four corners; column 0 is x, column 1 is y.
    lower = corners.min(axis=1)
    upper = corners.max(axis=1)

    centre = (lower + upper) / 2
    extent = upper - lower

    return np.hstack((centre, extent))
|
|
96
|
+
|
|
97
|
+
def detect_delimiter(filepath: str, lines_to_check: int = 5) -> str:
    """
    Guess the delimiter of a CSV-like file from its first few lines.

    Counts commas, spaces and tabs in up to ``lines_to_check`` leading lines
    and returns the most frequent one. On a tie the earlier candidate in the
    order comma, space, tab is returned.
    """
    counts = {candidate: 0 for candidate in (',', ' ', '\t')}
    with open(filepath, 'r') as handle:
        # zip stops after lines_to_check lines or at end of file, whichever comes first.
        for _, text in zip(range(lines_to_check), handle):
            for candidate in counts:
                counts[candidate] += text.count(candidate)

    return max(counts, key=counts.get)
|
|
112
|
+
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 Robert Fonod
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,209 @@
|
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
|
+
Name: stabilo
|
|
3
|
+
Version: 1.0.0
|
|
4
|
+
Summary: Stabilizes video or extracted trajectories with respect to a selected reference frame in the video, with optional user-provided masks.
|
|
5
|
+
Author-email: Robert Fonod <robert.fonod@ieee.org>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2024 Robert Fonod
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
Project-URL: Homepage, https://github.com/rfonod/stabilo/
|
|
28
|
+
Project-URL: Repository, https://github.com/rfonod/stabilo/
|
|
29
|
+
Project-URL: Changelog, https://github.com/rfonod/stabilo/releases
|
|
30
|
+
Project-URL: Issues, https://github.com/rfonod/stabilo/issues/
|
|
31
|
+
Keywords: stabilo,video-stabilization,object-stabilization,mask,reference-frame,computer-vision
|
|
32
|
+
Classifier: Development Status :: 5 - Production/Stable
|
|
33
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
34
|
+
Classifier: Operating System :: OS Independent
|
|
35
|
+
Classifier: Programming Language :: Python :: 3
|
|
36
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
37
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
38
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
39
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
40
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
41
|
+
Classifier: Intended Audience :: Developers
|
|
42
|
+
Classifier: Intended Audience :: Science/Research
|
|
43
|
+
Classifier: Topic :: Scientific/Engineering
|
|
44
|
+
Classifier: Topic :: Software Development
|
|
45
|
+
Classifier: Topic :: Multimedia :: Video
|
|
46
|
+
Requires-Python: >=3.9
|
|
47
|
+
Description-Content-Type: text/markdown
|
|
48
|
+
License-File: LICENSE
|
|
49
|
+
Requires-Dist: numpy<2.0,>=1.26.4
|
|
50
|
+
Requires-Dist: opencv-python>=4.6.0
|
|
51
|
+
Requires-Dist: pyyaml>=5.3.1
|
|
52
|
+
Requires-Dist: tqdm>=4.64.0
|
|
53
|
+
Provides-Extra: dev
|
|
54
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
55
|
+
Requires-Dist: matplotlib>=3.5.0; extra == "dev"
|
|
56
|
+
Provides-Extra: extras
|
|
57
|
+
Requires-Dist: matplotlib>=3.5.0; extra == "extras"
|
|
58
|
+
|
|
59
|
+
# Stabilo
|
|
60
|
+
|
|
61
|
+
[](https://pypi.org/project/stabilo/) [](https://github.com/rfonod/stabilo/releases) [](https://github.com/rfonod/stabilo/blob/main/LICENSE) [](https://zenodo.org/doi/10.5281/zenodo.12117092) [](https://pypistats.org/packages/stabilo) [](https://github.com/rfonod/stabilo)
|
|
62
|
+
|
|
63
|
+
**Stabilo** is a specialized Python package for stabilizing video frames or tracked object trajectories in videos, using robust homography or affine transformations. Its core functionality focuses on aligning each frame or object track to a chosen reference frame, enabling precise stabilization that mitigates disturbances like camera movements. Key features include robust keypoint-based image registration and the option to integrate user-defined masks, which exclude dynamic regions (e.g., moving objects) to enhance stabilization accuracy. Integrating seamlessly with object detection and tracking algorithms, Stabilo is ideal for high-precision applications like urban traffic monitoring, as demonstrated in the [geo-trax](https://github.com/rfonod/geo-trax) 🚀 trajectory extraction framework. Extensive transformation and enhancement options, including multiple feature detectors and matchers, masking techniques, further expand its utility. The repository also includes valuable resources like utility scripts and example videos to demonstrate its capabilities.
|
|
64
|
+
|
|
65
|
+

|
|
66
|
+
|
|
67
|
+
## Features
|
|
68
|
+
|
|
69
|
+
- **Video Stabilization**: Align (warp) all video frames to a custom (anchor) reference frame using homography or affine transformations.
|
|
70
|
+
- **Trajectory Stabilization**: Transform object trajectories (e.g., bounding boxes) to a common fixed reference frame using homography or affine transformations.
|
|
71
|
+
- **User-Defined Masks**: Allow users to specify custom masks to exclude regions of interest during stabilization.
|
|
72
|
+
- **Wide Range of Algorithms**: Includes support for various feature detectors (ORB, (R)SIFT, BRISK, (A)KAZE), matchers (BF, FLANN), RANSAC algorithms (MAGSAC++, DEGENSAC, ...), transformation types, and pre-processing options.
|
|
73
|
+
- **Customizable Parameters**: Fine-tune the stabilization by adjusting parameters such as the number of keypoints, RANSAC parameters, matching thresholds, downsampling factors, etc.
|
|
74
|
+
- **Visualization Tools**: Generate visualizations of the stabilization process, with frame-by-frame comparisons and trajectory transformations (see the above animation).
|
|
75
|
+
- **Threshold Analysis**: Analyze the relationship between detection thresholds and keypoint counts for BRISK, KAZE, and AKAZE to fairly benchmark with different detectors.
|
|
76
|
+
- **Benchmarking Campaigns**: Use [stabilo-optimize](https://github.com/rfonod/stabilo-optimize) 🎯 to establish benchmarking campaigns to optimize algorithm and hyperparameter selection for specific applications.
|
|
77
|
+
|
|
78
|
+
<details>
|
|
79
|
+
<summary><b>🚀 Planned Enhancements</b></summary>
|
|
80
|
+
|
|
81
|
+
- **Unit Tests**: Comprehensive unit test suite to ensure package stability and reliability.
|
|
82
|
+
- **Different Mask Types**: Inclusion of additional mask types (e.g., polygonal, circular) for enhanced precision in stabilization.
|
|
83
|
+
- **GPU Acceleration**: Integration of GPU acceleration to improve processing speed.
|
|
84
|
+
- **Documentation**: Detailed documentation covering the package’s functionality and usage.
|
|
85
|
+
|
|
86
|
+
</details>
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
## Installation
|
|
90
|
+
|
|
91
|
+
First, create a **Python Virtual Environment** (Python >= 3.9) using e.g., [Miniconda3](https://docs.anaconda.com/free/miniconda/):
|
|
92
|
+
```bash
|
|
93
|
+
conda create -n stabilo python=3.9 -y
|
|
94
|
+
conda activate stabilo
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Then, install the stabilo library using one of the following options:
|
|
98
|
+
|
|
99
|
+
### Option 1: Install from PyPI
|
|
100
|
+
You can install the package from PyPI using pip:
|
|
101
|
+
```sh
|
|
102
|
+
pip install stabilo
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Option 2: Install from Source
|
|
106
|
+
You can install the package directly from the repository:
|
|
107
|
+
```sh
|
|
108
|
+
pip install git+https://github.com/rfonod/stabilo.git
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Option 3: Install from Local Source
|
|
112
|
+
|
|
113
|
+
You can also clone the repository and install the package from the local source:
|
|
114
|
+
|
|
115
|
+
```sh
|
|
116
|
+
git clone https://github.com/rfonod/stabilo.git
|
|
117
|
+
cd stabilo
|
|
118
|
+
pip install .
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
If you want the changes you make in the repo to be reflected in your install, use `pip install -e .` instead of `pip install .`.
|
|
122
|
+
|
|
123
|
+
## Example Usage
|
|
124
|
+
|
|
125
|
+
```python
|
|
126
|
+
from stabilo import Stabilizer
|
|
127
|
+
|
|
128
|
+
# Create an instance of the Stabilizer class with default parameters
|
|
129
|
+
stabilizer = Stabilizer()
|
|
130
|
+
|
|
131
|
+
# Set a reference frame with (optional) mask
|
|
132
|
+
stabilizer.set_ref_frame(ref_frame, ref_mask)
|
|
133
|
+
|
|
134
|
+
# Stabilize any frame with (optional) mask
|
|
135
|
+
stabilizer.stabilize(cur_frame, cur_mask)
|
|
136
|
+
|
|
137
|
+
# Get the stabilized (warped) frame
|
|
138
|
+
stabilized_frame = stabilizer.warp_cur_frame()
|
|
139
|
+
|
|
140
|
+
# Transform the current masks (bounding boxes) if they were provided
|
|
141
|
+
stabilized_boxes = stabilizer.transform_cur_boxes()
|
|
142
|
+
|
|
143
|
+
# Transform any point (homogeneous pixel coordinates) from the current frame to the reference frame
|
|
144
|
+
cur_point = np.array([x, y, 1])
|
|
145
|
+
ref_point = stabilizer.get_cur_trans_matrix() @ cur_point
|
|
146
|
+
```
|
|
147
|
+
|
|
148
|
+
## Utility Scripts
|
|
149
|
+
|
|
150
|
+
Utility scripts are provided to demonstrate the functionality of the Stabilo package. These scripts can be found in the `scripts` directory.
|
|
151
|
+
|
|
152
|
+
#### Stabilization Examples
|
|
153
|
+
|
|
154
|
+
- `stabilize_video.py`: Implements video stabilization relative to a reference frame.
|
|
155
|
+
- `stabilize_boxes.py`: Implements object trajectory stabilization relative to a reference frame.
|
|
156
|
+
|
|
157
|
+
#### Threshold Analysis
|
|
158
|
+
|
|
159
|
+
- `find_threshold_models.py`: Computes regression models between detection thresholds and average keypoint counts for BRISK, KAZE, and AKAZE feature detectors.
|
|
160
|
+
|
|
161
|
+
## Citing This Work
|
|
162
|
+
|
|
163
|
+
If you use this project in your academic research, commercial products, or any published material, please acknowledge its use by citing it.
|
|
164
|
+
|
|
165
|
+
1. **Preferred Citation:** For research-related references, please cite the related paper once it is formally published. A preprint is currently available on [arXiv](https://arxiv.org/abs/2411.02136):
|
|
166
|
+
|
|
167
|
+
```bibtex
|
|
168
|
+
@misc{fonod2024advanced,
|
|
169
|
+
title={Advanced computer vision for extracting georeferenced vehicle trajectories from drone imagery},
|
|
170
|
+
author={Robert Fonod and Haechan Cho and Hwasoo Yeo and Nikolas Geroliminis},
|
|
171
|
+
year={2024},
|
|
172
|
+
eprint={2411.02136},
|
|
173
|
+
archivePrefix={arXiv},
|
|
174
|
+
primaryClass={cs.CV},
|
|
175
|
+
url={https://arxiv.org/abs/2411.02136},
|
|
176
|
+
doi={https://doi.org/10.48550/arXiv.2411.02136}
|
|
177
|
+
}
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
2. **Repository Citation:** For direct use of the stabilo repository, please cite the software release version on Zenodo. You may refer to the DOI badge above for the correct version or use the BibTeX below:
|
|
181
|
+
|
|
182
|
+
```bibtex
|
|
183
|
+
@software{fonod2024stabilo,
|
|
184
|
+
author = {Fonod, Robert},
|
|
185
|
+
license = {MIT},
|
|
186
|
+
month = nov,
|
|
187
|
+
title = {Stabilo: A Comprehensive Python Library for Video and Trajectory Stabilization with User-Defined Masks},
|
|
188
|
+
url = {https://github.com/rfonod/stabilo},
|
|
189
|
+
doi = {10.5281/zenodo.12117092},
|
|
190
|
+
version = {1.0.0},
|
|
191
|
+
year = {2024}
|
|
192
|
+
}
|
|
193
|
+
```
|
|
194
|
+
|
|
195
|
+
To ensure accurate and consistent citations, a CITATION.cff file is included in this repository. GitHub automatically generates a formatted citation from this file, accessible in the "Cite this repository" option at the top right of this page.
|
|
196
|
+
|
|
197
|
+
Please select the correct citation based on your use:
|
|
198
|
+
- **Methodology:** For referencing the research framework and methodology, cite the journal paper (or preprint if unpublished).
|
|
199
|
+
- **Repository:** For direct use of the code, cite the Zenodo release of this GitHub repository.
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
## Contributing
|
|
203
|
+
|
|
204
|
+
Contributions are welcome! If you encounter any issues or have suggestions for improvements, please open a [GitHub Issue](https://github.com/rfonod/stabilo/issues) or submit a pull request. Your contributions are greatly appreciated!
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
## License
|
|
208
|
+
|
|
209
|
+
This project is licensed under the MIT License, an [OSI-approved](https://opensource.org/licenses/MIT) open-source license, which allows for both academic and commercial use. By citing this project, you help support its development and acknowledge the effort that went into creating it. For more details, see the [LICENSE](LICENSE) file. Thank you!
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
stabilo/__init__.py,sha256=QwOwL8BYKjKTEPpYw0V2qed_tl-zrk6_5QjZScLVJaU,95
|
|
2
|
+
stabilo/stabilo.py,sha256=1GDR0f4Eg1V5r52pnnkX6VBgHBB7zZDscqL65Egs70s,28322
|
|
3
|
+
stabilo/utils.py,sha256=D2QAQJXAMsYUhPhJW_6FLQKlMIh7I_2WRsripkPmR1I,3225
|
|
4
|
+
stabilo/cfg/default.yaml,sha256=kwMfJlbG7NOa8VzLVnqDI7I-_ARpI8-urjrSoWCANtY,2957
|
|
5
|
+
stabilo/thresholds/models/AKAZE/model_mask_False_clahe_False.txt,sha256=iE4OMFGbQYtwh8I26gfg5WaH_7MovCoFMXJ9Jdlz21o,51
|
|
6
|
+
stabilo/thresholds/models/AKAZE/model_mask_False_clahe_True.txt,sha256=nrfKupwWyJZ9kd9A9qBXYBT6zCTSuCdxubypnn4YJuY,51
|
|
7
|
+
stabilo/thresholds/models/AKAZE/model_mask_True_clahe_False.txt,sha256=UBHbWksqT0eiU8DBueBzC2-SPXIA6LFK3m2ZVo1KgJk,51
|
|
8
|
+
stabilo/thresholds/models/AKAZE/model_mask_True_clahe_True.txt,sha256=gfHWItznmy-BeW2Sq4_NwdVSeKX2DOipv9jnacHLoUQ,51
|
|
9
|
+
stabilo/thresholds/models/BRISK/model_mask_False_clahe_False.txt,sha256=eVkEknyAsg5rl1ixjc8Rldy-iypwGrOfG-Uz_jtFqNQ,51
|
|
10
|
+
stabilo/thresholds/models/BRISK/model_mask_False_clahe_True.txt,sha256=Ng3wCMuu8dqkWSsdJoxXUxYREB61_AqSMwmUu_BN1n0,51
|
|
11
|
+
stabilo/thresholds/models/BRISK/model_mask_True_clahe_False.txt,sha256=FxlSc9bS-uxs-Yccek7oV12xBD8Aj7D4Ua7UhDxARBo,51
|
|
12
|
+
stabilo/thresholds/models/BRISK/model_mask_True_clahe_True.txt,sha256=tgT9tTPquwxNU4FPHYT33J8cELwvKjRHAw5-g3RJJ7c,51
|
|
13
|
+
stabilo/thresholds/models/KAZE/model_mask_False_clahe_False.txt,sha256=ZgYIqNvci0jQyp_nW_BAhmDyfmc9v9hlyCOMiKu0pX8,51
|
|
14
|
+
stabilo/thresholds/models/KAZE/model_mask_False_clahe_True.txt,sha256=BKIT1m2Vh9tXEIYi0e1tULqWKxgO9q0iRAnFmfZ5ufw,51
|
|
15
|
+
stabilo/thresholds/models/KAZE/model_mask_True_clahe_False.txt,sha256=2lxN-bDBMbPl7u-IifBp7KU0tzi_DldagVxk9cdG4AI,51
|
|
16
|
+
stabilo/thresholds/models/KAZE/model_mask_True_clahe_True.txt,sha256=-XZnLgHImN70jNCYAVE-ZrSrw6UJhFP9IwaNxBy31gA,51
|
|
17
|
+
stabilo-1.0.0.dist-info/LICENSE,sha256=3dtRxi571l0chCMw2dqHmD2y1teMiqT1wkqs74lotFA,1068
|
|
18
|
+
stabilo-1.0.0.dist-info/METADATA,sha256=bZ5Wh_JCoXgJpiriOvw2pgZhBmaZTJ0CTLRINg07HAI,11306
|
|
19
|
+
stabilo-1.0.0.dist-info/WHEEL,sha256=R06PA3UVYHThwHvxuRWMqaGcr-PuniXahwjmQRFMEkY,91
|
|
20
|
+
stabilo-1.0.0.dist-info/top_level.txt,sha256=86H6WrIsGB1DZALM4Ih3wlx3yTw1w19ZOmrlSxe1dEU,8
|
|
21
|
+
stabilo-1.0.0.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
stabilo
|