supervisely 6.73.427-py3-none-any.whl → 6.73.429-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervisely/app/widgets/__init__.py +2 -0
- supervisely/app/widgets/input_tag/input_tag.py +102 -15
- supervisely/app/widgets/input_tag_list/__init__.py +0 -0
- supervisely/app/widgets/input_tag_list/input_tag_list.py +274 -0
- supervisely/app/widgets/input_tag_list/template.html +70 -0
- supervisely/convert/pointcloud/nuscenes_conv/nuscenes_converter.py +16 -25
- supervisely/convert/pointcloud_episodes/nuscenes_conv/nuscenes_converter.py +17 -29
- supervisely/convert/pointcloud_episodes/nuscenes_conv/nuscenes_helper.py +143 -104
- supervisely/nn/tracker/__init__.py +5 -6
- supervisely/nn/tracker/botsort/tracker/mc_bot_sort.py +1 -1
- supervisely/nn/tracker/botsort_tracker.py +9 -2
- supervisely/nn/tracker/calculate_metrics.py +264 -0
- supervisely/nn/tracker/utils.py +274 -0
- supervisely/nn/tracker/visualize.py +519 -0
- supervisely/template/experiment/experiment.html.jinja +26 -33
- {supervisely-6.73.427.dist-info → supervisely-6.73.429.dist-info}/METADATA +1 -1
- {supervisely-6.73.427.dist-info → supervisely-6.73.429.dist-info}/RECORD +21 -15
- {supervisely-6.73.427.dist-info → supervisely-6.73.429.dist-info}/LICENSE +0 -0
- {supervisely-6.73.427.dist-info → supervisely-6.73.429.dist-info}/WHEEL +0 -0
- {supervisely-6.73.427.dist-info → supervisely-6.73.429.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.427.dist-info → supervisely-6.73.429.dist-info}/top_level.txt +0 -0
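
The headline change in this release is a new tracking toolkit under `supervisely.nn.tracker` (new `calculate_metrics.py`, `utils.py`, and `visualize.py` modules) plus updated tracking documentation in the experiment report template. As a rough orientation before the per-file diffs below, here is a hedged sketch of how the new pieces fit together; the video ID, model name, and file paths are placeholders, and the calls mirror the template and module code shown later in this diff:

```python
import supervisely as sly
from supervisely.nn.tracker.utils import predictions_to_video_annotation
from supervisely.nn.tracker.visualize import visualize

api = sly.Api()  # reads SERVER_ADDRESS / API_TOKEN from the environment

# Deploy a detector and run tracking-by-detection on a video
# (the same call the updated experiment template documents below).
model = api.nn.deploy(model="framework/model-name", device="cuda")  # placeholder model
predictions = model.predict(video_id=12345, tracking=True)

# New helpers added in this release: convert per-frame predictions into a
# VideoAnnotation, and render boxes, track IDs and trajectories into a video.
video_ann = predictions_to_video_annotation(predictions)
visualize(predictions, source="input.mp4", output_path="tracked.mp4")
```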

supervisely/nn/tracker/visualize.py — new file (@@ -0,0 +1,519 @@):

```python
from typing import Union, Dict, List, Tuple, Iterator, Optional
import numpy as np
import cv2
import ffmpeg
from pathlib import Path
from collections import defaultdict
from dataclasses import dataclass
import tempfile
import shutil

import supervisely as sly
from supervisely import logger
from supervisely.nn.model.prediction import Prediction
from supervisely import VideoAnnotation
from supervisely.nn.tracker.utils import predictions_to_video_annotation


class TrackingVisualizer:
    def __init__(
        self,
        show_labels: bool = True,
        show_classes: bool = True,
        show_trajectories: bool = True,
        show_frame_number: bool = False,
        box_thickness: int = 2,
        text_scale: float = 0.6,
        text_thickness: int = 2,
        trajectory_length: int = 30,
        codec: str = "mp4",
        output_fps: float = 30.0,
        colorize_tracks: bool = True,
    ):
        """
        Initialize the visualizer with configuration.

        Args:
            show_labels: Whether to show track IDs.
            show_classes: Whether to show class names.
            show_trajectories: Whether to draw trajectories.
            show_frame_number: Whether to overlay frame number.
            box_thickness: Thickness of bounding boxes.
            text_scale: Scale of label text.
            text_thickness: Thickness of label text.
            trajectory_length: How many points to keep in trajectory.
            codec: Output video codec.
            output_fps: Output video framerate.
            colorize_tracks (bool, default=True): if True, ignore colors from project meta and generate new colors for each tracked object; if False, try to use colors from project meta when possible.
        """
        # Visualization settings
        self.show_labels = show_labels
        self.show_classes = show_classes
        self.show_trajectories = show_trajectories
        self.show_frame_number = show_frame_number

        # Style settings
        self.box_thickness = box_thickness
        self.text_scale = text_scale
        self.text_thickness = text_thickness
        self.trajectory_length = trajectory_length
        self.colorize_tracks = colorize_tracks

        # Output settings
        self.codec = codec
        self.output_fps = output_fps

        # Internal state
        self.annotation = None
        self.tracks_by_frame = {}
        self.track_centers = defaultdict(list)
        self.track_colors = {}
        self.color_palette = self._generate_color_palette()
        self._temp_dir = None

    def _generate_color_palette(self, num_colors: int = 100) -> List[Tuple[int, int, int]]:
        """
        Generate bright, distinct color palette for track visualization.
        Uses HSV space with random hue and fixed high saturation/value.
        """
        np.random.seed(42)
        colors = []
        for i in range(num_colors):
            hue = np.random.randint(0, 180)
            saturation = 200 + np.random.randint(55)
            value = 200 + np.random.randint(55)

            hsv_color = np.uint8([[[hue, saturation, value]]])
            bgr_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2BGR)[0][0]
            colors.append(tuple(map(int, bgr_color)))
        return colors

    def _get_track_color(self, track_id: int) -> Tuple[int, int, int]:
        """Get consistent color for track ID from palette."""
        return self.color_palette[track_id % len(self.color_palette)]

    def _get_video_info(self, video_path: Path) -> Tuple[int, int, float, int]:
        """
        Get video metadata using ffmpeg.

        Returns:
            Tuple of (width, height, fps, total_frames)
        """
        try:
            probe = ffmpeg.probe(str(video_path))
            video_stream = next((stream for stream in probe['streams']
                                 if stream['codec_type'] == 'video'), None)

            if video_stream is None:
                raise ValueError(f"No video stream found in: {video_path}")

            width = int(video_stream['width'])
            height = int(video_stream['height'])

            # Extract FPS
            fps_str = video_stream.get('r_frame_rate', '30/1')
            if '/' in fps_str:
                num, den = map(int, fps_str.split('/'))
                fps = num / den if den != 0 else 30.0
            else:
                fps = float(fps_str)

            # Get total frames
            total_frames = int(video_stream.get('nb_frames', 0))
            if total_frames == 0:
                # Fallback: estimate from duration and fps
                duration = float(video_stream.get('duration', 0))
                total_frames = int(duration * fps) if duration > 0 else 0

            return width, height, fps, total_frames

        except Exception as e:
            raise ValueError(f"Could not read video metadata {video_path}: {str(e)}")

    def _create_frame_iterator(self, source: Union[str, Path]) -> Iterator[Tuple[int, np.ndarray]]:
        """
        Create iterator that yields (frame_index, frame) tuples.

        Args:
            source: Path to video file or directory with frame images

        Yields:
            Tuple of (frame_index, frame_array)
        """
        source = Path(source)

        if source.is_file():
            yield from self._iterate_video_frames(source)
        elif source.is_dir():
            yield from self._iterate_directory_frames(source)
        else:
            raise ValueError(f"Source must be a video file or directory, got: {source}")

    def _iterate_video_frames(self, video_path: Path) -> Iterator[Tuple[int, np.ndarray]]:
        """Iterate through video frames using ffmpeg."""
        width, height, fps, total_frames = self._get_video_info(video_path)

        # Store video info for later use
        self.source_fps = fps
        self.frame_size = (width, height)

        process = (
            ffmpeg
            .input(str(video_path))
            .output('pipe:', format='rawvideo', pix_fmt='bgr24')
            .run_async(pipe_stdout=True, pipe_stderr=True)
        )

        try:
            frame_size_bytes = width * height * 3
            frame_idx = 0

            while True:
                frame_data = process.stdout.read(frame_size_bytes)
                if len(frame_data) != frame_size_bytes:
                    break

                frame = np.frombuffer(frame_data, np.uint8).reshape([height, width, 3])
                yield frame_idx, frame
                frame_idx += 1

        finally:
            process.stdout.close()
            if process.stderr:
                process.stderr.close()
            process.wait()

    def _iterate_directory_frames(self, frames_dir: Path) -> Iterator[Tuple[int, np.ndarray]]:
        """Iterate through image frames in directory."""
        if not frames_dir.is_dir():
            raise ValueError(f"Directory does not exist: {frames_dir}")

        # Support common image extensions
        extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
        image_files = []
        for ext in extensions:
            image_files.extend(frames_dir.glob(f'*{ext}'))
            image_files.extend(frames_dir.glob(f'*{ext.upper()}'))

        image_files = sorted(image_files)
        if not image_files:
            raise ValueError(f"No image files found in directory: {frames_dir}")

        # Set fps from config for image sequences
        self.source_fps = self.output_fps

        for frame_idx, img_path in enumerate(image_files):
            frame = cv2.imread(str(img_path))
            if frame is not None:
                if frame_idx == 0:
                    h, w = frame.shape[:2]
                    self.frame_size = (w, h)
                yield frame_idx, frame
            else:
                logger.warning(f"Could not read image: {img_path}")

    def _extract_tracks_from_annotation(self) -> None:
        """
        Extract tracking data from Supervisely VideoAnnotation.

        Populates self.tracks_by_frame with frame-indexed tracking data.
        """
        self.tracks_by_frame = defaultdict(list)
        self.track_colors = {}

        # Map object keys to track info
        objects = {}
        for i, obj in enumerate(self.annotation.objects):
            objects[obj.key] = (i, obj.obj_class.name)

        # Extract tracks from frames
        for frame in self.annotation.frames:
            frame_idx = frame.index
            for figure in frame.figures:
                if figure.geometry.geometry_name() != 'rectangle':
                    continue

                object_key = figure.parent_object.key
                if object_key not in objects:
                    continue

                track_id, class_name = objects[object_key]

                # Extract bbox coordinates
                rect = figure.geometry
                bbox = (rect.left, rect.top, rect.right, rect.bottom)

                if track_id not in self.track_colors:
                    if self.colorize_tracks:
                        # auto-color override everything
                        color = self._get_track_color(track_id)
                    else:
                        # try to use annotation color
                        color = figure.video_object.obj_class.color
                        if color:
                            # convert rgb → bgr
                            color = color[::-1]
                        else:
                            # fallback to auto-color if annotation missing
                            color = self._get_track_color(track_id)

                    self.track_colors[track_id] = color

                self.tracks_by_frame[frame_idx].append((track_id, bbox, class_name))

        logger.info(f"Extracted tracks from {len(self.tracks_by_frame)} frames")

    def _draw_detection(self, img: np.ndarray, track_id: int, bbox: Tuple[int, int, int, int],
                        class_name: str) -> Optional[Tuple[int, int]]:
        """
        Draw single detection with track ID and class label.
        Returns the center point of the bbox for trajectory drawing.
        """
        x1, y1, x2, y2 = map(int, bbox)

        if x2 <= x1 or y2 <= y1:
            return None

        color = self.track_colors[track_id]

        # Draw bounding box
        cv2.rectangle(img, (x1, y1), (x2, y2), color, self.box_thickness)

        # Draw label if enabled
        if self.show_labels:
            label = f"ID:{track_id}"
            if self.show_classes:
                label += f" ({class_name})"

            label_y = y1 - 10 if y1 > 30 else y2 + 25
            (text_w, text_h), _ = cv2.getTextSize(
                label, cv2.FONT_HERSHEY_SIMPLEX, self.text_scale, self.text_thickness
            )

            cv2.rectangle(img, (x1, label_y - text_h - 5),
                          (x1 + text_w, label_y + 5), color, -1)
            cv2.putText(img, label, (x1, label_y),
                        cv2.FONT_HERSHEY_SIMPLEX, self.text_scale,
                        (255, 255, 255), self.text_thickness, cv2.LINE_AA)

        # Return center point for trajectory
        return (x1 + x2) // 2, (y1 + y2) // 2

    def _draw_trajectories(self, img: np.ndarray) -> None:
        """Draw trajectory lines for all tracks, filtering out big jumps."""
        if not self.show_trajectories:
            return

        max_jump = 200

        for track_id, centers in self.track_centers.items():
            if len(centers) < 2:
                continue

            color = self.track_colors[track_id]
            points = centers[-self.trajectory_length:]

            for i in range(1, len(points)):
                p1, p2 = points[i - 1], points[i]
                if p1 is None or p2 is None:
                    continue

                if np.hypot(p2[0] - p1[0], p2[1] - p1[1]) > max_jump:
                    continue
                cv2.line(img, p1, p2, color, 2)
                cv2.circle(img, p1, 3, color, -1)

    def _process_single_frame(self, frame: np.ndarray, frame_idx: int) -> np.ndarray:
        """
        Process single frame: add annotations and return processed frame.

        Args:
            frame: Input frame
            frame_idx: Frame index

        Returns:
            Annotated frame
        """
        img = frame.copy()
        active_ids = set()
        # Draw detections for current frame
        if frame_idx in self.tracks_by_frame:
            for track_id, bbox, class_name in self.tracks_by_frame[frame_idx]:
                center = self._draw_detection(img, track_id, bbox, class_name)
                self.track_centers[track_id].append(center)
                if len(self.track_centers[track_id]) > self.trajectory_length:
                    self.track_centers[track_id].pop(0)
                active_ids.add(track_id)

        for tid in self.track_centers.keys():
            if tid not in active_ids:
                self.track_centers[tid].append(None)
                if len(self.track_centers[tid]) > self.trajectory_length:
                    self.track_centers[tid].pop(0)

        # Draw trajectories
        self._draw_trajectories(img)

        # Add frame number if requested
        if self.show_frame_number:
            cv2.putText(img, f"Frame: {frame_idx + 1}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)

        return img

    def _save_processed_frame(self, frame: np.ndarray, frame_idx: int) -> str:
        """
        Save processed frame to temporary directory.

        Args:
            frame: Processed frame
            frame_idx: Frame index

        Returns:
            Path to saved frame
        """
        frame_path = self._temp_dir / f"frame_{frame_idx:08d}.jpg"
        cv2.imwrite(str(frame_path), frame, [cv2.IMWRITE_JPEG_QUALITY, 95])
        return str(frame_path)

    def _create_video_from_frames(self, output_path: Union[str, Path]) -> None:
        """
        Create final video from processed frames using ffmpeg.

        Args:
            output_path: Path for output video
        """
        output_path = Path(output_path)
        output_path.parent.mkdir(parents=True, exist_ok=True)

        # Create video from frame sequence
        input_pattern = str(self._temp_dir / "frame_%08d.jpg")

        try:
            (
                ffmpeg
                .input(input_pattern, pattern_type='sequence', framerate=self.source_fps)
                .output(str(output_path), vcodec='libx264', pix_fmt='yuv420p', crf=18)
                .overwrite_output()
                .run(capture_stdout=True, capture_stderr=True)
            )
            logger.info(f"Video saved to {output_path}")

        except ffmpeg.Error as e:
            error_msg = e.stderr.decode() if e.stderr else "Unknown ffmpeg error"
            raise ValueError(f"Failed to create video: {error_msg}")

    def _cleanup_temp_directory(self) -> None:
        """Clean up temporary directory and all its contents."""
        if self._temp_dir and self._temp_dir.exists():
            shutil.rmtree(self._temp_dir)
            self._temp_dir = None

    def visualize_video_annotation(self, annotation: VideoAnnotation,
                                   source: Union[str, Path],
                                   output_path: Union[str, Path]) -> None:
        """
        Visualize tracking annotations on video using streaming approach.

        Args:
            annotation: Supervisely VideoAnnotation object with tracking data
            source: Path to video file or directory containing frame images
            output_path: Path for output video file

        Raises:
            TypeError: If annotation is not VideoAnnotation
            ValueError: If source is invalid or annotation is empty
        """
        if not isinstance(annotation, VideoAnnotation):
            raise TypeError(f"Annotation must be VideoAnnotation, got {type(annotation)}")

        # Store annotation
        self.annotation = annotation

        # Create temporary directory for processed frames
        self._temp_dir = Path(tempfile.mkdtemp(prefix="video_viz_"))

        try:
            # Extract tracking data
            self._extract_tracks_from_annotation()

            if not self.tracks_by_frame:
                logger.warning("No tracking data found in annotation")

            # Reset trajectory tracking
            self.track_centers = defaultdict(list)

            # Process frames one by one
            frame_count = 0
            for frame_idx, frame in self._create_frame_iterator(source):
                # Process frame
                processed_frame = self._process_single_frame(frame, frame_idx)

                # Save processed frame
                self._save_processed_frame(processed_frame, frame_idx)

                frame_count += 1

                # Progress logging
                if frame_count % 100 == 0:
                    logger.info(f"Processed {frame_count} frames")

            logger.info(f"Finished processing {frame_count} frames")

            # Create final video from saved frames
            self._create_video_from_frames(output_path)

        finally:
            # Always cleanup temporary files
            self._cleanup_temp_directory()

    def __del__(self):
        """Cleanup temporary directory on object destruction."""
        self._cleanup_temp_directory()


def visualize(
    predictions: Union[VideoAnnotation, List[Prediction]],
    source: Union[str, Path],
    output_path: Union[str, Path],
    show_labels: bool = True,
    show_classes: bool = True,
    show_trajectories: bool = True,
    box_thickness: int = 2,
    colorize_tracks: bool = True,
    **kwargs
) -> None:
    """
    Visualize tracking results from either VideoAnnotation or list of Prediction.

    Args:
        predictions (supervisely.VideoAnnotation | List[Prediction]): Tracking data to render; either a Supervisely VideoAnnotation or a list of Prediction objects.
        source (str | Path): Path to an input video file or a directory of sequential frames (e.g., frame_000001.jpg).
        output_path (str | Path): Path to the output video file to be created.
        show_labels (bool, default=True): Draw per-object labels (track IDs).
        show_classes (bool, default=True): Draw class names for each object.
        show_trajectories (bool, default=True): Render object trajectories across frames.
        box_thickness (int, default=2): Bounding-box line thickness in pixels.
        colorize_tracks (bool, default=True): if True, ignore colors from project meta and generate new colors for each tracked object; if False, try to use colors from project meta when possible.
    """
    visualizer = TrackingVisualizer(
        show_labels=show_labels,
        show_classes=show_classes,
        show_trajectories=show_trajectories,
        box_thickness=box_thickness,
        colorize_tracks=colorize_tracks,
        **kwargs
    )

    if isinstance(predictions, VideoAnnotation):
        visualizer.visualize_video_annotation(predictions, source, output_path)
    elif isinstance(predictions, list):
        predictions = predictions_to_video_annotation(predictions)
        visualizer.visualize_video_annotation(predictions, source, output_path)
    else:
        raise TypeError(f"Predictions must be VideoAnnotation or list of Prediction, got {type(predictions)}")
```
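
For finer control than the `visualize()` wrapper, the `TrackingVisualizer` class above can also be used directly. A minimal hedged sketch, assuming `video_ann` is a `sly.VideoAnnotation` with rectangle figures grouped into tracked objects (the file paths are placeholders):

```python
from pathlib import Path

import supervisely as sly
from supervisely.nn.tracker.visualize import TrackingVisualizer

# Assumed to be produced elsewhere, e.g. by a tracking-enabled prediction run
video_ann: sly.VideoAnnotation = ...

viz = TrackingVisualizer(
    show_labels=True,        # draw "ID:<track_id>" labels
    show_classes=True,       # append class names to the labels
    show_trajectories=True,  # draw recent center-point trails
    trajectory_length=50,    # keep up to 50 trail points per track
    colorize_tracks=False,   # prefer project-meta colors when available
)
viz.visualize_video_annotation(video_ann, source=Path("input.mp4"), output_path=Path("tracked.mp4"))
```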
supervisely/template/experiment/experiment.html.jinja:

````diff
@@ -174,6 +174,7 @@ can pass them in the terminal before running the script:
 model = api.nn.deploy(
     model="{{ experiment.paths.artifacts_dir.path }}/checkpoints/{{ experiment.training.checkpoints.pytorch.name }}",
     device="cuda:0", # or "cpu"
+    workspace_id={{ experiment.project.workspace_id }}
 )
 
 # 3. Predict
@@ -321,49 +322,41 @@ API](https://docs.supervisely.com/neural-networks/overview-1/prediction-api){:target="_blank"}
 
 ## Tracking Objects in Video
 
-
-[BoxMot](https://github.com/mikel-brostrom/boxmot){:target="_blank"} is a
-third-party library that implements lightweight neural networks for tracking-by-detection task (when the tracking is
-performed on the objects predicted by a separate detector). For `boxmot` models you can use even CPU device.
+Supervisely now supports **tracking-by-detection** out of the box. We leverage a lightweight tracking algorithm (such as [BoT-SORT](https://github.com/NirAharon/BoT-SORT){:target="_blank"}) which identifies the unique objects across video frames and assigns IDs to them. This allows us to connect separate detections from different frames into a single track for each object.
 
-
-
-```bash
-pip install boxmot
-```
-
-Supervisely SDK has the `track()` method from `supervisely.nn.tracking` which allows you to apply `boxmot` models
-together with a detector in a single line of code. This method takes two arguments: a `boxmot` tracker, and a
-`PredictionSession` of a detector. It returns a `sly.VideoAnnotation` with the tracked objects.
+To apply tracking via API, first, deploy your detection model or connect to it, and then use the `predict()` method with `tracking=True` parameter. You can also specify tracking configuration parameters by passing `tracking_config={...}` with your custom settings.
 
 ```python
 import supervisely as sly
-from supervisely.nn.tracking import track
-import boxmot
-from pathlib import Path
-
-# Deploy a detector
-detector = api.nn.deploy(
-    model="{{ experiment.model.framework }}/{{ experiment.model.name }}",
-    device="cuda:0", # Use GPU for detection
-)
 
-
-
-
-
+api = sly.Api()
+
+# Deploy your model
+model = api.nn.deploy(
+    model="{{ experiment.paths.artifacts_dir.path }}/checkpoints/{{ experiment.training.checkpoints.pytorch.name }}",
+    device="cuda",
+    workspace_id={{ experiment.project.workspace_id }},
 )
 
-#
-
-video_id=
-
-
+# Apply tracking
+predictions = model.predict(
+    video_id=YOUR_VIDEO_ID, # Video ID in Supervisely
+    tracking=True,
+    tracking_config={
+        "tracker": "botsort", # botsort is a powerful tracking algorithm used by default
+        # You can pass other tracking parameters here, see the docs for details
+    }
+)
+
+# Processing results
+for pred in predictions:
+    frame_index = pred.frame_index
+    annotation = pred.annotation
+    track_ids = pred.track_ids
+    print(f"Frame {frame_index}: {len(track_ids)} tracks")
 ```
 
->
-Video](https://docs.supervisely.com/neural-networks/overview-1/prediction-api#tracking-objects-in-video){:target="_blank"}.
+> You can also apply trackers in your own code or applications. Read more about this in the docs [Video Object Tracking](https://docs.supervisely.com/neural-networks/overview-1/video-object-tracking){:target="_blank"}.
 
 {% endif %}
 
````