supervisely 6.73.428__py3-none-any.whl → 6.73.429__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- supervisely/nn/tracker/__init__.py +5 -6
- supervisely/nn/tracker/botsort/tracker/mc_bot_sort.py +1 -1
- supervisely/nn/tracker/botsort_tracker.py +9 -2
- supervisely/nn/tracker/calculate_metrics.py +264 -0
- supervisely/nn/tracker/utils.py +274 -0
- supervisely/nn/tracker/visualize.py +519 -0
- supervisely/template/experiment/experiment.html.jinja +26 -33
- {supervisely-6.73.428.dist-info → supervisely-6.73.429.dist-info}/METADATA +1 -1
- {supervisely-6.73.428.dist-info → supervisely-6.73.429.dist-info}/RECORD +13 -10
- {supervisely-6.73.428.dist-info → supervisely-6.73.429.dist-info}/LICENSE +0 -0
- {supervisely-6.73.428.dist-info → supervisely-6.73.429.dist-info}/WHEEL +0 -0
- {supervisely-6.73.428.dist-info → supervisely-6.73.429.dist-info}/entry_points.txt +0 -0
- {supervisely-6.73.428.dist-info → supervisely-6.73.429.dist-info}/top_level.txt +0 -0
| @@ -1,9 +1,8 @@ | |
| 1 | 
            -
            from supervisely.sly_logger import logger
         | 
| 2 | 
            -
             | 
| 3 1 | 
             
            try:
         | 
| 4 2 | 
             
                from supervisely.nn.tracker.botsort_tracker import BotSortTracker
         | 
| 3 | 
            +
                from supervisely.nn.tracker.calculate_metrics import TrackingEvaluator, evaluate
         | 
| 4 | 
            +
                TRACKING_LIBS_INSTALLED = True
         | 
| 5 5 | 
             
            except ImportError:
         | 
| 6 | 
            -
                 | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
                raise
         | 
| 6 | 
            +
                TRACKING_LIBS_INSTALLED = False
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            from supervisely.nn.tracker.visualize import TrackingVisualizer, visualize
         | 
| @@ -426,8 +426,8 @@ class BoTSORT(object): | |
| 426 426 | 
             
                        if track.score < self.new_track_thresh:
         | 
| 427 427 | 
             
                            continue
         | 
| 428 428 | 
             
                        # Fill track_id for new detection
         | 
| 429 | 
            -
                        detection_track_map[orig_idx]["track_id"] = int(track.track_id)
         | 
| 430 429 | 
             
                        track.activate(self.kalman_filter, self.frame_id)
         | 
| 430 | 
            +
                        detection_track_map[orig_idx]["track_id"] = int(track.track_id)
         | 
| 431 431 | 
             
                        activated_starcks.append(track)
         | 
| 432 432 |  | 
| 433 433 | 
             
                    """ Step 5: Update state"""
         | 
| @@ -1,6 +1,5 @@ | |
| 1 1 | 
             
            import supervisely as sly
         | 
| 2 2 | 
             
            from supervisely.nn.tracker.base_tracker import BaseTracker
         | 
| 3 | 
            -
            from supervisely.nn.tracker.botsort.tracker.mc_bot_sort import BoTSORT
         | 
| 4 3 | 
             
            from supervisely import Annotation, VideoAnnotation
         | 
| 5 4 |  | 
| 6 5 | 
             
            from dataclasses import dataclass
         | 
| @@ -11,6 +10,7 @@ import yaml | |
| 11 10 | 
             
            import os
         | 
| 12 11 | 
             
            from pathlib import Path
         | 
| 13 12 | 
             
            from supervisely import logger
         | 
| 13 | 
            +
            from supervisely.nn.tracker.botsort.tracker.mc_bot_sort import BoTSORT
         | 
| 14 14 |  | 
| 15 15 |  | 
| 16 16 | 
             
            @dataclass
         | 
| @@ -38,6 +38,13 @@ class BotSortTracker(BaseTracker): | |
| 38 38 |  | 
| 39 39 | 
             
                def __init__(self, settings: dict = None, device: str = None):
         | 
| 40 40 | 
             
                    super().__init__(settings=settings, device=device)
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                    from supervisely.nn.tracker import TRACKING_LIBS_INSTALLED
         | 
| 43 | 
            +
                    if not TRACKING_LIBS_INSTALLED:
         | 
| 44 | 
            +
                        raise ImportError(
         | 
| 45 | 
            +
                            "Tracking dependencies are not installed. "
         | 
| 46 | 
            +
                            "Please install supervisely with `pip install supervisely[tracking]`."
         | 
| 47 | 
            +
                        )
         | 
| 41 48 |  | 
| 42 49 | 
             
                    # Load default settings from YAML file
         | 
| 43 50 | 
             
                    self.settings = self._load_default_settings()
         | 
| @@ -232,7 +239,7 @@ class BotSortTracker(BaseTracker): | |
| 232 239 |  | 
| 233 240 | 
             
                            video_object = video_objects[track_id]
         | 
| 234 241 | 
             
                            rect = sly.Rectangle(top=y1, left=x1, bottom=y2, right=x2)
         | 
| 235 | 
            -
                            frame_figures.append(sly.VideoFigure(video_object, rect, frame_idx))
         | 
| 242 | 
            +
                            frame_figures.append(sly.VideoFigure(video_object, rect, frame_idx, track_id=str(track_id)))
         | 
| 236 243 |  | 
| 237 244 | 
             
                        frames.append(sly.Frame(frame_idx, frame_figures))
         | 
| 238 245 |  | 
| @@ -0,0 +1,264 @@ | |
| 1 | 
            +
            import numpy as np
         | 
| 2 | 
            +
            from collections import defaultdict
         | 
| 3 | 
            +
            from typing import Dict, List, Union
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            from scipy.optimize import linear_sum_assignment  # pylint: disable=import-error
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            import supervisely as sly
         | 
| 8 | 
            +
            from supervisely.video_annotation.video_annotation import VideoAnnotation
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            import motmetrics as mm  # pylint: disable=import-error
         | 
| 11 | 
            +
             | 
| 12 | 
            +
            class TrackingEvaluator:
                """
                Evaluator for video tracking metrics including MOTA, MOTP, IDF1.

                Ground-truth and predicted annotations are flattened into per-frame lists
                of boxes, associated frame by frame on IoU, and summarised as detection
                metrics (precision / recall / F1 / average IoU) plus the MOT metrics
                computed by ``motmetrics``.
                """

                # Finite stand-in cost for gated (non-matchable) pairs, so the Hungarian
                # solver always gets a feasible problem and avoids such pairs when it can.
                _GATED_COST = 1e6

                def __init__(self, iou_threshold: float = 0.5):
                    """
                    Initialize evaluator with IoU threshold for matching.

                    Args:
                        iou_threshold: Minimum IoU for a ground-truth/prediction pair to
                            count as a match. Must lie in [0.0, 1.0].

                    Raises:
                        ImportError: If ``TRACKING_LIBS_INSTALLED`` is False.
                        ValueError: If ``iou_threshold`` is outside [0.0, 1.0].
                    """
                    # Imported lazily: the flag lives in the package __init__.
                    from supervisely.nn.tracker import TRACKING_LIBS_INSTALLED
                    if not TRACKING_LIBS_INSTALLED:
                        raise ImportError(
                            "Tracking dependencies are not installed. "
                            "Please install supervisely with `pip install supervisely[tracking]`."
                        )

                    if not 0.0 <= iou_threshold <= 1.0:
                        raise ValueError("iou_threshold must be in [0.0, 1.0]")
                    self.iou_threshold = iou_threshold

                def evaluate(
                    self,
                    gt_annotation: "VideoAnnotation",
                    pred_annotation: "VideoAnnotation",
                ) -> Dict[str, Union[float, int]]:
                    """
                    Main entry: extract tracks from annotations, compute basic and MOT metrics, return results.

                    Args:
                        gt_annotation: Ground-truth video annotation.
                        pred_annotation: Predicted video annotation.

                    Returns:
                        Flat dict with detection metrics, MOT metrics and the IoU threshold used.

                    Raises:
                        TypeError: If either argument is not a ``VideoAnnotation``.
                    """
                    self._validate_annotations(gt_annotation, pred_annotation)
                    # Kept on the instance for callers; nothing in this class reads it back.
                    self.img_height, self.img_width = gt_annotation.img_size

                    gt_tracks = self._extract_tracks(gt_annotation)
                    pred_tracks = self._extract_tracks(pred_annotation)

                    basic = self._compute_basic_metrics(gt_tracks, pred_tracks)
                    mot = self._compute_mot_metrics(gt_tracks, pred_tracks)

                    results = {
                        # basic detection
                        "precision": basic["precision"],
                        "recall": basic["recall"],
                        "f1": basic["f1"],
                        "avg_iou": basic["avg_iou"],
                        "true_positives": basic["tp"],
                        "false_positives": basic["fp"],
                        "false_negatives": basic["fn"],
                        "total_gt_objects": basic["total_gt"],
                        "total_pred_objects": basic["total_pred"],

                        # motmetrics
                        "mota": mot["mota"],
                        "motp": mot["motp"],
                        "idf1": mot["idf1"],
                        "id_switches": mot["id_switches"],
                        "fragmentations": mot["fragmentations"],
                        "num_misses": mot["num_misses"],
                        "num_false_positives": mot["num_false_positives"],

                        # config
                        "iou_threshold": self.iou_threshold,
                    }
                    return results

                def _validate_annotations(self, gt: "VideoAnnotation", pred: "VideoAnnotation"):
                    """Minimal type validation for annotations; raises TypeError on mismatch."""
                    if not isinstance(gt, VideoAnnotation) or not isinstance(pred, VideoAnnotation):
                        raise TypeError("gt_annotation and pred_annotation must be VideoAnnotation instances")

                def _extract_tracks(self, annotation: "VideoAnnotation") -> Dict[int, List[Dict]]:
                    """
                    Extract tracks from a VideoAnnotation into a dict keyed by frame index.
                    Each element is a dict: {'track_id': int, 'bbox': [x1,y1,x2,y2], 'confidence': float, 'class_name': str}

                    Frames without figures do not appear in the result.
                    """
                    frames_to_tracks = defaultdict(list)

                    for frame in annotation.frames:
                        frame_idx = frame.index
                        for figure in frame.figures:
                            # use track_id if present, otherwise fallback to object's key int
                            if figure.track_id is not None:
                                track_id = int(figure.track_id)
                            else:
                                track_id = figure.video_object.key().int

                            # Non-rectangular geometries are reduced to their bounding box.
                            bbox = figure.geometry
                            if not isinstance(bbox, sly.Rectangle):
                                bbox = bbox.to_bbox()

                            # A missing confidence and an explicit None both mean "fully
                            # confident"; float(None) would raise TypeError otherwise.
                            confidence = getattr(figure, "confidence", None)

                            frames_to_tracks[frame_idx].append({
                                "track_id": track_id,
                                "bbox": [
                                    float(bbox.left),
                                    float(bbox.top),
                                    float(bbox.right),
                                    float(bbox.bottom),
                                ],
                                "confidence": 1.0 if confidence is None else float(confidence),
                                "class_name": figure.video_object.obj_class.name
                            })

                    return dict(frames_to_tracks)

                @staticmethod
                def _to_xywh(boxes) -> np.ndarray:
                    """Convert rows of [x1, y1, x2, y2] corners into [x, y, width, height] rows."""
                    converted = np.asarray(boxes, dtype=float).reshape(-1, 4).copy()
                    converted[:, 2:] -= converted[:, :2]
                    return converted

                def _gated_distance_matrix(self, gts: List[Dict], preds: List[Dict]) -> np.ndarray:
                    """
                    Return the (len(gts), len(preds)) matrix of ``1 - IoU`` distances.

                    Pairs whose IoU is below ``self.iou_threshold`` (distance above
                    ``1 - iou_threshold``) and NaN entries are set to ``np.inf`` so they
                    can never be accepted as a match.
                    """
                    # motmetrics expects rectangles as (x, y, w, h); tracks store corners,
                    # so they are converted first. Passing corners directly makes it
                    # read x2/y2 as width/height and produces wrong IoUs.
                    dist = np.array(
                        mm.distances.iou_matrix(
                            self._to_xywh([g["bbox"] for g in gts]),
                            self._to_xywh([p["bbox"] for p in preds]),
                            max_iou=1.0,
                        ),
                        dtype=float,
                    )
                    dist[np.isnan(dist)] = np.inf
                    dist[dist > (1.0 - self.iou_threshold)] = np.inf
                    return dist

                def _compute_basic_metrics(self, gt_tracks: Dict[int, List[Dict]], pred_tracks: Dict[int, List[Dict]]):
                    """
                    Compute per-frame true positives / false positives / false negatives and average IoU.
                    Matching is performed with Hungarian algorithm (scipy) on the gated distance matrix,
                    so pairs with IoU < threshold are never counted as matches.

                    Returns:
                        Dict with keys precision, recall, f1, avg_iou, tp, fp, fn, total_gt, total_pred.
                        Ratios default to 0.0 when their denominator is zero.
                    """
                    tp = fp = fn = 0
                    total_iou = 0.0
                    iou_count = 0

                    for f in sorted(set(gt_tracks) | set(pred_tracks)):
                        gts = gt_tracks.get(f, [])
                        preds = pred_tracks.get(f, [])

                        # One side empty: everything on the other side is unmatched.
                        if not gts or not preds:
                            fp += len(preds)
                            fn += len(gts)
                            continue

                        dist = self._gated_distance_matrix(gts, preds)
                        valid = np.isfinite(dist)

                        # Gate BEFORE assignment: minimising the raw cost and filtering
                        # afterwards can choose only sub-threshold pairs even though a
                        # valid pairing exists.
                        cost = np.where(valid, dist, self._GATED_COST)
                        row_idx, col_idx = linear_sum_assignment(cost)

                        frame_matches = 0
                        for r, c in zip(row_idx, col_idx):
                            if valid[r, c]:
                                frame_matches += 1
                                # IoU = 1 - distance
                                total_iou += 1.0 - float(dist[r, c])
                                iou_count += 1

                        # The assignment is one-to-one, so matches count once per side.
                        tp += frame_matches
                        fp += len(preds) - frame_matches
                        fn += len(gts) - frame_matches

                    precision = tp / (tp + fp) if (tp + fp) > 0 else 0.0
                    recall = tp / (tp + fn) if (tp + fn) > 0 else 0.0
                    f1 = 2 * precision * recall / (precision + recall) if (precision + recall) > 0 else 0.0
                    avg_iou = total_iou / iou_count if iou_count > 0 else 0.0

                    total_gt = sum(len(v) for v in gt_tracks.values())
                    total_pred = sum(len(v) for v in pred_tracks.values())

                    return {
                        "precision": precision,
                        "recall": recall,
                        "f1": f1,
                        "avg_iou": avg_iou,
                        "tp": tp,
                        "fp": fp,
                        "fn": fn,
                        "total_gt": total_gt,
                        "total_pred": total_pred,
                    }

                def _compute_mot_metrics(self, gt_tracks: Dict[int, List[Dict]], pred_tracks: Dict[int, List[Dict]]):
                    """
                    Use motmetrics.MOTAccumulator to collect associations per frame and compute common MOT metrics.
                    The distance matrix is ``1 - IoU``; pairs with distance > (1 - iou_threshold) are set
                    to infinity to exclude them from matching.

                    Returns:
                        Dict with keys mota, motp, idf1 (floats) and id_switches, fragmentations,
                        num_misses, num_false_positives (ints). Missing or NaN values become 0.
                    """
                    acc = mm.MOTAccumulator(auto_id=True)

                    for f in sorted(set(gt_tracks) | set(pred_tracks)):
                        gts = gt_tracks.get(f, [])
                        preds = pred_tracks.get(f, [])

                        gt_ids = [g["track_id"] for g in gts]
                        pred_ids = [p["track_id"] for p in preds]

                        if gts and preds:
                            dist_mat = self._gated_distance_matrix(gts, preds)
                        else:
                            # Empty side: pass a correctly shaped matrix with no matchable pair.
                            dist_mat = np.full((len(gts), len(preds)), np.inf)

                        acc.update(gt_ids, pred_ids, dist_mat)

                    mh = mm.metrics.create()
                    summary = mh.compute(
                        acc,
                        metrics=[
                            "mota",
                            "motp",
                            "idf1",
                            "num_switches",
                            "num_fragmentations",
                            "num_misses",
                            "num_false_positives",
                        ],
                        name="eval",
                    )

                    def get_val(col: str, default=0.0):
                        """Read one metric from the summary, using the default if absent or NaN."""
                        if summary.empty or col not in summary.columns:
                            return float(default)
                        v = summary.iloc[0][col]
                        return float(v) if not np.isnan(v) else float(default)

                    return {
                        "mota": get_val("mota", 0.0),
                        "motp": get_val("motp", 0.0),
                        "idf1": get_val("idf1", 0.0),
                        "id_switches": int(get_val("num_switches", 0.0)),
                        "fragmentations": int(get_val("num_fragmentations", 0.0)),
                        "num_misses": int(get_val("num_misses", 0.0)),
                        "num_false_positives": int(get_val("num_false_positives", 0.0)),
                    }
         | 
| 245 | 
            +
             | 
| 246 | 
            +
             | 
| 247 | 
            +
            def evaluate(
                gt_annotation: VideoAnnotation,
                pred_annotation: VideoAnnotation,
                iou_threshold: float = 0.5,
            ) -> Dict[str, Union[float, int]]:
                """
                Score predicted tracks against ground-truth tracks in one call.

                Convenience wrapper that builds a ``TrackingEvaluator`` with the given
                threshold and runs its ``evaluate`` method on the two annotations.

                Args:
                    gt_annotation: Ground-truth supervisely VideoAnnotation holding the reference tracks.
                    pred_annotation: Predicted supervisely VideoAnnotation to compare with the ground truth.
                    iou_threshold: Minimum Intersection-over-Union for a detection to count as a valid match.

                Returns:
                    dict: evaluation metrics as produced by ``TrackingEvaluator.evaluate``.
                """
                return TrackingEvaluator(iou_threshold=iou_threshold).evaluate(
                    gt_annotation,
                    pred_annotation,
                )
         | 
| @@ -0,0 +1,274 @@ | |
| 1 | 
            +
             | 
| 2 | 
            +
            from typing import List, Union, Dict, Tuple
         | 
| 3 | 
            +
            from pathlib import Path
         | 
| 4 | 
            +
            from collections import defaultdict
         | 
| 5 | 
            +
            import numpy as np
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            import supervisely as sly
         | 
| 8 | 
            +
            from supervisely.nn.model.prediction import Prediction
         | 
| 9 | 
            +
            from supervisely import VideoAnnotation
         | 
| 10 | 
            +
            from supervisely import logger
         | 
| 11 | 
            +
             | 
| 12 | 
            +
             | 
| 13 | 
            +
            def predictions_to_video_annotation(
         | 
| 14 | 
            +
                predictions: List[Prediction],
         | 
| 15 | 
            +
            ) -> VideoAnnotation:
         | 
| 16 | 
            +
                """
         | 
| 17 | 
            +
                Convert list of Prediction objects to VideoAnnotation.
         | 
| 18 | 
            +
                
         | 
| 19 | 
            +
                Args:
         | 
| 20 | 
            +
                    predictions: List of Prediction objects, one per frame
         | 
| 21 | 
            +
                    
         | 
| 22 | 
            +
                Returns:
         | 
| 23 | 
            +
                    VideoAnnotation object with tracked objects
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                """
         | 
| 26 | 
            +
                
         | 
| 27 | 
            +
                if not predictions:
         | 
| 28 | 
            +
                    raise ValueError("Empty predictions list provided")
         | 
| 29 | 
            +
                
         | 
| 30 | 
            +
                frame_shape = predictions[0].annotation.img_size
         | 
| 31 | 
            +
                img_h, img_w = frame_shape
         | 
| 32 | 
            +
                video_objects = {}  
         | 
| 33 | 
            +
                frames = []
         | 
| 34 | 
            +
                
         | 
| 35 | 
            +
                for pred in predictions:
         | 
| 36 | 
            +
                    frame_figures = []
         | 
| 37 | 
            +
                    frame_idx = pred.frame_index
         | 
| 38 | 
            +
                    
         | 
| 39 | 
            +
                    # Get data using public properties
         | 
| 40 | 
            +
                    boxes = pred.boxes          # Public property - np.array (N, 4) in tlbr format
         | 
| 41 | 
            +
                    classes = pred.classes      # Public property - list of class names
         | 
| 42 | 
            +
                    track_ids = pred.track_ids  # Public property - can be None
         | 
| 43 | 
            +
                    
         | 
| 44 | 
            +
                    # Skip frame if no detections
         | 
| 45 | 
            +
                    if len(boxes) == 0:
         | 
| 46 | 
            +
                        frames.append(sly.Frame(frame_idx, []))
         | 
| 47 | 
            +
                        continue
         | 
| 48 | 
            +
                    
         | 
| 49 | 
            +
                    for bbox, class_name, track_id in zip(boxes, classes, track_ids):
         | 
| 50 | 
            +
                        # Clip bbox to image boundaries
         | 
| 51 | 
            +
                        # Note: pred.boxes returns tlbr format (top, left, bottom, right)
         | 
| 52 | 
            +
                        top, left, bottom, right = bbox
         | 
| 53 | 
            +
                        dims = np.array([img_h, img_w, img_h, img_w]) - 1
         | 
| 54 | 
            +
                        top, left, bottom, right = np.clip([top, left, bottom, right], 0, dims)
         | 
| 55 | 
            +
                        
         | 
| 56 | 
            +
                        # Convert to integer coordinates
         | 
| 57 | 
            +
                        top, left, bottom, right = int(top), int(left), int(bottom), int(right)
         | 
| 58 | 
            +
                        
         | 
| 59 | 
            +
                        # Get or create VideoObject
         | 
| 60 | 
            +
                        if track_id not in video_objects:
         | 
| 61 | 
            +
                            # Find obj_class from prediction annotation
         | 
| 62 | 
            +
                            obj_class = None
         | 
| 63 | 
            +
                            for label in pred.annotation.labels:
         | 
| 64 | 
            +
                                if label.obj_class.name == class_name:
         | 
| 65 | 
            +
                                    obj_class = label.obj_class
         | 
| 66 | 
            +
                                    break
         | 
| 67 | 
            +
                            
         | 
| 68 | 
            +
                            if obj_class is None:
         | 
| 69 | 
            +
                                # Create obj_class if not found (fallback)
         | 
| 70 | 
            +
                                obj_class = sly.ObjClass(class_name, sly.Rectangle)
         | 
| 71 | 
            +
                                
         | 
| 72 | 
            +
                            video_objects[track_id] = sly.VideoObject(obj_class)
         | 
| 73 | 
            +
                        
         | 
| 74 | 
            +
                        video_object = video_objects[track_id]
         | 
| 75 | 
            +
                        rect = sly.Rectangle(top=top, left=left, bottom=bottom, right=right)
         | 
| 76 | 
            +
                        frame_figures.append(sly.VideoFigure(video_object, rect, frame_idx, track_id=str(track_id)))
         | 
| 77 | 
            +
                    
         | 
| 78 | 
            +
                    frames.append(sly.Frame(frame_idx, frame_figures))
         | 
| 79 | 
            +
             | 
| 80 | 
            +
                objects = list(video_objects.values())
         | 
| 81 | 
            +
                
         | 
| 82 | 
            +
                return VideoAnnotation(
         | 
| 83 | 
            +
                    img_size=frame_shape,
         | 
| 84 | 
            +
                    frames_count=len(predictions),
         | 
| 85 | 
            +
                    objects=sly.VideoObjectCollection(objects),
         | 
| 86 | 
            +
                    frames=sly.FrameCollection(frames)
         | 
| 87 | 
            +
                )
         | 
| 88 | 
            +
                
         | 
| 89 | 
            +
            def video_annotation_to_mot(
                annotation: VideoAnnotation,
                output_path: Union[str, Path] = None,
                class_to_id_mapping: Dict[str, int] = None
            ) -> Union[str, List[str]]:
                """
                Serialize a Supervisely VideoAnnotation into MOT-style text lines.

                One line is produced per figure, in frame order:
                ``frame_id,track_id,left,top,width,height,confidence,class_id,visibility``.
                Frame ids are written 1-based (``frame.index + 1``); confidence is
                always 1.0 and visibility is always 1.

                Args:
                    annotation: Annotation whose frames and figures are exported.
                    output_path: Optional destination file. When truthy, parent
                        directories are created and the lines are written there.
                    class_to_id_mapping: Optional class name -> class id mapping.
                        When None, ids are assigned starting from 1 in sorted order
                        of the class names found in the annotation. Names missing
                        from the mapping are written with class id 1.

                Returns:
                    ``str(output_path)`` when a path was given, otherwise the list
                    of MOT lines.
                """
                # Build the default name -> id table from the classes actually present.
                if class_to_id_mapping is None:
                    seen_names = {
                        fig.video_object.obj_class.name
                        for frm in annotation.frames
                        for fig in frm.figures
                    }
                    class_to_id_mapping = {
                        name: number
                        for number, name in enumerate(sorted(seen_names), start=1)
                    }

                # These two columns are constant for every exported detection.
                confidence = 1.0
                visibility = 1

                mot_lines = []
                for frame in annotation.frames:
                    frame_id = frame.index + 1  # MOT uses 1-based frame indexing

                    for figure in frame.figures:
                        # Prefer the figure's own track id; otherwise derive one
                        # from the integer form of the video object's key.
                        track_id = (
                            int(figure.track_id)
                            if figure.track_id is not None
                            else figure.video_object.key().int
                        )

                        # Non-rectangular geometries are exported via their bounding box.
                        geometry = figure.geometry
                        bbox = geometry if isinstance(geometry, sly.Rectangle) else geometry.to_bbox()

                        left, top = bbox.left, bbox.top
                        width, height = bbox.width, bbox.height

                        class_id = class_to_id_mapping.get(figure.video_object.obj_class.name, 1)

                        mot_lines.append(
                            f"{frame_id},{track_id},{left:.2f},{top:.2f},{width:.2f},{height:.2f},{confidence:.3f},{class_id},{visibility}"
                        )

                # Without a destination the caller simply gets the lines back.
                if not output_path:
                    return mot_lines

                output_path = Path(output_path)
                output_path.parent.mkdir(parents=True, exist_ok=True)

                with open(output_path, 'w') as f:
                    f.writelines(line + '\n' for line in mot_lines)

                logger.info(f"Saved MOT format to: {output_path} ({len(mot_lines)} detections)")
                return str(output_path)
         | 
| 157 | 
            +
             | 
| 158 | 
            +
            def mot_to_video_annotation(
                mot_file_path: Union[str, Path],
                img_size: Tuple[int, int] = (1080, 1920),
                class_mapping: Dict[int, str] = None,
                default_class_name: str = "person"
            ) -> VideoAnnotation:
                """
                Convert MOT format tracking data to Supervisely VideoAnnotation.
                MOT format: frame_id,track_id,left,top,width,height,confidence,class_id,visibility

                Parsing rules:
                    * Blank lines, lines starting with ``#`` and lines with fewer
                      than 6 comma-separated fields are ignored.
                    * ``confidence``, ``class_id`` and ``visibility`` are optional;
                      a missing field or the literal ``-1`` means "use the default"
                      (1.0, 1 and 1.0 respectively).
                    * Detections with confidence below 0.1 are dropped.
                    * Boxes are truncated to integers and clipped to the image;
                      boxes that end up empty are dropped.
                    * Lines whose ``frame_id`` is below 1 are skipped with a warning
                      (frame ids are treated as 1-based).
                    * Lines that fail numeric parsing are skipped with a warning.

                Args:
                    mot_file_path: Path to the MOT text file.
                    img_size: Frame size as ``(height, width)``.
                    class_mapping: Optional class id -> class name mapping. Defaults
                        to ``{1: default_class_name}``.
                    default_class_name: Class name used for ids missing from
                        ``class_mapping``.

                Returns:
                    VideoAnnotation with one VideoObject per track id and one Frame
                    for every index from 0 up to the last frame that holds a figure.
                    If no figure was loaded, the annotation has a single empty frame.

                Raises:
                    FileNotFoundError: If ``mot_file_path`` does not exist.
                """
                mot_file_path = Path(mot_file_path)

                if not mot_file_path.exists():
                    raise FileNotFoundError(f"MOT file not found: {mot_file_path}")

                logger.info(f"Loading MOT data from: {mot_file_path}")
                logger.info(f"Image size: {img_size} (height, width)")

                # Default class mapping
                if class_mapping is None:
                    class_mapping = {1: default_class_name}

                # Parse MOT file
                obj_classes = {}  # class_name -> ObjClass, shared by all tracks of that class
                video_objects = {}  # track_id -> VideoObject
                frames_data = defaultdict(list)  # frame_idx -> list of figures
                img_h, img_w = img_size

                with open(mot_file_path, 'r') as f:
                    for line_num, line in enumerate(f, 1):
                        line = line.strip()
                        if not line or line.startswith('#'):
                            continue

                        try:
                            parts = line.split(',')
                            if len(parts) < 6:  # Minimum required fields
                                continue

                            frame_id = int(parts[0])
                            track_id = int(parts[1])
                            left = float(parts[2])
                            top = float(parts[3])
                            width = float(parts[4])
                            height = float(parts[5])

                            # Optional fields
                            confidence = float(parts[6]) if len(parts) > 6 and parts[6] != '-1' else 1.0
                            class_id = int(parts[7]) if len(parts) > 7 and parts[7] != '-1' else 1
                            # Visibility is not stored anywhere; it is parsed only so
                            # that a malformed value still rejects the whole line.
                            if len(parts) > 8 and parts[8] != '-1':
                                float(parts[8])

                            frame_idx = frame_id - 1  # Convert to 0-based indexing

                            # A negative index can never be placed on a frame; skip it
                            # up front so no orphan VideoObject is created for it.
                            if frame_idx < 0:
                                logger.warning(
                                    f"Skipped MOT line {line_num}: frame_id must be >= 1, got {frame_id}"
                                )
                                continue

                            # Skip low confidence detections
                            if confidence < 0.1:
                                continue

                            # Calculate the opposite corner before truncating to ints
                            right = left + width
                            bottom = top + height

                            # Clip to image boundaries
                            left = max(0, int(left))
                            top = max(0, int(top))
                            right = min(int(right), img_w - 1)
                            bottom = min(int(bottom), img_h - 1)

                            # Skip invalid boxes (empty after clipping, or fully outside)
                            if right <= left or bottom <= top:
                                continue

                            # Get class name
                            class_name = class_mapping.get(class_id, default_class_name)

                            # Create VideoObject if not exists, reusing a single
                            # ObjClass per class name instead of one per track.
                            if track_id not in video_objects:
                                obj_class = obj_classes.get(class_name)
                                if obj_class is None:
                                    obj_class = sly.ObjClass(class_name, sly.Rectangle)
                                    obj_classes[class_name] = obj_class
                                video_objects[track_id] = sly.VideoObject(obj_class)

                            video_object = video_objects[track_id]

                            # Create rectangle and figure with track_id
                            rect = sly.Rectangle(top=top, left=left, bottom=bottom, right=right)
                            figure = sly.VideoFigure(video_object, rect, frame_idx, track_id=str(track_id))

                            frames_data[frame_idx].append(figure)

                        except (ValueError, IndexError) as e:
                            logger.warning(f"Skipped invalid MOT line {line_num}: {line} - {e}")
                            continue

                # Create frames: every index up to the last populated one gets a Frame,
                # so gaps between detections become empty frames.
                if frames_data:
                    frames_count = max(frames_data) + 1
                    frames = [
                        sly.Frame(frame_idx, frames_data.get(frame_idx, []))
                        for frame_idx in range(frames_count)
                    ]
                else:
                    frames_count = 1
                    frames = [sly.Frame(0, [])]

                # Create VideoAnnotation
                objects = list(video_objects.values())

                annotation = VideoAnnotation(
                    img_size=img_size,
                    frames_count=frames_count,
                    objects=sly.VideoObjectCollection(objects),
                    frames=sly.FrameCollection(frames)
                )

                logger.info(f"Created VideoAnnotation with {len(objects)} tracks and {frames_count} frames")

                return annotation
         | 
| @@ -0,0 +1,519 @@ | |
| 1 | 
            +
            from typing import Union, Dict, List, Tuple, Iterator, Optional
         | 
| 2 | 
            +
            import numpy as np
         | 
| 3 | 
            +
            import cv2
         | 
| 4 | 
            +
            import ffmpeg
         | 
| 5 | 
            +
            from pathlib import Path
         | 
| 6 | 
            +
            from collections import defaultdict
         | 
| 7 | 
            +
            from dataclasses import dataclass
         | 
| 8 | 
            +
            import tempfile
         | 
| 9 | 
            +
            import shutil
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            import supervisely as sly
         | 
| 12 | 
            +
            from supervisely import logger
         | 
| 13 | 
            +
            from supervisely.nn.model.prediction import Prediction
         | 
| 14 | 
            +
            from supervisely import VideoAnnotation
         | 
| 15 | 
            +
            from supervisely.nn.tracker.utils import predictions_to_video_annotation
         | 
| 16 | 
            +
             | 
| 17 | 
            +
             | 
| 18 | 
            +
            class TrackingVisualizer:
         | 
| 19 | 
            +
                def __init__(
                    self,
                    show_labels: bool = True,
                    show_classes: bool = True,
                    show_trajectories: bool = True,
                    show_frame_number: bool = False,
                    box_thickness: int = 2,
                    text_scale: float = 0.6,
                    text_thickness: int = 2,
                    trajectory_length: int = 30,
                    codec: str = "mp4",
                    output_fps: float = 30.0,
                    colorize_tracks: bool = True,
                ):
                    """
                    Store rendering options and reset the visualizer's working state.

                    Args:
                        show_labels: Whether to show track IDs.
                        show_classes: Whether to show class names.
                        show_trajectories: Whether to draw trajectories.
                        show_frame_number: Whether to overlay frame number.
                        box_thickness: Thickness of bounding boxes.
                        text_scale: Scale of label text.
                        text_thickness: Thickness of label text.
                        trajectory_length: How many points to keep in trajectory.
                        codec: Output video codec.
                        output_fps: Output video framerate.
                        colorize_tracks: If True, ignore colors from project meta and
                            generate new colors for each tracked object; if False,
                            try to use colors from project meta when possible.
                    """
                    # What gets drawn
                    self.show_frame_number = show_frame_number
                    self.show_trajectories = show_trajectories
                    self.show_classes = show_classes
                    self.show_labels = show_labels

                    # How it gets drawn
                    self.colorize_tracks = colorize_tracks
                    self.trajectory_length = trajectory_length
                    self.text_thickness = text_thickness
                    self.text_scale = text_scale
                    self.box_thickness = box_thickness

                    # How the result is encoded
                    self.output_fps = output_fps
                    self.codec = codec

                    # Working state: starts empty and is filled in by other methods
                    self._temp_dir = None
                    self.annotation = None
                    self.tracks_by_frame = {}
                    self.track_colors = {}
                    self.track_centers = defaultdict(list)
                    self.color_palette = self._generate_color_palette()
         | 
| 74 | 
            +
                    
         | 
| 75 | 
            +
                def _generate_color_palette(self, num_colors: int = 100) -> List[Tuple[int, int, int]]:
                    """
                    Generate a bright, reproducible color palette for track visualization.

                    Colors are drawn in HSV space (random hue in [0, 180), saturation and
                    value in [200, 255)) and converted with ``cv2.COLOR_HSV2BGR``.

                    Args:
                        num_colors: Number of colors to generate.

                    Returns:
                        List of ``num_colors`` 3-tuples of Python ints in BGR order.
                    """
                    # A private RandomState seeded with 42 yields the same sequence as
                    # seeding the global generator, but does not reseed NumPy's global
                    # RNG as a side effect of constructing a visualizer.
                    rng = np.random.RandomState(42)
                    colors = []
                    for _ in range(num_colors):
                        hue = rng.randint(0, 180)
                        saturation = 200 + rng.randint(55)
                        value = 200 + rng.randint(55)

                        # cvtColor works on images, so wrap the color as a 1x1 pixel.
                        hsv_color = np.uint8([[[hue, saturation, value]]])
                        bgr_color = cv2.cvtColor(hsv_color, cv2.COLOR_HSV2BGR)[0][0]
                        colors.append(tuple(map(int, bgr_color)))
                    return colors
         | 
| 91 | 
            +
                
         | 
| 92 | 
            +
                def _get_track_color(self, track_id: int) -> Tuple[int, int, int]:
         | 
| 93 | 
            +
                    """Get consistent color for track ID from palette."""
         | 
| 94 | 
            +
                    return self.color_palette[track_id % len(self.color_palette)]
         | 
| 95 | 
            +
                
         | 
| 96 | 
            +
                def _get_video_info(self, video_path: Path) -> Tuple[int, int, float, int]:
                    """
                    Get video metadata using ffmpeg.

                    Reads the first stream whose ``codec_type`` is ``'video'`` from the
                    probe result.

                    Args:
                        video_path: Path to the video file to probe.

                    Returns:
                        Tuple of (width, height, fps, total_frames). ``fps`` comes from
                        ``r_frame_rate`` (30.0 when the field is absent or its
                        denominator is 0). ``total_frames`` comes from ``nb_frames``,
                        or is estimated as ``duration * fps`` when that is 0 or absent;
                        it is 0 if neither is available.

                    Raises:
                        ValueError: If probing fails, no video stream exists, or a
                            metadata field cannot be parsed. The original exception is
                            attached as ``__cause__``.
                    """
                    try:
                        probe = ffmpeg.probe(str(video_path))
                        video_stream = next(
                            (stream for stream in probe['streams'] if stream['codec_type'] == 'video'),
                            None,
                        )

                        if video_stream is None:
                            raise ValueError(f"No video stream found in: {video_path}")

                        width = int(video_stream['width'])
                        height = int(video_stream['height'])

                        # Extract FPS: usually a rational string such as "30000/1001"
                        fps_str = video_stream.get('r_frame_rate', '30/1')
                        if '/' in fps_str:
                            num, den = map(int, fps_str.split('/'))
                            fps = num / den if den != 0 else 30.0
                        else:
                            fps = float(fps_str)

                        # Get total frames
                        total_frames = int(video_stream.get('nb_frames', 0))
                        if total_frames == 0:
                            # Fallback: estimate from duration and fps
                            duration = float(video_stream.get('duration', 0))
                            total_frames = int(duration * fps) if duration > 0 else 0

                        return width, height, fps, total_frames

                    except Exception as e:
                        # Callers get one exception type for every failure mode;
                        # chain explicitly so the root cause stays attached.
                        raise ValueError(f"Could not read video metadata {video_path}: {str(e)}") from e
         | 
| 133 | 
            +
                
         | 
| 134 | 
            +
                def _create_frame_iterator(self, source: Union[str, Path]) -> Iterator[Tuple[int, np.ndarray]]:
         | 
| 135 | 
            +
                    """
         | 
| 136 | 
            +
                    Create iterator that yields (frame_index, frame) tuples.
         | 
| 137 | 
            +
                    
         | 
| 138 | 
            +
                    Args:
         | 
| 139 | 
            +
                        source: Path to video file or directory with frame images
         | 
| 140 | 
            +
                        
         | 
| 141 | 
            +
                    Yields:
         | 
| 142 | 
            +
                        Tuple of (frame_index, frame_array)
         | 
| 143 | 
            +
                    """
         | 
| 144 | 
            +
                    source = Path(source)
         | 
| 145 | 
            +
                    
         | 
| 146 | 
            +
                    if source.is_file():
         | 
| 147 | 
            +
                        yield from self._iterate_video_frames(source)
         | 
| 148 | 
            +
                    elif source.is_dir():
         | 
| 149 | 
            +
                        yield from self._iterate_directory_frames(source)
         | 
| 150 | 
            +
                    else:
         | 
| 151 | 
            +
                        raise ValueError(f"Source must be a video file or directory, got: {source}")
         | 
| 152 | 
            +
                
         | 
| 153 | 
            +
                def _iterate_video_frames(self, video_path: Path) -> Iterator[Tuple[int, np.ndarray]]:
                    """
                    Decode a video with ffmpeg and yield its frames one at a time.

                    Also records the source frame rate in ``self.source_fps`` and the
                    ``(width, height)`` pair in ``self.frame_size``.

                    Args:
                        video_path: Path to the video file

                    Yields:
                        Tuple of (frame_index, frame_array); each frame is a uint8 array of
                        shape (height, width, 3) decoded as ``bgr24``. The array is built with
                        ``np.frombuffer`` over the bytes read from the pipe, so callers that
                        need to draw on it should copy it first.
                    """
                    width, height, fps, _ = self._get_video_info(video_path)

                    # Store video info for later use
                    self.source_fps = fps
                    self.frame_size = (width, height)

                    process = (
                        ffmpeg
                        .input(str(video_path))
                        .output('pipe:', format='rawvideo', pix_fmt='bgr24')
                        # stderr is piped but never drained while frames are read; keep ffmpeg
                        # to error-level messages so its progress log cannot fill the pipe
                        # buffer and block the decoder on long videos.
                        .global_args('-loglevel', 'error')
                        .run_async(pipe_stdout=True, pipe_stderr=True)
                    )

                    try:
                        frame_size_bytes = width * height * 3
                        frame_idx = 0

                        while True:
                            frame_data = process.stdout.read(frame_size_bytes)
                            # A short read means end of stream (or a truncated last frame).
                            if len(frame_data) != frame_size_bytes:
                                break

                            frame = np.frombuffer(frame_data, np.uint8).reshape([height, width, 3])
                            yield frame_idx, frame
                            frame_idx += 1

                    finally:
                        # Runs on normal exhaustion and when the consumer stops early.
                        process.stdout.close()
                        if process.stderr:
                            process.stderr.close()
                        process.wait()
         | 
| 186 | 
            +
                
         | 
| 187 | 
            +
                def _iterate_directory_frames(self, frames_dir: Path) -> Iterator[Tuple[int, np.ndarray]]:
         | 
| 188 | 
            +
                    """Iterate through image frames in directory."""
         | 
| 189 | 
            +
                    if not frames_dir.is_dir():
         | 
| 190 | 
            +
                        raise ValueError(f"Directory does not exist: {frames_dir}")
         | 
| 191 | 
            +
                    
         | 
| 192 | 
            +
                    # Support common image extensions
         | 
| 193 | 
            +
                    extensions = ['.jpg', '.jpeg', '.png', '.bmp', '.tiff']
         | 
| 194 | 
            +
                    image_files = []
         | 
| 195 | 
            +
                    for ext in extensions:
         | 
| 196 | 
            +
                        image_files.extend(frames_dir.glob(f'*{ext}'))
         | 
| 197 | 
            +
                        image_files.extend(frames_dir.glob(f'*{ext.upper()}'))
         | 
| 198 | 
            +
                    
         | 
| 199 | 
            +
                    image_files = sorted(image_files)
         | 
| 200 | 
            +
                    if not image_files:
         | 
| 201 | 
            +
                        raise ValueError(f"No image files found in directory: {frames_dir}")
         | 
| 202 | 
            +
                    
         | 
| 203 | 
            +
                    # Set fps from config for image sequences
         | 
| 204 | 
            +
                    self.source_fps = self.output_fps
         | 
| 205 | 
            +
                    
         | 
| 206 | 
            +
                    for frame_idx, img_path in enumerate(image_files):
         | 
| 207 | 
            +
                        frame = cv2.imread(str(img_path))
         | 
| 208 | 
            +
                        if frame is not None:
         | 
| 209 | 
            +
                            if frame_idx == 0:
         | 
| 210 | 
            +
                                h, w = frame.shape[:2]
         | 
| 211 | 
            +
                                self.frame_size = (w, h)
         | 
| 212 | 
            +
                            yield frame_idx, frame
         | 
| 213 | 
            +
                        else:
         | 
| 214 | 
            +
                            logger.warning(f"Could not read image: {img_path}")
         | 
| 215 | 
            +
                
         | 
| 216 | 
            +
                def _extract_tracks_from_annotation(self) -> None:
                    """
                    Build per-frame tracking data from ``self.annotation``.

                    Resets and fills ``self.tracks_by_frame`` (frame index -> list of
                    ``(track_id, bbox, class_name)``) and ``self.track_colors``
                    (track id -> color). Only rectangle figures are considered; a track id
                    is the position of the figure's object in ``self.annotation.objects``.
                    """
                    self.tracks_by_frame = defaultdict(list)
                    self.track_colors = {}

                    # object key -> (track id, class name)
                    track_lookup = {
                        video_obj.key: (position, video_obj.obj_class.name)
                        for position, video_obj in enumerate(self.annotation.objects)
                    }

                    for ann_frame in self.annotation.frames:
                        frame_no = ann_frame.index
                        for fig in ann_frame.figures:
                            if fig.geometry.geometry_name() != 'rectangle':
                                continue

                            track_entry = track_lookup.get(fig.parent_object.key)
                            if track_entry is None:
                                # figure belongs to an object that is not listed in the annotation
                                continue
                            track_id, class_name = track_entry

                            box = fig.geometry
                            bbox = (box.left, box.top, box.right, box.bottom)

                            if track_id not in self.track_colors:
                                # With colorize_tracks the class color is never consulted;
                                # otherwise it is used when present (reversed, rgb -> bgr).
                                class_color = None
                                if not self.colorize_tracks:
                                    class_color = fig.video_object.obj_class.color

                                if class_color:
                                    chosen_color = class_color[::-1]
                                else:
                                    chosen_color = self._get_track_color(track_id)
                                self.track_colors[track_id] = chosen_color

                            self.tracks_by_frame[frame_no].append((track_id, bbox, class_name))

                    logger.info(f"Extracted tracks from {len(self.tracks_by_frame)} frames")
         | 
| 267 | 
            +
                    
         | 
| 268 | 
            +
                def _draw_detection(self, img: np.ndarray, track_id: int, bbox: Tuple[int, int, int, int], 
         | 
| 269 | 
            +
                                class_name: str) -> Optional[Tuple[int, int]]:
         | 
| 270 | 
            +
                    """
         | 
| 271 | 
            +
                    Draw single detection with track ID and class label.
         | 
| 272 | 
            +
                    Returns the center point of the bbox for trajectory drawing.
         | 
| 273 | 
            +
                    """
         | 
| 274 | 
            +
                    x1, y1, x2, y2 = map(int, bbox)
         | 
| 275 | 
            +
             | 
| 276 | 
            +
                    if x2 <= x1 or y2 <= y1:
         | 
| 277 | 
            +
                        return None
         | 
| 278 | 
            +
                    
         | 
| 279 | 
            +
                    color = self.track_colors[track_id]
         | 
| 280 | 
            +
             | 
| 281 | 
            +
                    # Draw bounding box
         | 
| 282 | 
            +
                    cv2.rectangle(img, (x1, y1), (x2, y2), color, self.box_thickness)
         | 
| 283 | 
            +
             | 
| 284 | 
            +
                    # Draw label if enabled
         | 
| 285 | 
            +
                    if self.show_labels:
         | 
| 286 | 
            +
                        label = f"ID:{track_id}"
         | 
| 287 | 
            +
                        if self.show_classes:
         | 
| 288 | 
            +
                            label += f" ({class_name})"
         | 
| 289 | 
            +
             | 
| 290 | 
            +
                        label_y = y1 - 10 if y1 > 30 else y2 + 25
         | 
| 291 | 
            +
                        (text_w, text_h), _ = cv2.getTextSize(
         | 
| 292 | 
            +
                            label, cv2.FONT_HERSHEY_SIMPLEX, self.text_scale, self.text_thickness
         | 
| 293 | 
            +
                        )
         | 
| 294 | 
            +
             | 
| 295 | 
            +
                        cv2.rectangle(img, (x1, label_y - text_h - 5), 
         | 
| 296 | 
            +
                                    (x1 + text_w, label_y + 5), color, -1)
         | 
| 297 | 
            +
                        cv2.putText(img, label, (x1, label_y), 
         | 
| 298 | 
            +
                                cv2.FONT_HERSHEY_SIMPLEX, self.text_scale, 
         | 
| 299 | 
            +
                                (255, 255, 255), self.text_thickness, cv2.LINE_AA)
         | 
| 300 | 
            +
             | 
| 301 | 
            +
                    # Return center point for trajectory
         | 
| 302 | 
            +
                    return (x1 + x2) // 2, (y1 + y2) // 2
         | 
| 303 | 
            +
             | 
| 304 | 
            +
             | 
| 305 | 
            +
                def _draw_trajectories(self, img: np.ndarray) -> None:
         | 
| 306 | 
            +
                    """Draw trajectory lines for all tracks, filtering out big jumps."""
         | 
| 307 | 
            +
                    if not self.show_trajectories:
         | 
| 308 | 
            +
                        return
         | 
| 309 | 
            +
             | 
| 310 | 
            +
                    max_jump = 200  
         | 
| 311 | 
            +
             | 
| 312 | 
            +
                    for track_id, centers in self.track_centers.items():
         | 
| 313 | 
            +
                        if len(centers) < 2:
         | 
| 314 | 
            +
                            continue
         | 
| 315 | 
            +
             | 
| 316 | 
            +
                        color = self.track_colors[track_id]
         | 
| 317 | 
            +
                        points = centers[-self.trajectory_length:]
         | 
| 318 | 
            +
             | 
| 319 | 
            +
                        for i in range(1, len(points)):
         | 
| 320 | 
            +
                            p1, p2 = points[i - 1], points[i]
         | 
| 321 | 
            +
                            if p1 is None or p2 is None:
         | 
| 322 | 
            +
                                continue
         | 
| 323 | 
            +
                          
         | 
| 324 | 
            +
                            if np.hypot(p2[0] - p1[0], p2[1] - p1[1]) > max_jump:
         | 
| 325 | 
            +
                                continue
         | 
| 326 | 
            +
                            cv2.line(img, p1, p2, color, 2)
         | 
| 327 | 
            +
                            cv2.circle(img, p1, 3, color, -1)
         | 
| 328 | 
            +
             | 
| 329 | 
            +
                    
         | 
| 330 | 
            +
                def _process_single_frame(self, frame: np.ndarray, frame_idx: int) -> np.ndarray:
         | 
| 331 | 
            +
                    """
         | 
| 332 | 
            +
                    Process single frame: add annotations and return processed frame.
         | 
| 333 | 
            +
                    
         | 
| 334 | 
            +
                    Args:
         | 
| 335 | 
            +
                        frame: Input frame
         | 
| 336 | 
            +
                        frame_idx: Frame index
         | 
| 337 | 
            +
                        
         | 
| 338 | 
            +
                    Returns:
         | 
| 339 | 
            +
                        Annotated frame
         | 
| 340 | 
            +
                    """
         | 
| 341 | 
            +
                    img = frame.copy()
         | 
| 342 | 
            +
                    active_ids = set()
         | 
| 343 | 
            +
                    # Draw detections for current frame
         | 
| 344 | 
            +
                    if frame_idx in self.tracks_by_frame:
         | 
| 345 | 
            +
                        for track_id, bbox, class_name in self.tracks_by_frame[frame_idx]:
         | 
| 346 | 
            +
                            center = self._draw_detection(img, track_id, bbox, class_name)
         | 
| 347 | 
            +
                            self.track_centers[track_id].append(center)
         | 
| 348 | 
            +
                            if len(self.track_centers[track_id]) > self.trajectory_length:
         | 
| 349 | 
            +
                                self.track_centers[track_id].pop(0)
         | 
| 350 | 
            +
                            active_ids.add(track_id)
         | 
| 351 | 
            +
                    
         | 
| 352 | 
            +
                    for tid in self.track_centers.keys():
         | 
| 353 | 
            +
                        if tid not in active_ids:
         | 
| 354 | 
            +
                            self.track_centers[tid].append(None)
         | 
| 355 | 
            +
                            if len(self.track_centers[tid]) > self.trajectory_length:
         | 
| 356 | 
            +
                                self.track_centers[tid].pop(0)
         | 
| 357 | 
            +
                            
         | 
| 358 | 
            +
                    # Draw trajectories
         | 
| 359 | 
            +
                    self._draw_trajectories(img)
         | 
| 360 | 
            +
                    
         | 
| 361 | 
            +
                    # Add frame number if requested
         | 
| 362 | 
            +
                    if self.show_frame_number:
         | 
| 363 | 
            +
                        cv2.putText(img, f"Frame: {frame_idx + 1}", (10, 30),
         | 
| 364 | 
            +
                                   cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 2, cv2.LINE_AA)
         | 
| 365 | 
            +
                    
         | 
| 366 | 
            +
                    return img
         | 
| 367 | 
            +
                
         | 
| 368 | 
            +
                def _save_processed_frame(self, frame: np.ndarray, frame_idx: int) -> str:
                    """
                    Write a processed frame as a JPEG into the temporary directory.

                    The file is named ``frame_<index>.jpg`` with the index zero-padded to
                    eight digits and is encoded at JPEG quality 95.

                    Args:
                        frame: Processed frame
                        frame_idx: Frame index used in the file name

                    Returns:
                        Path to saved frame
                    """
                    file_name = f"frame_{frame_idx:08d}.jpg"
                    destination = str(self._temp_dir / file_name)
                    jpeg_params = [cv2.IMWRITE_JPEG_QUALITY, 95]
                    cv2.imwrite(destination, frame, jpeg_params)
                    return destination
         | 
| 382 | 
            +
                
         | 
| 383 | 
            +
                def _create_video_from_frames(self, output_path: Union[str, Path]) -> None:
                    """
                    Create final video from processed frames using ffmpeg.

                    Reads the ``frame_%08d.jpg`` sequence from the temporary directory at
                    ``self.source_fps`` and encodes it with libx264 (yuv420p, crf 18),
                    overwriting any existing output file. Parent directories of the output
                    are created if needed.

                    Args:
                        output_path: Path for output video

                    Raises:
                        ValueError: If ffmpeg fails to encode the video
                    """
                    output_path = Path(output_path)
                    output_path.parent.mkdir(parents=True, exist_ok=True)

                    # Create video from frame sequence
                    input_pattern = str(self._temp_dir / "frame_%08d.jpg")

                    try:
                        (
                            ffmpeg
                            .input(input_pattern, pattern_type='sequence', framerate=self.source_fps)
                            .output(
                                str(output_path),
                                vcodec='libx264',
                                pix_fmt='yuv420p',
                                crf=18,
                                # libx264 with yuv420p rejects odd frame dimensions; pad up to
                                # the next even size (a no-op for frames that are already even).
                                vf='pad=ceil(iw/2)*2:ceil(ih/2)*2',
                            )
                            .overwrite_output()
                            .run(capture_stdout=True, capture_stderr=True)
                        )
                        logger.info(f"Video saved to {output_path}")

                    except ffmpeg.Error as e:
                        error_msg = e.stderr.decode() if e.stderr else "Unknown ffmpeg error"
                        raise ValueError(f"Failed to create video: {error_msg}") from e
         | 
| 409 | 
            +
                
         | 
| 410 | 
            +
                def _cleanup_temp_directory(self) -> None:
         | 
| 411 | 
            +
                    """Clean up temporary directory and all its contents."""
         | 
| 412 | 
            +
                    if self._temp_dir and self._temp_dir.exists():
         | 
| 413 | 
            +
                        shutil.rmtree(self._temp_dir)
         | 
| 414 | 
            +
                        self._temp_dir = None
         | 
| 415 | 
            +
                
         | 
| 416 | 
            +
                def visualize_video_annotation(self, annotation: VideoAnnotation,
                                              source: Union[str, Path],
                                              output_path: Union[str, Path]) -> None:
                    """
                    Visualize tracking annotations on video using streaming approach.

                    Frames are read one by one, annotated, written to a temporary directory
                    and finally encoded into the output video. The temporary directory is
                    removed afterwards, whether or not rendering succeeded.

                    Args:
                        annotation: Supervisely VideoAnnotation object with tracking data
                        source: Path to video file or directory containing frame images
                        output_path: Path for output video file

                    Raises:
                        TypeError: If annotation is not VideoAnnotation
                        ValueError: If source is invalid, yields no frames, or the video
                            cannot be encoded
                    """
                    if not isinstance(annotation, VideoAnnotation):
                        raise TypeError(f"Annotation must be VideoAnnotation, got {type(annotation)}")

                    # Store annotation
                    self.annotation = annotation

                    # Create temporary directory for processed frames
                    self._temp_dir = Path(tempfile.mkdtemp(prefix="video_viz_"))

                    try:
                        # Extract tracking data
                        self._extract_tracks_from_annotation()

                        if not self.tracks_by_frame:
                            # Not fatal: the video is still rendered, just without boxes.
                            logger.warning("No tracking data found in annotation")

                        # Reset trajectory tracking
                        self.track_centers = defaultdict(list)

                        # Process frames one by one
                        frame_count = 0
                        for frame_idx, frame in self._create_frame_iterator(source):
                            # Annotations are looked up by the source frame index...
                            processed_frame = self._process_single_frame(frame, frame_idx)

                            # ...but output files are numbered consecutively. The directory
                            # reader skips unreadable images, and a gap in the file numbering
                            # would make ffmpeg's sequence reader stop at the first missing file.
                            self._save_processed_frame(processed_frame, frame_count)

                            frame_count += 1

                            # Progress logging
                            if frame_count % 100 == 0:
                                logger.info(f"Processed {frame_count} frames")

                        logger.info(f"Finished processing {frame_count} frames")

                        if frame_count == 0:
                            # Fail with a clear message instead of an opaque ffmpeg error.
                            raise ValueError(f"No frames could be read from source: {source}")

                        # Create final video from saved frames
                        self._create_video_from_frames(output_path)

                    finally:
                        # Always cleanup temporary files
                        self._cleanup_temp_directory()
         | 
| 473 | 
            +
             | 
| 474 | 
            +
                def __del__(self):
         | 
| 475 | 
            +
                    """Cleanup temporary directory on object destruction."""
         | 
| 476 | 
            +
                    self._cleanup_temp_directory()
         | 
| 477 | 
            +
                    
         | 
| 478 | 
            +
             | 
| 479 | 
            +
            def visualize(
                predictions: Union[VideoAnnotation, List[Prediction]],
                source: Union[str, Path],
                output_path: Union[str, Path],
                show_labels: bool = True,
                show_classes: bool = True,
                show_trajectories: bool = True,
                box_thickness: int = 2,
                colorize_tracks: bool = True,
                **kwargs
            ) -> None:
                """
                Render tracking results onto a video.

                Accepts either a VideoAnnotation or a list of Prediction objects; a list is
                first converted with ``predictions_to_video_annotation``.

                Args:
                    predictions (supervisely.VideoAnnotation | List[Prediction]): Tracking data to render.
                    source (str | Path): Path to an input video file or a directory of sequential frames (e.g., frame_000001.jpg).
                    output_path (str | Path): Path to the output video file to be created.
                    show_labels (bool, default=True): Draw per-object labels (track IDs).
                    show_classes (bool, default=True): Draw class names for each object.
                    show_trajectories (bool, default=True): Render object trajectories across frames.
                    box_thickness (int, default=2): Bounding-box line thickness in pixels.
                    colorize_tracks (bool, default=True): if True, ignore colors from project meta and generate new colors for each tracked object; if False, try to use colors from project meta when possible.
                    **kwargs: Extra options forwarded unchanged to TrackingVisualizer.

                Raises:
                    TypeError: If predictions is neither a VideoAnnotation nor a list.
                """
                # The visualizer is built before the input type is checked.
                visualizer_options = dict(
                    show_labels=show_labels,
                    show_classes=show_classes,
                    show_trajectories=show_trajectories,
                    box_thickness=box_thickness,
                    colorize_tracks=colorize_tracks,
                    **kwargs
                )
                renderer = TrackingVisualizer(**visualizer_options)

                if isinstance(predictions, VideoAnnotation):
                    video_annotation = predictions
                elif isinstance(predictions, list):
                    video_annotation = predictions_to_video_annotation(predictions)
                else:
                    raise TypeError(f"Predictions must be VideoAnnotation or list of Prediction, got {type(predictions)}")

                renderer.visualize_video_annotation(video_annotation, source, output_path)
         | 
| 519 | 
            +
             | 
| @@ -174,6 +174,7 @@ can pass them in the terminal before running the script: | |
| 174 174 | 
             
                model = api.nn.deploy(
         | 
| 175 175 | 
             
                    model="{{ experiment.paths.artifacts_dir.path }}/checkpoints/{{ experiment.training.checkpoints.pytorch.name }}",
         | 
| 176 176 | 
             
                    device="cuda:0", # or "cpu"
         | 
| 177 | 
            +
                    workspace_id={{ experiment.project.workspace_id }}
         | 
| 177 178 | 
             
                )
         | 
| 178 179 |  | 
| 179 180 | 
             
                # 3. Predict
         | 
| @@ -321,49 +322,41 @@ API](https://docs.supervisely.com/neural-networks/overview-1/prediction-api){:ta | |
| 321 322 |  | 
| 322 323 | 
             
            ## Tracking Objects in Video
         | 
| 323 324 |  | 
| 324 | 
            -
             | 
| 325 | 
            -
            [BoxMot](https://github.com/mikel-brostrom/boxmot){:target="_blank"} is a
         | 
| 326 | 
            -
            third-party library that implements lightweight neural networks for tracking-by-detection task (when the tracking is
         | 
| 327 | 
            -
            performed on the objects predicted by a separate detector). For `boxmot` models you can use even CPU device.
         | 
| 325 | 
            +
            Supervisely now supports **tracking-by-detection** out of the box. We leverage a lightweight tracking algorithm (such as [BoT-SORT](https://github.com/NirAharon/BoT-SORT){:target="_blank"}) that identifies unique objects across video frames and assigns an ID to each of them. This allows us to connect separate detections from different frames into a single track for each object.
         | 
| 328 326 |  | 
| 329 | 
            -
             | 
| 330 | 
            -
             | 
| 331 | 
            -
            ```bash
         | 
| 332 | 
            -
            pip install boxmot
         | 
| 333 | 
            -
            ```
         | 
| 334 | 
            -
             | 
| 335 | 
            -
            Supervisely SDK has the `track()` method from `supervisely.nn.tracking` which allows you to apply `boxmot` models
         | 
| 336 | 
            -
            together with a detector in a single line of code. This method takes two arguments: a `boxmot` tracker, and a
         | 
| 337 | 
            -
            `PredictionSession` of a detector. It returns a `sly.VideoAnnotation` with the tracked objects.
         | 
| 327 | 
            +
            To apply tracking via the API, first deploy your detection model (or connect to an already deployed one), and then call the `predict()` method with the `tracking=True` parameter. You can also specify tracking configuration parameters by passing `tracking_config={...}` with your custom settings.
         | 
| 338 328 |  | 
| 339 329 | 
             
            ```python
         | 
| 340 330 | 
             
            import supervisely as sly
         | 
| 341 | 
            -
            from supervisely.nn.tracking import track
         | 
| 342 | 
            -
            import boxmot
         | 
| 343 | 
            -
            from pathlib import Path
         | 
| 344 | 
            -
             | 
| 345 | 
            -
            # Deploy a detector
         | 
| 346 | 
            -
            detector = api.nn.deploy(
         | 
| 347 | 
            -
                model="{{ experiment.model.framework }}/{{ experiment.model.name }}",
         | 
| 348 | 
            -
                device="cuda:0", # Use GPU for detection
         | 
| 349 | 
            -
            )
         | 
| 350 331 |  | 
| 351 | 
            -
             | 
| 352 | 
            -
             | 
| 353 | 
            -
             | 
| 354 | 
            -
             | 
| 332 | 
            +
            api = sly.Api()
         | 
| 333 | 
            +
             | 
| 334 | 
            +
            # Deploy your model
         | 
| 335 | 
            +
            model = api.nn.deploy(
         | 
| 336 | 
            +
                model="{{ experiment.paths.artifacts_dir.path }}/checkpoints/{{ experiment.training.checkpoints.pytorch.name }}",
         | 
| 337 | 
            +
                device="cuda",
         | 
| 338 | 
            +
                workspace_id={{ experiment.project.workspace_id }},
         | 
| 355 339 | 
             
            )
         | 
| 356 340 |  | 
| 357 | 
            -
            #  | 
| 358 | 
            -
             | 
| 359 | 
            -
                video_id= | 
| 360 | 
            -
                 | 
| 361 | 
            -
                 | 
| 341 | 
            +
            # Apply tracking
         | 
| 342 | 
            +
            predictions = model.predict(
         | 
| 343 | 
            +
                video_id=YOUR_VIDEO_ID,  # Video ID in Supervisely
         | 
| 344 | 
            +
                tracking=True,
         | 
| 345 | 
            +
                tracking_config={
         | 
| 346 | 
            +
                    "tracker": "botsort",  # botsort is a powerful tracking algorithm used by default
         | 
| 347 | 
            +
                    # You can pass other tracking parameters here, see the docs for details
         | 
| 348 | 
            +
                }
         | 
| 362 349 | 
             
            )
         | 
| 350 | 
            +
             | 
| 351 | 
            +
            # Processing results
         | 
| 352 | 
            +
            for pred in predictions:
         | 
| 353 | 
            +
                frame_index = pred.frame_index
         | 
| 354 | 
            +
                annotation = pred.annotation
         | 
| 355 | 
            +
                track_ids = pred.track_ids
         | 
| 356 | 
            +
                print(f"Frame {frame_index}: {len(track_ids)} tracks")
         | 
| 363 357 | 
             
            ```
         | 
| 364 358 |  | 
| 365 | 
            -
            >  | 
| 366 | 
            -
            Video](https://docs.supervisely.com/neural-networks/overview-1/prediction-api#tracking-objects-in-video){:target="_blank"}.
         | 
| 359 | 
            +
            > You can also apply trackers in your own code or applications. Read more about this in the docs [Video Object Tracking](https://docs.supervisely.com/neural-networks/overview-1/video-object-tracking){:target="_blank"}.
         | 
| 367 360 |  | 
| 368 361 | 
             
            {% endif %}
         | 
| 369 362 |  | 
| @@ -994,9 +994,12 @@ supervisely/nn/model/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hS | |
| 994 994 | 
             
            supervisely/nn/model/model_api.py,sha256=tSstGGdzm3cHc81P7IoM447sMVesUYJpspzMqUY70pg,10322
         | 
| 995 995 | 
             
            supervisely/nn/model/prediction.py,sha256=5a-SLvxV3h6dp5KUs1ENZBY-rkWpddShjI8DBWQHhtU,11945
         | 
| 996 996 | 
             
            supervisely/nn/model/prediction_session.py,sha256=W7DKIWvufCUQhIdk8q_fTsIOxW_DuZbkr1j_NVcT5gk,27386
         | 
| 997 | 
            -
            supervisely/nn/tracker/__init__.py,sha256= | 
| 997 | 
            +
            supervisely/nn/tracker/__init__.py,sha256=1Pv1zLedcZaTk1BS3ezscQbVizq7vQABlNbLhEhHkOI,326
         | 
| 998 998 | 
             
            supervisely/nn/tracker/base_tracker.py,sha256=2d23JlHizOqVye324YT20EE8RP52uwoQUkPYvPXJTdw,1668
         | 
| 999 | 
            -
            supervisely/nn/tracker/botsort_tracker.py,sha256= | 
| 999 | 
            +
            supervisely/nn/tracker/botsort_tracker.py,sha256=scQIessMnF9xaBFkAkSDnUfY0jYEftpod1nUxMisZXU,10141
         | 
| 1000 | 
            +
            supervisely/nn/tracker/calculate_metrics.py,sha256=JjXI4VYWYSZ5j2Ed81FNYozkS3v2UAM73ztjLrHGg58,10434
         | 
| 1001 | 
            +
            supervisely/nn/tracker/utils.py,sha256=UxGXcMfFgqh7y26oSk9MIeG_lOECn9kbytQWkiGVL2A,10046
         | 
| 1002 | 
            +
            supervisely/nn/tracker/visualize.py,sha256=DY2cnRm4w6_e47xVuF9fwSnOP27ZWTRfv4MFPCyxsD4,20146
         | 
| 1000 1003 | 
             
            supervisely/nn/tracker/botsort/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
         | 
| 1001 1004 | 
             
            supervisely/nn/tracker/botsort/botsort_config.yaml,sha256=q_7Gp1-15lGYOLv7JvxVJ69mm6hbCLbUAl_ZBOYNGpw,535
         | 
| 1002 1005 | 
             
            supervisely/nn/tracker/botsort/osnet_reid/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| @@ -1007,7 +1010,7 @@ supervisely/nn/tracker/botsort/tracker/basetrack.py,sha256=oPcXpQWev6YmpjzqWwQKZ | |
| 1007 1010 | 
             
            supervisely/nn/tracker/botsort/tracker/gmc.py,sha256=E6LkkVEYeBUjKaQ7VnYxMAj0fdDDWH8Es_tC1a0Z0rs,11754
         | 
| 1008 1011 | 
             
            supervisely/nn/tracker/botsort/tracker/kalman_filter.py,sha256=waTArMcbmpHAzb57aJ-IcS1o5-i-TX5U-0N_M_28nlo,9671
         | 
| 1009 1012 | 
             
            supervisely/nn/tracker/botsort/tracker/matching.py,sha256=bgnheHwWD3XZSI3OJVfdrU5bYJ44rxPHzzSElfg6LZM,6600
         | 
| 1010 | 
            -
            supervisely/nn/tracker/botsort/tracker/mc_bot_sort.py,sha256= | 
| 1013 | 
            +
            supervisely/nn/tracker/botsort/tracker/mc_bot_sort.py,sha256=dFjWmubyJLrUP4i-CJaOhPEkQD-WD144deW7Ua5a7Rc,17775
         | 
| 1011 1014 | 
             
            supervisely/nn/training/__init__.py,sha256=gY4PCykJ-42MWKsqb9kl-skemKa8yB6t_fb5kzqR66U,111
         | 
| 1012 1015 | 
             
            supervisely/nn/training/train_app.py,sha256=DW9J6qZEebRFdq59sexvEZTWyExisbz7Z2lHXwEkPjY,131937
         | 
| 1013 1016 | 
             
            supervisely/nn/training/gui/__init__.py,sha256=Nqnn8clbgv-5l0PgxcTOldg8mkMKrFn4TvPL-rYUUGg,1
         | 
| @@ -1075,7 +1078,7 @@ supervisely/template/base_generator.py,sha256=3nesbfRpueyRYljQSTnkMjeC8ERTOfjI88 | |
| 1075 1078 | 
             
            supervisely/template/extensions.py,sha256=kTYxu_LrvFyUN3HByCebGq8ra7zUygcEyw4qTUHq3M4,5255
         | 
| 1076 1079 | 
             
            supervisely/template/template_renderer.py,sha256=SzGxRdbP59uxqcZT8kZbaHN2epK8Vjfh-0jKBpkdCBY,9709
         | 
| 1077 1080 | 
             
            supervisely/template/experiment/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
         | 
| 1078 | 
            -
            supervisely/template/experiment/experiment.html.jinja,sha256= | 
| 1081 | 
            +
            supervisely/template/experiment/experiment.html.jinja,sha256=U-PgdHePrEcGklqq286SLcintDQ85rNSy95uFwOjzg4,18947
         | 
| 1079 1082 | 
             
            supervisely/template/experiment/experiment_generator.py,sha256=b0zqP_dt3bvXxjZx6DxTog2-ih-Q7pj4w_5c70WEfjU,44924
         | 
| 1080 1083 | 
             
            supervisely/template/experiment/header.html.jinja,sha256=sWGWeoYeixdLUOnQDjzrfGI8K6vL93dOt7fAgNWqkvY,13751
         | 
| 1081 1084 | 
             
            supervisely/template/experiment/sidebar.html.jinja,sha256=4IxuJzcU1OT93mXMixE7EAMYfcn_lOVfCjS3VkEieSk,9323
         | 
| @@ -1124,9 +1127,9 @@ supervisely/worker_proto/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZ | |
| 1124 1127 | 
             
            supervisely/worker_proto/worker_api_pb2.py,sha256=VQfi5JRBHs2pFCK1snec3JECgGnua3Xjqw_-b3aFxuM,59142
         | 
| 1125 1128 | 
             
            supervisely/worker_proto/worker_api_pb2_grpc.py,sha256=3BwQXOaP9qpdi0Dt9EKG--Lm8KGN0C5AgmUfRv77_Jk,28940
         | 
| 1126 1129 | 
             
            supervisely_lib/__init__.py,sha256=7-3QnN8Zf0wj8NCr2oJmqoQWMKKPKTECvjH9pd2S5vY,159
         | 
| 1127 | 
            -
            supervisely-6.73. | 
| 1128 | 
            -
            supervisely-6.73. | 
| 1129 | 
            -
            supervisely-6.73. | 
| 1130 | 
            -
            supervisely-6.73. | 
| 1131 | 
            -
            supervisely-6.73. | 
| 1132 | 
            -
            supervisely-6.73. | 
| 1130 | 
            +
            supervisely-6.73.429.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
         | 
| 1131 | 
            +
            supervisely-6.73.429.dist-info/METADATA,sha256=bRh0S_1wOEZTgvxz7fyuN4M0Y_Ljtt7dWUpDPAWSQ3U,35433
         | 
| 1132 | 
            +
            supervisely-6.73.429.dist-info/WHEEL,sha256=iAkIy5fosb7FzIOwONchHf19Qu7_1wCWyFNR5gu9nU0,91
         | 
| 1133 | 
            +
            supervisely-6.73.429.dist-info/entry_points.txt,sha256=U96-5Hxrp2ApRjnCoUiUhWMqijqh8zLR03sEhWtAcms,102
         | 
| 1134 | 
            +
            supervisely-6.73.429.dist-info/top_level.txt,sha256=kcFVwb7SXtfqZifrZaSE3owHExX4gcNYe7Q2uoby084,28
         | 
| 1135 | 
            +
            supervisely-6.73.429.dist-info/RECORD,,
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         | 
| 
            File without changes
         |