PyPI - ultralytics - Versions diffs - 8.3.143__py3-none-any.whl → 8.3.145__py3-none-any.whl - Mend

ultralytics 8.3.143-py3-none-any.whl → 8.3.145-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (148) hide show

tests/conftest.py +7 -24
tests/test_cli.py +1 -1
tests/test_cuda.py +7 -2
tests/test_engine.py +7 -8
tests/test_exports.py +16 -16
tests/test_integrations.py +1 -1
tests/test_solutions.py +11 -11
ultralytics/__init__.py +1 -1
ultralytics/cfg/__init__.py +16 -13
ultralytics/data/annotator.py +6 -5
ultralytics/data/augment.py +127 -126
ultralytics/data/base.py +54 -51
ultralytics/data/build.py +47 -23
ultralytics/data/converter.py +47 -43
ultralytics/data/dataset.py +51 -50
ultralytics/data/loaders.py +77 -44
ultralytics/data/split.py +22 -9
ultralytics/data/split_dota.py +63 -39
ultralytics/data/utils.py +59 -39
ultralytics/engine/exporter.py +79 -27
ultralytics/engine/model.py +52 -51
ultralytics/engine/predictor.py +37 -28
ultralytics/engine/results.py +191 -161
ultralytics/engine/trainer.py +36 -19
ultralytics/engine/tuner.py +12 -9
ultralytics/engine/validator.py +7 -9
ultralytics/hub/__init__.py +11 -13
ultralytics/hub/auth.py +22 -2
ultralytics/hub/google/__init__.py +19 -19
ultralytics/hub/session.py +37 -51
ultralytics/hub/utils.py +19 -5
ultralytics/models/fastsam/model.py +30 -12
ultralytics/models/fastsam/predict.py +5 -6
ultralytics/models/fastsam/utils.py +3 -3
ultralytics/models/fastsam/val.py +10 -6
ultralytics/models/nas/model.py +9 -5
ultralytics/models/nas/predict.py +6 -6
ultralytics/models/nas/val.py +3 -3
ultralytics/models/rtdetr/model.py +7 -6
ultralytics/models/rtdetr/predict.py +14 -7
ultralytics/models/rtdetr/train.py +10 -4
ultralytics/models/rtdetr/val.py +36 -9
ultralytics/models/sam/amg.py +30 -12
ultralytics/models/sam/build.py +22 -22
ultralytics/models/sam/model.py +10 -9
ultralytics/models/sam/modules/blocks.py +76 -80
ultralytics/models/sam/modules/decoders.py +6 -8
ultralytics/models/sam/modules/encoders.py +23 -26
ultralytics/models/sam/modules/memory_attention.py +13 -1
ultralytics/models/sam/modules/sam.py +57 -26
ultralytics/models/sam/modules/tiny_encoder.py +232 -237
ultralytics/models/sam/modules/transformer.py +13 -13
ultralytics/models/sam/modules/utils.py +11 -19
ultralytics/models/sam/predict.py +114 -101
ultralytics/models/utils/loss.py +98 -77
ultralytics/models/utils/ops.py +116 -67
ultralytics/models/yolo/classify/predict.py +5 -5
ultralytics/models/yolo/classify/train.py +32 -28
ultralytics/models/yolo/classify/val.py +7 -8
ultralytics/models/yolo/detect/predict.py +1 -0
ultralytics/models/yolo/detect/train.py +15 -14
ultralytics/models/yolo/detect/val.py +37 -36
ultralytics/models/yolo/model.py +106 -23
ultralytics/models/yolo/obb/predict.py +3 -4
ultralytics/models/yolo/obb/train.py +14 -6
ultralytics/models/yolo/obb/val.py +29 -23
ultralytics/models/yolo/pose/predict.py +9 -8
ultralytics/models/yolo/pose/train.py +24 -16
ultralytics/models/yolo/pose/val.py +44 -26
ultralytics/models/yolo/segment/predict.py +5 -5
ultralytics/models/yolo/segment/train.py +11 -7
ultralytics/models/yolo/segment/val.py +2 -2
ultralytics/models/yolo/world/train.py +33 -23
ultralytics/models/yolo/world/train_world.py +11 -3
ultralytics/models/yolo/yoloe/predict.py +11 -11
ultralytics/models/yolo/yoloe/train.py +73 -21
ultralytics/models/yolo/yoloe/train_seg.py +10 -7
ultralytics/models/yolo/yoloe/val.py +42 -18
ultralytics/nn/autobackend.py +59 -15
ultralytics/nn/modules/__init__.py +4 -4
ultralytics/nn/modules/activation.py +4 -1
ultralytics/nn/modules/block.py +178 -111
ultralytics/nn/modules/conv.py +6 -5
ultralytics/nn/modules/head.py +469 -121
ultralytics/nn/modules/transformer.py +147 -58
ultralytics/nn/tasks.py +227 -20
ultralytics/nn/text_model.py +30 -33
ultralytics/solutions/ai_gym.py +4 -6
ultralytics/solutions/analytics.py +7 -4
ultralytics/solutions/config.py +10 -10
ultralytics/solutions/distance_calculation.py +11 -10
ultralytics/solutions/heatmap.py +2 -2
ultralytics/solutions/instance_segmentation.py +7 -4
ultralytics/solutions/object_blurrer.py +3 -3
ultralytics/solutions/object_counter.py +15 -11
ultralytics/solutions/object_cropper.py +3 -2
ultralytics/solutions/parking_management.py +29 -28
ultralytics/solutions/queue_management.py +6 -6
ultralytics/solutions/region_counter.py +10 -3
ultralytics/solutions/security_alarm.py +3 -3
ultralytics/solutions/similarity_search.py +85 -24
ultralytics/solutions/solutions.py +189 -79
ultralytics/solutions/speed_estimation.py +28 -22
ultralytics/solutions/streamlit_inference.py +17 -12
ultralytics/solutions/trackzone.py +4 -4
ultralytics/trackers/basetrack.py +16 -23
ultralytics/trackers/bot_sort.py +30 -20
ultralytics/trackers/byte_tracker.py +70 -64
ultralytics/trackers/track.py +4 -8
ultralytics/trackers/utils/gmc.py +31 -58
ultralytics/trackers/utils/kalman_filter.py +37 -37
ultralytics/trackers/utils/matching.py +1 -1
ultralytics/utils/__init__.py +105 -89
ultralytics/utils/autobatch.py +16 -3
ultralytics/utils/autodevice.py +54 -24
ultralytics/utils/benchmarks.py +45 -29
ultralytics/utils/callbacks/base.py +3 -3
ultralytics/utils/callbacks/clearml.py +9 -9
ultralytics/utils/callbacks/comet.py +67 -25
ultralytics/utils/callbacks/dvc.py +7 -10
ultralytics/utils/callbacks/mlflow.py +2 -5
ultralytics/utils/callbacks/neptune.py +7 -13
ultralytics/utils/callbacks/raytune.py +1 -1
ultralytics/utils/callbacks/tensorboard.py +5 -6
ultralytics/utils/callbacks/wb.py +14 -14
ultralytics/utils/checks.py +14 -13
ultralytics/utils/dist.py +5 -5
ultralytics/utils/downloads.py +94 -67
ultralytics/utils/errors.py +5 -5
ultralytics/utils/export.py +61 -47
ultralytics/utils/files.py +23 -22
ultralytics/utils/instance.py +48 -52
ultralytics/utils/loss.py +78 -40
ultralytics/utils/metrics.py +186 -130
ultralytics/utils/ops.py +186 -190
ultralytics/utils/patches.py +15 -17
ultralytics/utils/plotting.py +71 -27
ultralytics/utils/tal.py +21 -15
ultralytics/utils/torch_utils.py +53 -50
ultralytics/utils/triton.py +5 -4
ultralytics/utils/tuner.py +5 -5
{ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/METADATA +2 -2
ultralytics-8.3.145.dist-info/RECORD +272 -0
ultralytics-8.3.143.dist-info/RECORD +0 -272
{ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/WHEEL +0 -0
{ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/entry_points.txt +0 -0
{ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/licenses/LICENSE +0 -0
{ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/top_level.txt +0 -0

ultralytics/solutions/solutions.py CHANGED Viewed

@@ -2,6 +2,7 @@
 import math
 from collections import defaultdict
+from typing import Any, Dict, List, Optional, Tuple
 import cv2
 import numpy as np
@@ -18,25 +19,47 @@ class BaseSolution:
     A base class for managing Ultralytics Solutions.
     This class provides core functionality for various Ultralytics Solutions, including model loading, object tracking,
-    and region initialization.
+    and region initialization. It serves as the foundation for implementing specific computer vision solutions such as
+    object counting, pose estimation, and analytics.
     Attributes:
-        LineString (shapely.geometry.LineString): Class for creating line string geometries.
-        Polygon (shapely.geometry.Polygon): Class for creating polygon geometries.
-        Point (shapely.geometry.Point): Class for creating point geometries.
-        CFG (dict): Configuration dictionary loaded from a YAML file and updated with kwargs.
-        region (List[Tuple[int, int]]): List of coordinate tuples defining a region of interest.
+        LineString: Class for creating line string geometries from shapely.
+        Polygon: Class for creating polygon geometries from shapely.
+        Point: Class for creating point geometries from shapely.
+        prep: Prepared geometry function from shapely for optimized spatial operations.
+        CFG (Dict[str, Any]): Configuration dictionary loaded from YAML file and updated with kwargs.
+        LOGGER: Logger instance for solution-specific logging.
+        annotator: Annotator instance for drawing on images.
+        tracks: YOLO tracking results from the latest inference.
+        track_data: Extracted tracking data (boxes or OBB) from tracks.
+        boxes (List): Bounding box coordinates from tracking results.
+        clss (List[int]): Class indices from tracking results.
+        track_ids (List[int]): Track IDs from tracking results.
+        confs (List[float]): Confidence scores from tracking results.
+        track_line: Current track line for storing tracking history.
+        masks: Segmentation masks from tracking results.
+        r_s: Region or line geometry object for spatial operations.
+        frame_no (int): Current frame number for logging purposes.
+        region (List[Tuple[int, int]]): List of coordinate tuples defining region of interest.
         line_width (int): Width of lines used in visualizations.
-        model (ultralytics.YOLO): Loaded YOLO model instance.
+        model (YOLO): Loaded YOLO model instance.
         names (Dict[int, str]): Dictionary mapping class indices to class names.
-        env_check (bool): Flag indicating whether the environment supports image display.
-        track_history (collections.defaultdict): Dictionary to store tracking history for each object.
+        classes (List[int]): List of class indices to track.
+        show_conf (bool): Flag to show confidence scores in annotations.
+        show_labels (bool): Flag to show class labels in annotations.
+        device (str): Device for model inference.
+        track_add_args (Dict[str, Any]): Additional arguments for tracking configuration.
+        env_check (bool): Flag indicating whether environment supports image display.
+        track_history (defaultdict): Dictionary storing tracking history for each object.
+        profilers (Tuple): Profiler instances for performance monitoring.
     Methods:
-        extract_tracks: Apply object tracking and extract tracks from an input image.
-        store_tracking_history: Store object tracking history for a given track ID and bounding box.
-        initialize_region: Initialize the counting region and line segment based on configuration.
-        display_output: Display the results of processing, including showing frames or saving results.
+        adjust_box_label: Generate formatted label for bounding box.
+        extract_tracks: Apply object tracking and extract tracks from input image.
+        store_tracking_history: Store object tracking history for given track ID and bounding box.
+        initialize_region: Initialize counting region and line segment based on configuration.
+        display_output: Display processing results including frames or saved results.
+        process: Process method to be implemented by each Solution subclass.
     Examples:
         >>> solution = BaseSolution(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
@@ -46,12 +69,12 @@ class BaseSolution:
         >>> solution.display_output(image)
     """
-    def __init__(self, is_cli=False, **kwargs):
+    def __init__(self, is_cli: bool = False, **kwargs):
         """
-        Initializes the BaseSolution class with configuration settings and the YOLO model.
+        Initialize the BaseSolution class with configuration settings and YOLO model.
         Args:
-            is_cli (bool): Enables CLI mode if set to True.
+            is_cli (bool): Enable CLI mode if set to True.
             **kwargs (Any): Additional configuration parameters that override defaults.
         """
         self.CFG = vars(SolutionConfig().update(**kwargs))
@@ -112,9 +135,9 @@ class BaseSolution:
                 ops.Profile(device=self.device),  # solution
             )
-    def adjust_box_label(self, cls, conf, track_id=None):
+    def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
         """
-        Generates a formatted label for a bounding box.
+        Generate a formatted label for a bounding box.
         This method constructs a label string for a bounding box using the class index and confidence score.
         Optionally includes the track ID if provided. The label format adapts based on the display settings
@@ -123,17 +146,17 @@ class BaseSolution:
         Args:
             cls (int): The class index of the detected object.
             conf (float): The confidence score of the detection.
-            track_id (int, optional): The unique identifier for the tracked object. Defaults to None.
+            track_id (int, optional): The unique identifier for the tracked object.
         Returns:
-            (str or None): The formatted label string if `self.show_labels` is True; otherwise, None.
+            (str | None): The formatted label string if `self.show_labels` is True; otherwise, None.
         """
         name = ("" if track_id is None else f"{track_id} ") + self.names[cls]
         return (f"{name} {conf:.2f}" if self.show_conf else name) if self.show_labels else None
-    def extract_tracks(self, im0):
+    def extract_tracks(self, im0: np.ndarray):
         """
-        Applies object tracking and extracts tracks from an input image or frame.
+        Apply object tracking and extract tracks from an input image or frame.
         Args:
             im0 (np.ndarray): The input image or frame.
@@ -146,11 +169,12 @@ class BaseSolution:
         with self.profilers[0]:
             self.tracks = self.model.track(
                 source=im0, persist=True, classes=self.classes, verbose=False, **self.track_add_args
-            )
-        self.track_data = self.tracks[0].obb or self.tracks[0].boxes  # Extract tracks for OBB or object detection
+            )[0]
+        is_obb = self.tracks.obb is not None
+        self.track_data = self.tracks.obb if is_obb else self.tracks.boxes  # Extract tracks for OBB or object detection
-        if self.track_data and self.track_data.id is not None:
-            self.boxes = self.track_data.xyxy.cpu()
+        if self.track_data and self.track_data.is_track:
+            self.boxes = (self.track_data.xyxyxyxy if is_obb else self.track_data.xyxy).cpu()
             self.clss = self.track_data.cls.cpu().tolist()
             self.track_ids = self.track_data.id.int().cpu().tolist()
             self.confs = self.track_data.conf.cpu().tolist()
@@ -158,9 +182,9 @@ class BaseSolution:
             self.LOGGER.warning("no tracks found!")
             self.boxes, self.clss, self.track_ids, self.confs = [], [], [], []
-    def store_tracking_history(self, track_id, box):
+    def store_tracking_history(self, track_id: int, box):
         """
-        Stores the tracking history of an object.
+        Store the tracking history of an object.
         This method updates the tracking history for a given object by appending the center point of its
         bounding box to the track line. It maintains a maximum of 30 points in the tracking history.
@@ -187,7 +211,7 @@ class BaseSolution:
             self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region)
         )  # region or line
-    def display_output(self, plot_im):
+    def display_output(self, plot_im: np.ndarray):
         """
         Display the results of the processing, which could involve showing frames, printing counts, or saving results.
@@ -195,7 +219,7 @@ class BaseSolution:
         the processed frame with annotations, and allows for user interaction to close the display.
         Args:
-            plot_im (numpy.ndarray): The image or frame that has been processed and annotated.
+            plot_im (np.ndarray): The image or frame that has been processed and annotated.
         Examples:
             >>> solution = BaseSolution()
@@ -240,8 +264,8 @@ class SolutionAnnotator(Annotator):
     A specialized annotator class for visualizing and analyzing computer vision tasks.
     This class extends the base Annotator class, providing additional methods for drawing regions, centroids, tracking
-    trails, and visual annotations for Ultralytics Solutions: https://docs.ultralytics.com/solutions/.
-    and parking management.
+    trails, and visual annotations for Ultralytics Solutions. It offers comprehensive visualization capabilities for
+    various computer vision applications including object detection, tracking, pose estimation, and analytics.
     Attributes:
         im (np.ndarray): The image being annotated.
@@ -252,19 +276,19 @@ class SolutionAnnotator(Annotator):
         example (str): An example attribute for demonstration purposes.
     Methods:
-        draw_region: Draws a region using specified points, colors, and thickness.
-        queue_counts_display: Displays queue counts in the specified region.
-        display_analytics: Displays overall statistics for parking lot management.
-        estimate_pose_angle: Calculates the angle between three points in an object pose.
-        draw_specific_points: Draws specific keypoints on the image.
-        plot_workout_information: Draws a labeled text box on the image.
-        plot_angle_and_count_and_stage: Visualizes angle, step count, and stage for workout monitoring.
-        plot_distance_and_line: Displays the distance between centroids and connects them with a line.
-        display_objects_labels: Annotates bounding boxes with object class labels.
-        sweep_annotator: Visualizes a vertical sweep line and optional label.
-        visioneye: Maps and connects object centroids to a visual "eye" point.
-        circle_label: Draws a circular label within a bounding box.
-        text_label: Draws a rectangular label within a bounding box.
+        draw_region: Draw a region using specified points, colors, and thickness.
+        queue_counts_display: Display queue counts in the specified region.
+        display_analytics: Display overall statistics for parking lot management.
+        estimate_pose_angle: Calculate the angle between three points in an object pose.
+        draw_specific_kpts: Draw specific keypoints on the image.
+        plot_workout_information: Draw a labeled text box on the image.
+        plot_angle_and_count_and_stage: Visualize angle, step count, and stage for workout monitoring.
+        plot_distance_and_line: Display the distance between centroids and connect them with a line.
+        display_objects_labels: Annotate bounding boxes with object class labels.
+        sweep_annotator: Visualize a vertical sweep line and optional label.
+        visioneye: Map and connect object centroids to a visual "eye" point.
+        circle_label: Draw a circular label within a bounding box.
+        text_label: Draw a rectangular label within a bounding box.
     Examples:
         >>> annotator = SolutionAnnotator(image)
@@ -274,26 +298,39 @@ class SolutionAnnotator(Annotator):
         ... )
     """
-    def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=False, example="abc"):
+    def __init__(
+        self,
+        im: np.ndarray,
+        line_width: Optional[int] = None,
+        font_size: Optional[int] = None,
+        font: str = "Arial.ttf",
+        pil: bool = False,
+        example: str = "abc",
+    ):
         """
-        Initializes the SolutionAnnotator class with an image for annotation.
+        Initialize the SolutionAnnotator class with an image for annotation.
         Args:
             im (np.ndarray): The image to be annotated.
             line_width (int, optional): Line thickness for drawing on the image.
             font_size (int, optional): Font size for text annotations.
-            font (str, optional): Path to the font file.
-            pil (bool, optional): Indicates whether to use PIL for rendering text.
-            example (str, optional): An example parameter for demonstration purposes.
+            font (str): Path to the font file.
+            pil (bool): Indicates whether to use PIL for rendering text.
+            example (str): An example parameter for demonstration purposes.
         """
         super().__init__(im, line_width, font_size, font, pil, example)
-    def draw_region(self, reg_pts=None, color=(0, 255, 0), thickness=5):
+    def draw_region(
+        self,
+        reg_pts: Optional[List[Tuple[int, int]]] = None,
+        color: Tuple[int, int, int] = (0, 255, 0),
+        thickness: int = 5,
+    ):
         """
         Draw a region or line on the image.
         Args:
-            reg_pts (List[Tuple[int, int]]): Region points (for line 2 points, for region 4+ points).
+            reg_pts (List[Tuple[int, int]], optional): Region points (for line 2 points, for region 4+ points).
             color (Tuple[int, int, int]): RGB color value for the region.
             thickness (int): Line thickness for drawing the region.
         """
@@ -303,13 +340,19 @@ class SolutionAnnotator(Annotator):
         for point in reg_pts:
             cv2.circle(self.im, (point[0], point[1]), thickness * 2, color, -1)  # -1 fills the circle
-    def queue_counts_display(self, label, points=None, region_color=(255, 255, 255), txt_color=(0, 0, 0)):
+    def queue_counts_display(
+        self,
+        label: str,
+        points: Optional[List[Tuple[int, int]]] = None,
+        region_color: Tuple[int, int, int] = (255, 255, 255),
+        txt_color: Tuple[int, int, int] = (0, 0, 0),
+    ):
         """
-        Displays queue counts on an image centered at the points with customizable font size and colors.
+        Display queue counts on an image centered at the points with customizable font size and colors.
         Args:
             label (str): Queue counts label.
-            points (List[Tuple[int, int]]): Region points for center point calculation to display text.
+            points (List[Tuple[int, int]], optional): Region points for center point calculation to display text.
             region_color (Tuple[int, int, int]): RGB queue region color.
             txt_color (Tuple[int, int, int]): RGB text display color.
         """
@@ -343,7 +386,14 @@ class SolutionAnnotator(Annotator):
             lineType=cv2.LINE_AA,
         )
-    def display_analytics(self, im0, text, txt_color, bg_color, margin):
+    def display_analytics(
+        self,
+        im0: np.ndarray,
+        text: Dict[str, Any],
+        txt_color: Tuple[int, int, int],
+        bg_color: Tuple[int, int, int],
+        margin: int,
+    ):
         """
         Display the overall statistics for parking lots, object counter etc.
@@ -373,7 +423,7 @@ class SolutionAnnotator(Annotator):
             text_y_offset = rect_y2
     @staticmethod
-    def estimate_pose_angle(a, b, c):
+    def estimate_pose_angle(a: List[float], b: List[float], c: List[float]) -> float:
         """
         Calculate the angle between three points for workout monitoring.
@@ -389,20 +439,26 @@ class SolutionAnnotator(Annotator):
         angle = abs(radians * 180.0 / math.pi)
         return angle if angle <= 180.0 else (360 - angle)
-    def draw_specific_kpts(self, keypoints, indices=None, radius=2, conf_thresh=0.25):
+    def draw_specific_kpts(
+        self,
+        keypoints: List[List[float]],
+        indices: Optional[List[int]] = None,
+        radius: int = 2,
+        conf_thresh: float = 0.25,
+    ) -> np.ndarray:
         """
         Draw specific keypoints for gym steps counting.
         Args:
             keypoints (List[List[float]]): Keypoints data to be plotted, each in format [x, y, confidence].
             indices (List[int], optional): Keypoint indices to be plotted.
-            radius (int, optional): Keypoint radius.
-            conf_thresh (float, optional): Confidence threshold for keypoints.
+            radius (int): Keypoint radius.
+            conf_thresh (float): Confidence threshold for keypoints.
         Returns:
             (np.ndarray): Image with drawn keypoints.
-        Note:
+        Notes:
             Keypoint format: [x, y] or [x, y, confidence].
             Modifies self.im in-place.
         """
@@ -419,20 +475,26 @@ class SolutionAnnotator(Annotator):
         return self.im
-    def plot_workout_information(self, display_text, position, color=(104, 31, 17), txt_color=(255, 255, 255)):
+    def plot_workout_information(
+        self,
+        display_text: str,
+        position: Tuple[int, int],
+        color: Tuple[int, int, int] = (104, 31, 17),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+    ) -> int:
         """
         Draw workout text with a background on the image.
         Args:
             display_text (str): The text to be displayed.
             position (Tuple[int, int]): Coordinates (x, y) on the image where the text will be placed.
-            color (Tuple[int, int, int], optional): Text background color.
-            txt_color (Tuple[int, int, int], optional): Text foreground color.
+            color (Tuple[int, int, int]): Text background color.
+            txt_color (Tuple[int, int, int]): Text foreground color.
         Returns:
             (int): The height of the text.
         """
-        (text_width, text_height), _ = cv2.getTextSize(display_text, 0, self.sf, self.tf)
+        (text_width, text_height), _ = cv2.getTextSize(display_text, 0, fontScale=self.sf, thickness=self.tf)
         # Draw background rectangle
         cv2.rectangle(
@@ -448,7 +510,13 @@ class SolutionAnnotator(Annotator):
         return text_height
     def plot_angle_and_count_and_stage(
-        self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255)
+        self,
+        angle_text: str,
+        count_text: str,
+        stage_text: str,
+        center_kpt: List[int],
+        color: Tuple[int, int, int] = (104, 31, 17),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
     ):
         """
         Plot the pose angle, count value, and step stage for workout monitoring.
@@ -458,8 +526,8 @@ class SolutionAnnotator(Annotator):
             count_text (str): Counts value for workout monitoring.
             stage_text (str): Stage decision for workout monitoring.
             center_kpt (List[int]): Centroid pose index for workout monitoring.
-            color (Tuple[int, int, int], optional): Text background color.
-            txt_color (Tuple[int, int, int], optional): Text foreground color.
+            color (Tuple[int, int, int]): Text background color.
+            txt_color (Tuple[int, int, int]): Text foreground color.
         """
         # Format text
         angle_text, count_text, stage_text = f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}"
@@ -476,7 +544,11 @@ class SolutionAnnotator(Annotator):
         )
     def plot_distance_and_line(
-        self, pixels_distance, centroids, line_color=(104, 31, 17), centroid_color=(255, 0, 255)
+        self,
+        pixels_distance: float,
+        centroids: List[Tuple[int, int]],
+        line_color: Tuple[int, int, int] = (104, 31, 17),
+        centroid_color: Tuple[int, int, int] = (255, 0, 255),
     ):
         """
         Plot the distance and line between two centroids on the frame.
@@ -484,8 +556,8 @@ class SolutionAnnotator(Annotator):
         Args:
             pixels_distance (float): Pixels distance between two bbox centroids.
             centroids (List[Tuple[int, int]]): Bounding box centroids data.
-            line_color (Tuple[int, int, int], optional): Distance line color.
-            centroid_color (Tuple[int, int, int], optional): Bounding box centroid color.
+            line_color (Tuple[int, int, int]): Distance line color.
+            centroid_color (Tuple[int, int, int]): Bounding box centroid color.
         """
         # Get the text size
         text = f"Pixels Distance: {pixels_distance:.2f}"
@@ -511,7 +583,16 @@ class SolutionAnnotator(Annotator):
         cv2.circle(self.im, centroids[0], 6, centroid_color, -1)
         cv2.circle(self.im, centroids[1], 6, centroid_color, -1)
-    def display_objects_labels(self, im0, text, txt_color, bg_color, x_center, y_center, margin):
+    def display_objects_labels(
+        self,
+        im0: np.ndarray,
+        text: str,
+        txt_color: Tuple[int, int, int],
+        bg_color: Tuple[int, int, int],
+        x_center: float,
+        y_center: float,
+        margin: int,
+    ):
         """
         Display the bounding boxes labels in parking management app.
@@ -551,7 +632,14 @@ class SolutionAnnotator(Annotator):
             lineType=cv2.LINE_AA,
         )
-    def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
+    def sweep_annotator(
+        self,
+        line_x: int = 0,
+        line_y: int = 0,
+        label: Optional[str] = None,
+        color: Tuple[int, int, int] = (221, 0, 186),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+    ):
         """
         Draw a sweep annotation line and an optional label.
@@ -585,7 +673,13 @@ class SolutionAnnotator(Annotator):
                 self.tf,
             )
-    def visioneye(self, box, center_point, color=(235, 219, 11), pin_color=(255, 0, 255)):
+    def visioneye(
+        self,
+        box: List[float],
+        center_point: Tuple[int, int],
+        color: Tuple[int, int, int] = (235, 219, 11),
+        pin_color: Tuple[int, int, int] = (255, 0, 255),
+    ):
         """
         Perform pinpoint human-vision eye mapping and plotting.
@@ -600,7 +694,14 @@ class SolutionAnnotator(Annotator):
         cv2.circle(self.im, center_bbox, self.tf * 2, color, -1)
         cv2.line(self.im, center_point, center_bbox, color, self.tf)
-    def circle_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), margin=2):
+    def circle_label(
+        self,
+        box: Tuple[float, float, float, float],
+        label: str = "",
+        color: Tuple[int, int, int] = (128, 128, 128),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+        margin: int = 2,
+    ):
         """
         Draw a label with a background circle centered within a given bounding box.
@@ -638,7 +739,14 @@ class SolutionAnnotator(Annotator):
             lineType=cv2.LINE_AA,
         )
-    def text_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), margin=5):
+    def text_label(
+        self,
+        box: Tuple[float, float, float, float],
+        label: str = "",
+        color: Tuple[int, int, int] = (128, 128, 128),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+        margin: int = 5,
+    ):
         """
         Draw a label with a background rectangle centered within a given bounding box.
@@ -681,7 +789,8 @@ class SolutionResults:
     A class to encapsulate the results of Ultralytics Solutions.
     This class is designed to store and manage various outputs generated by the solution pipeline, including counts,
-    angles, and workout stages.
+    angles, workout stages, and other analytics data. It provides a structured way to access and manipulate results
+    from different computer vision solutions such as object counting, pose estimation, and tracking analytics.
     Attributes:
         plot_im (np.ndarray): Processed image with counts, blurred, or other effects from solutions.
@@ -697,9 +806,10 @@ class SolutionResults:
         filled_slots (int): The number of filled slots in a monitored area.
         email_sent (bool): A flag indicating whether an email notification was sent.
         total_tracks (int): The total number of tracked objects.
-        region_counts (dict): The count of objects within a specific region.
+        region_counts (Dict): The count of objects within a specific region.
         speed_dict (Dict[str, float]): A dictionary containing speed information for tracked objects.
         total_crop_objects (int): Total number of cropped objects using ObjectCropper class.
+        speed (Dict): Performance timing information for tracking and solution processing.
     """
     def __init__(self, **kwargs):
@@ -730,7 +840,7 @@ class SolutionResults:
         # Override with user-defined values
         self.__dict__.update(kwargs)
-    def __str__(self):
+    def __str__(self) -> str:
         """
         Return a formatted string representation of the SolutionResults object.

ultralytics/solutions/speed_estimation.py CHANGED Viewed

@@ -12,24 +12,29 @@ class SpeedEstimator(BaseSolution):
     A class to estimate the speed of objects in a real-time video stream based on their tracks.
     This class extends the BaseSolution class and provides functionality for estimating object speeds using
-    tracking data in video streams.
+    tracking data in video streams. Speed is calculated based on pixel displacement over time and converted
+    to real-world units using a configurable meters-per-pixel scale factor.
     Attributes:
-        spd (Dict[int, float]): Dictionary storing speed data for tracked objects.
-        trk_hist (Dict[int, float]): Dictionary storing the object tracking data.
-        max_hist (int): maximum track history before computing speed
-        meters_per_pixel (float): Real-world meters represented by one pixel (e.g., 0.04 for 4m over 100px).
-        max_speed (int): Maximum allowed object speed; values above this will be capped at 120 km/h.
+        fps (float): Video frame rate for time calculations.
+        frame_count (int): Global frame counter for tracking temporal information.
+        trk_frame_ids (dict): Maps track IDs to their first frame index.
+        spd (dict): Final speed per object in km/h once locked.
+        trk_hist (dict): Maps track IDs to deque of position history.
+        locked_ids (set): Track IDs whose speed has been finalized.
+        max_hist (int): Required frame history before computing speed.
+        meter_per_pixel (float): Real-world meters represented by one pixel for scene scale conversion.
+        max_speed (int): Maximum allowed object speed; values above this will be capped.
     Methods:
-        initialize_region: Initializes the speed estimation region.
-        process: Processes input frames to estimate object speeds.
-        store_tracking_history: Stores the tracking history for an object.
-        extract_tracks: Extracts tracks from the current frame.
-        display_output: Displays the output with annotations.
+        process: Process input frames to estimate object speeds based on tracking data.
+        store_tracking_history: Store the tracking history for an object.
+        extract_tracks: Extract tracks from the current frame.
+        display_output: Display the output with annotations.
     Examples:
-        >>> estimator = SpeedEstimator()
+        Initialize speed estimator and process a frame
+        >>> estimator = SpeedEstimator(meter_per_pixel=0.04, max_speed=120)
         >>> frame = cv2.imread("frame.jpg")
         >>> results = estimator.process(frame)
         >>> cv2.imshow("Speed Estimation", results.plot_im)
@@ -44,15 +49,15 @@ class SpeedEstimator(BaseSolution):
         """
         super().__init__(**kwargs)
-        self.fps = self.CFG["fps"]  # assumed video FPS
-        self.frame_count = 0  # global frame count
+        self.fps = self.CFG["fps"]  # Video frame rate for time calculations
+        self.frame_count = 0  # Global frame counter
         self.trk_frame_ids = {}  # Track ID → first frame index
         self.spd = {}  # Final speed per object (km/h), once locked
         self.trk_hist = {}  # Track ID → deque of (x, y) track positions
         self.locked_ids = set()  # Track IDs whose speed has been finalized
         self.max_hist = self.CFG["max_hist"]  # Required frame history before computing speed
         self.meter_per_pixel = self.CFG["meter_per_pixel"]  # Scene scale, depends on camera details
-        self.max_speed = self.CFG["max_speed"]  # max_speed adjustment
+        self.max_speed = self.CFG["max_speed"]  # Upper cap applied to computed speeds (km/h)
     def process(self, im0):
         """
@@ -65,6 +70,7 @@ class SpeedEstimator(BaseSolution):
             (SolutionResults): Contains processed image `plot_im` and `total_tracks` (number of tracked objects).
         Examples:
+            Process a frame for speed estimation
             >>> estimator = SpeedEstimator()
             >>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
             >>> results = estimator.process(image)
@@ -89,15 +95,15 @@ class SpeedEstimator(BaseSolution):
                     p0, p1 = trk_hist[0], trk_hist[-1]  # First and last points of track
                     dt = (self.frame_count - self.trk_frame_ids[track_id]) / self.fps  # Time in seconds
                     if dt > 0:
-                        dx, dy = p1[0] - p0[0], p1[1] - p0[1]  # pixel displacement
-                        pixel_distance = sqrt(dx * dx + dy * dy)  # get pixel distance
-                        meters = pixel_distance * self.meter_per_pixel  # convert to meters
+                        dx, dy = p1[0] - p0[0], p1[1] - p0[1]  # Pixel displacement
+                        pixel_distance = sqrt(dx * dx + dy * dy)  # Calculate pixel distance
+                        meters = pixel_distance * self.meter_per_pixel  # Convert to meters
                         self.spd[track_id] = int(
                             min((meters / dt) * 3.6, self.max_speed)
-                        )  # convert to km/h and store final speed
-                        self.locked_ids.add(track_id)  # prevent further updates
-                        self.trk_hist.pop(track_id, None)  # free memory
-                        self.trk_frame_ids.pop(track_id, None)  # optional: remove frame start too
+                        )  # Convert to km/h and store final speed
+                        self.locked_ids.add(track_id)  # Prevent further updates
+                        self.trk_hist.pop(track_id, None)  # Free memory
+                        self.trk_frame_ids.pop(track_id, None)  # Remove frame start reference
             if track_id in self.spd:
                 speed_label = f"{self.spd[track_id]} km/h"

ultralytics 8.3.143__py3-none-any.whl → 8.3.145__py3-none-any.whl

ultralytics 8.3.143py3-none-any.whl → 8.3.145py3-none-any.whl