ultralytics 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. tests/conftest.py +7 -24
  2. tests/test_cli.py +1 -1
  3. tests/test_cuda.py +7 -2
  4. tests/test_engine.py +7 -8
  5. tests/test_exports.py +16 -16
  6. tests/test_integrations.py +1 -1
  7. tests/test_solutions.py +11 -11
  8. ultralytics/__init__.py +1 -1
  9. ultralytics/cfg/__init__.py +16 -13
  10. ultralytics/data/annotator.py +6 -5
  11. ultralytics/data/augment.py +127 -126
  12. ultralytics/data/base.py +54 -51
  13. ultralytics/data/build.py +47 -23
  14. ultralytics/data/converter.py +47 -43
  15. ultralytics/data/dataset.py +51 -50
  16. ultralytics/data/loaders.py +77 -44
  17. ultralytics/data/split.py +22 -9
  18. ultralytics/data/split_dota.py +63 -39
  19. ultralytics/data/utils.py +59 -39
  20. ultralytics/engine/exporter.py +79 -27
  21. ultralytics/engine/model.py +39 -39
  22. ultralytics/engine/predictor.py +37 -28
  23. ultralytics/engine/results.py +187 -157
  24. ultralytics/engine/trainer.py +36 -19
  25. ultralytics/engine/tuner.py +12 -9
  26. ultralytics/engine/validator.py +7 -9
  27. ultralytics/hub/__init__.py +11 -13
  28. ultralytics/hub/auth.py +22 -2
  29. ultralytics/hub/google/__init__.py +19 -19
  30. ultralytics/hub/session.py +37 -51
  31. ultralytics/hub/utils.py +19 -5
  32. ultralytics/models/fastsam/model.py +30 -12
  33. ultralytics/models/fastsam/predict.py +5 -6
  34. ultralytics/models/fastsam/utils.py +3 -3
  35. ultralytics/models/fastsam/val.py +10 -6
  36. ultralytics/models/nas/model.py +9 -5
  37. ultralytics/models/nas/predict.py +6 -6
  38. ultralytics/models/nas/val.py +3 -3
  39. ultralytics/models/rtdetr/model.py +7 -6
  40. ultralytics/models/rtdetr/predict.py +14 -7
  41. ultralytics/models/rtdetr/train.py +10 -4
  42. ultralytics/models/rtdetr/val.py +36 -9
  43. ultralytics/models/sam/amg.py +30 -12
  44. ultralytics/models/sam/build.py +22 -22
  45. ultralytics/models/sam/model.py +10 -9
  46. ultralytics/models/sam/modules/blocks.py +76 -80
  47. ultralytics/models/sam/modules/decoders.py +6 -8
  48. ultralytics/models/sam/modules/encoders.py +23 -26
  49. ultralytics/models/sam/modules/memory_attention.py +13 -1
  50. ultralytics/models/sam/modules/sam.py +57 -26
  51. ultralytics/models/sam/modules/tiny_encoder.py +232 -237
  52. ultralytics/models/sam/modules/transformer.py +13 -13
  53. ultralytics/models/sam/modules/utils.py +11 -19
  54. ultralytics/models/sam/predict.py +114 -101
  55. ultralytics/models/utils/loss.py +98 -77
  56. ultralytics/models/utils/ops.py +116 -67
  57. ultralytics/models/yolo/classify/predict.py +5 -5
  58. ultralytics/models/yolo/classify/train.py +32 -28
  59. ultralytics/models/yolo/classify/val.py +7 -8
  60. ultralytics/models/yolo/detect/predict.py +1 -0
  61. ultralytics/models/yolo/detect/train.py +15 -14
  62. ultralytics/models/yolo/detect/val.py +37 -36
  63. ultralytics/models/yolo/model.py +106 -23
  64. ultralytics/models/yolo/obb/predict.py +3 -4
  65. ultralytics/models/yolo/obb/train.py +14 -6
  66. ultralytics/models/yolo/obb/val.py +29 -23
  67. ultralytics/models/yolo/pose/predict.py +9 -8
  68. ultralytics/models/yolo/pose/train.py +24 -16
  69. ultralytics/models/yolo/pose/val.py +44 -26
  70. ultralytics/models/yolo/segment/predict.py +5 -5
  71. ultralytics/models/yolo/segment/train.py +11 -7
  72. ultralytics/models/yolo/segment/val.py +2 -2
  73. ultralytics/models/yolo/world/train.py +33 -23
  74. ultralytics/models/yolo/world/train_world.py +11 -3
  75. ultralytics/models/yolo/yoloe/predict.py +11 -11
  76. ultralytics/models/yolo/yoloe/train.py +73 -21
  77. ultralytics/models/yolo/yoloe/train_seg.py +10 -7
  78. ultralytics/models/yolo/yoloe/val.py +42 -18
  79. ultralytics/nn/autobackend.py +59 -15
  80. ultralytics/nn/modules/__init__.py +4 -4
  81. ultralytics/nn/modules/activation.py +4 -1
  82. ultralytics/nn/modules/block.py +178 -111
  83. ultralytics/nn/modules/conv.py +6 -5
  84. ultralytics/nn/modules/head.py +469 -121
  85. ultralytics/nn/modules/transformer.py +147 -58
  86. ultralytics/nn/tasks.py +227 -20
  87. ultralytics/nn/text_model.py +30 -33
  88. ultralytics/solutions/ai_gym.py +1 -1
  89. ultralytics/solutions/analytics.py +7 -4
  90. ultralytics/solutions/config.py +10 -10
  91. ultralytics/solutions/distance_calculation.py +11 -10
  92. ultralytics/solutions/heatmap.py +1 -1
  93. ultralytics/solutions/instance_segmentation.py +6 -3
  94. ultralytics/solutions/object_blurrer.py +3 -3
  95. ultralytics/solutions/object_counter.py +15 -7
  96. ultralytics/solutions/object_cropper.py +3 -2
  97. ultralytics/solutions/parking_management.py +29 -28
  98. ultralytics/solutions/queue_management.py +6 -6
  99. ultralytics/solutions/region_counter.py +10 -3
  100. ultralytics/solutions/security_alarm.py +3 -3
  101. ultralytics/solutions/similarity_search.py +85 -24
  102. ultralytics/solutions/solutions.py +184 -75
  103. ultralytics/solutions/speed_estimation.py +28 -22
  104. ultralytics/solutions/streamlit_inference.py +17 -12
  105. ultralytics/solutions/trackzone.py +4 -4
  106. ultralytics/trackers/basetrack.py +16 -23
  107. ultralytics/trackers/bot_sort.py +30 -20
  108. ultralytics/trackers/byte_tracker.py +70 -64
  109. ultralytics/trackers/track.py +4 -8
  110. ultralytics/trackers/utils/gmc.py +31 -58
  111. ultralytics/trackers/utils/kalman_filter.py +37 -37
  112. ultralytics/trackers/utils/matching.py +1 -1
  113. ultralytics/utils/__init__.py +105 -89
  114. ultralytics/utils/autobatch.py +16 -3
  115. ultralytics/utils/autodevice.py +54 -24
  116. ultralytics/utils/benchmarks.py +42 -28
  117. ultralytics/utils/callbacks/base.py +3 -3
  118. ultralytics/utils/callbacks/clearml.py +9 -9
  119. ultralytics/utils/callbacks/comet.py +67 -25
  120. ultralytics/utils/callbacks/dvc.py +7 -10
  121. ultralytics/utils/callbacks/mlflow.py +2 -5
  122. ultralytics/utils/callbacks/neptune.py +7 -13
  123. ultralytics/utils/callbacks/raytune.py +1 -1
  124. ultralytics/utils/callbacks/tensorboard.py +5 -6
  125. ultralytics/utils/callbacks/wb.py +14 -14
  126. ultralytics/utils/checks.py +14 -13
  127. ultralytics/utils/dist.py +5 -5
  128. ultralytics/utils/downloads.py +94 -67
  129. ultralytics/utils/errors.py +5 -5
  130. ultralytics/utils/export.py +61 -47
  131. ultralytics/utils/files.py +23 -22
  132. ultralytics/utils/instance.py +48 -52
  133. ultralytics/utils/loss.py +78 -40
  134. ultralytics/utils/metrics.py +186 -130
  135. ultralytics/utils/ops.py +186 -190
  136. ultralytics/utils/patches.py +15 -17
  137. ultralytics/utils/plotting.py +71 -27
  138. ultralytics/utils/tal.py +21 -15
  139. ultralytics/utils/torch_utils.py +53 -50
  140. ultralytics/utils/triton.py +5 -4
  141. ultralytics/utils/tuner.py +5 -5
  142. {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
  143. ultralytics-8.3.144.dist-info/RECORD +272 -0
  144. ultralytics-8.3.143.dist-info/RECORD +0 -272
  145. {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
  146. {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
  147. {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
  148. {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/solutions/solutions.py

@@ -2,6 +2,7 @@

 import math
 from collections import defaultdict
+from typing import Any, Dict, List, Optional, Tuple

 import cv2
 import numpy as np
@@ -18,25 +19,47 @@ class BaseSolution:
     A base class for managing Ultralytics Solutions.

     This class provides core functionality for various Ultralytics Solutions, including model loading, object tracking,
-    and region initialization.
+    and region initialization. It serves as the foundation for implementing specific computer vision solutions such as
+    object counting, pose estimation, and analytics.

     Attributes:
-        LineString (shapely.geometry.LineString): Class for creating line string geometries.
-        Polygon (shapely.geometry.Polygon): Class for creating polygon geometries.
-        Point (shapely.geometry.Point): Class for creating point geometries.
-        CFG (dict): Configuration dictionary loaded from a YAML file and updated with kwargs.
-        region (List[Tuple[int, int]]): List of coordinate tuples defining a region of interest.
+        LineString: Class for creating line string geometries from shapely.
+        Polygon: Class for creating polygon geometries from shapely.
+        Point: Class for creating point geometries from shapely.
+        prep: Prepared geometry function from shapely for optimized spatial operations.
+        CFG (Dict[str, Any]): Configuration dictionary loaded from YAML file and updated with kwargs.
+        LOGGER: Logger instance for solution-specific logging.
+        annotator: Annotator instance for drawing on images.
+        tracks: YOLO tracking results from the latest inference.
+        track_data: Extracted tracking data (boxes or OBB) from tracks.
+        boxes (List): Bounding box coordinates from tracking results.
+        clss (List[int]): Class indices from tracking results.
+        track_ids (List[int]): Track IDs from tracking results.
+        confs (List[float]): Confidence scores from tracking results.
+        track_line: Current track line for storing tracking history.
+        masks: Segmentation masks from tracking results.
+        r_s: Region or line geometry object for spatial operations.
+        frame_no (int): Current frame number for logging purposes.
+        region (List[Tuple[int, int]]): List of coordinate tuples defining region of interest.
         line_width (int): Width of lines used in visualizations.
-        model (ultralytics.YOLO): Loaded YOLO model instance.
+        model (YOLO): Loaded YOLO model instance.
         names (Dict[int, str]): Dictionary mapping class indices to class names.
-        env_check (bool): Flag indicating whether the environment supports image display.
-        track_history (collections.defaultdict): Dictionary to store tracking history for each object.
+        classes (List[int]): List of class indices to track.
+        show_conf (bool): Flag to show confidence scores in annotations.
+        show_labels (bool): Flag to show class labels in annotations.
+        device (str): Device for model inference.
+        track_add_args (Dict[str, Any]): Additional arguments for tracking configuration.
+        env_check (bool): Flag indicating whether environment supports image display.
+        track_history (defaultdict): Dictionary storing tracking history for each object.
+        profilers (Tuple): Profiler instances for performance monitoring.

     Methods:
-        extract_tracks: Apply object tracking and extract tracks from an input image.
-        store_tracking_history: Store object tracking history for a given track ID and bounding box.
-        initialize_region: Initialize the counting region and line segment based on configuration.
-        display_output: Display the results of processing, including showing frames or saving results.
+        adjust_box_label: Generate formatted label for bounding box.
+        extract_tracks: Apply object tracking and extract tracks from input image.
+        store_tracking_history: Store object tracking history for given track ID and bounding box.
+        initialize_region: Initialize counting region and line segment based on configuration.
+        display_output: Display processing results including frames or saved results.
+        process: Process method to be implemented by each Solution subclass.

     Examples:
         >>> solution = BaseSolution(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])

@@ -46,12 +69,12 @@ class BaseSolution:
         >>> solution.display_output(image)
     """

-    def __init__(self, is_cli=False, **kwargs):
+    def __init__(self, is_cli: bool = False, **kwargs):
         """
-        Initializes the BaseSolution class with configuration settings and the YOLO model.
+        Initialize the BaseSolution class with configuration settings and YOLO model.

         Args:
-            is_cli (bool): Enables CLI mode if set to True.
+            is_cli (bool): Enable CLI mode if set to True.
             **kwargs (Any): Additional configuration parameters that override defaults.
         """
         self.CFG = vars(SolutionConfig().update(**kwargs))

@@ -112,9 +135,9 @@ class BaseSolution:
             ops.Profile(device=self.device),  # solution
         )

-    def adjust_box_label(self, cls, conf, track_id=None):
+    def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
         """
-        Generates a formatted label for a bounding box.
+        Generate a formatted label for a bounding box.

         This method constructs a label string for a bounding box using the class index and confidence score.
         Optionally includes the track ID if provided. The label format adapts based on the display settings

@@ -123,17 +146,17 @@ class BaseSolution:
         Args:
             cls (int): The class index of the detected object.
             conf (float): The confidence score of the detection.
-            track_id (int, optional): The unique identifier for the tracked object. Defaults to None.
+            track_id (int, optional): The unique identifier for the tracked object.

         Returns:
-            (str or None): The formatted label string if `self.show_labels` is True; otherwise, None.
+            (str | None): The formatted label string if `self.show_labels` is True; otherwise, None.
         """
         name = ("" if track_id is None else f"{track_id} ") + self.names[cls]
         return (f"{name} {conf:.2f}" if self.show_conf else name) if self.show_labels else None

-    def extract_tracks(self, im0):
+    def extract_tracks(self, im0: np.ndarray):
         """
-        Applies object tracking and extracts tracks from an input image or frame.
+        Apply object tracking and extract tracks from an input image or frame.

         Args:
             im0 (np.ndarray): The input image or frame.
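Note: the retyped `adjust_box_label` above is pure string formatting, so its behavior is easy to check in isolation. A minimal sketch of the same logic, using a hypothetical `names` mapping and module-level flags in place of the instance attributes:

names = {0: "person"}  # hypothetical class-index-to-name mapping
show_labels, show_conf = True, True  # stand-ins for self.show_labels / self.show_conf

def adjust_box_label(cls, conf, track_id=None):
    # Same logic as the method in the hunk above, minus the instance state
    name = ("" if track_id is None else f"{track_id} ") + names[cls]
    return (f"{name} {conf:.2f}" if show_conf else name) if show_labels else None

print(adjust_box_label(0, 0.93, track_id=5))  # -> "5 person 0.93"
print(adjust_box_label(0, 0.93))  # -> "person 0.93"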
@@ -158,9 +181,9 @@ class BaseSolution:
             self.LOGGER.warning("no tracks found!")
             self.boxes, self.clss, self.track_ids, self.confs = [], [], [], []

-    def store_tracking_history(self, track_id, box):
+    def store_tracking_history(self, track_id: int, box):
         """
-        Stores the tracking history of an object.
+        Store the tracking history of an object.

         This method updates the tracking history for a given object by appending the center point of its
         bounding box to the track line. It maintains a maximum of 30 points in the tracking history.

@@ -187,7 +210,7 @@ class BaseSolution:
             self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region)
         )  # region or line

-    def display_output(self, plot_im):
+    def display_output(self, plot_im: np.ndarray):
         """
         Display the results of the processing, which could involve showing frames, printing counts, or saving results.

@@ -195,7 +218,7 @@ class BaseSolution:
         the processed frame with annotations, and allows for user interaction to close the display.

         Args:
-            plot_im (numpy.ndarray): The image or frame that has been processed and annotated.
+            plot_im (np.ndarray): The image or frame that has been processed and annotated.

         Examples:
             >>> solution = BaseSolution()

@@ -240,8 +263,8 @@ class SolutionAnnotator(Annotator):
     A specialized annotator class for visualizing and analyzing computer vision tasks.

     This class extends the base Annotator class, providing additional methods for drawing regions, centroids, tracking
-    trails, and visual annotations for Ultralytics Solutions: https://docs.ultralytics.com/solutions/.
-    and parking management.
+    trails, and visual annotations for Ultralytics Solutions. It offers comprehensive visualization capabilities for
+    various computer vision applications including object detection, tracking, pose estimation, and analytics.

     Attributes:
         im (np.ndarray): The image being annotated.

@@ -252,19 +275,19 @@ class SolutionAnnotator(Annotator):
         example (str): An example attribute for demonstration purposes.

     Methods:
-        draw_region: Draws a region using specified points, colors, and thickness.
-        queue_counts_display: Displays queue counts in the specified region.
-        display_analytics: Displays overall statistics for parking lot management.
-        estimate_pose_angle: Calculates the angle between three points in an object pose.
-        draw_specific_points: Draws specific keypoints on the image.
-        plot_workout_information: Draws a labeled text box on the image.
-        plot_angle_and_count_and_stage: Visualizes angle, step count, and stage for workout monitoring.
-        plot_distance_and_line: Displays the distance between centroids and connects them with a line.
-        display_objects_labels: Annotates bounding boxes with object class labels.
-        sweep_annotator: Visualizes a vertical sweep line and optional label.
-        visioneye: Maps and connects object centroids to a visual "eye" point.
-        circle_label: Draws a circular label within a bounding box.
-        text_label: Draws a rectangular label within a bounding box.
+        draw_region: Draw a region using specified points, colors, and thickness.
+        queue_counts_display: Display queue counts in the specified region.
+        display_analytics: Display overall statistics for parking lot management.
+        estimate_pose_angle: Calculate the angle between three points in an object pose.
+        draw_specific_kpts: Draw specific keypoints on the image.
+        plot_workout_information: Draw a labeled text box on the image.
+        plot_angle_and_count_and_stage: Visualize angle, step count, and stage for workout monitoring.
+        plot_distance_and_line: Display the distance between centroids and connect them with a line.
+        display_objects_labels: Annotate bounding boxes with object class labels.
+        sweep_annotator: Visualize a vertical sweep line and optional label.
+        visioneye: Map and connect object centroids to a visual "eye" point.
+        circle_label: Draw a circular label within a bounding box.
+        text_label: Draw a rectangular label within a bounding box.

     Examples:
         >>> annotator = SolutionAnnotator(image)

@@ -274,26 +297,39 @@ class SolutionAnnotator(Annotator):
         ... )
     """

-    def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=False, example="abc"):
+    def __init__(
+        self,
+        im: np.ndarray,
+        line_width: Optional[int] = None,
+        font_size: Optional[int] = None,
+        font: str = "Arial.ttf",
+        pil: bool = False,
+        example: str = "abc",
+    ):
         """
-        Initializes the SolutionAnnotator class with an image for annotation.
+        Initialize the SolutionAnnotator class with an image for annotation.

         Args:
             im (np.ndarray): The image to be annotated.
             line_width (int, optional): Line thickness for drawing on the image.
             font_size (int, optional): Font size for text annotations.
-            font (str, optional): Path to the font file.
-            pil (bool, optional): Indicates whether to use PIL for rendering text.
-            example (str, optional): An example parameter for demonstration purposes.
+            font (str): Path to the font file.
+            pil (bool): Indicates whether to use PIL for rendering text.
+            example (str): An example parameter for demonstration purposes.
         """
         super().__init__(im, line_width, font_size, font, pil, example)

-    def draw_region(self, reg_pts=None, color=(0, 255, 0), thickness=5):
+    def draw_region(
+        self,
+        reg_pts: Optional[List[Tuple[int, int]]] = None,
+        color: Tuple[int, int, int] = (0, 255, 0),
+        thickness: int = 5,
+    ):
         """
         Draw a region or line on the image.

         Args:
-            reg_pts (List[Tuple[int, int]]): Region points (for line 2 points, for region 4+ points).
+            reg_pts (List[Tuple[int, int]], optional): Region points (for line 2 points, for region 4+ points).
             color (Tuple[int, int, int]): RGB color value for the region.
             thickness (int): Line thickness for drawing the region.
         """

@@ -303,13 +339,19 @@ class SolutionAnnotator(Annotator):
         for point in reg_pts:
             cv2.circle(self.im, (point[0], point[1]), thickness * 2, color, -1)  # -1 fills the circle

-    def queue_counts_display(self, label, points=None, region_color=(255, 255, 255), txt_color=(0, 0, 0)):
+    def queue_counts_display(
+        self,
+        label: str,
+        points: Optional[List[Tuple[int, int]]] = None,
+        region_color: Tuple[int, int, int] = (255, 255, 255),
+        txt_color: Tuple[int, int, int] = (0, 0, 0),
+    ):
         """
-        Displays queue counts on an image centered at the points with customizable font size and colors.
+        Display queue counts on an image centered at the points with customizable font size and colors.

         Args:
             label (str): Queue counts label.
-            points (List[Tuple[int, int]]): Region points for center point calculation to display text.
+            points (List[Tuple[int, int]], optional): Region points for center point calculation to display text.
             region_color (Tuple[int, int, int]): RGB queue region color.
             txt_color (Tuple[int, int, int]): RGB text display color.
         """

@@ -343,7 +385,14 @@ class SolutionAnnotator(Annotator):
             lineType=cv2.LINE_AA,
         )

-    def display_analytics(self, im0, text, txt_color, bg_color, margin):
+    def display_analytics(
+        self,
+        im0: np.ndarray,
+        text: Dict[str, Any],
+        txt_color: Tuple[int, int, int],
+        bg_color: Tuple[int, int, int],
+        margin: int,
+    ):
         """
         Display the overall statistics for parking lots, object counter etc.

@@ -373,7 +422,7 @@ class SolutionAnnotator(Annotator):
            text_y_offset = rect_y2

     @staticmethod
-    def estimate_pose_angle(a, b, c):
+    def estimate_pose_angle(a: List[float], b: List[float], c: List[float]) -> float:
         """
         Calculate the angle between three points for workout monitoring.

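Note: the hunks here show only the degree conversion inside `estimate_pose_angle`; the `radians` value is computed on lines outside this diff. A self-contained sketch, assuming the standard atan2 formulation for the angle at the middle keypoint `b`:

import math

def estimate_pose_angle(a, b, c):
    # Angle at keypoint b formed by segments b->a and b->c (assumed atan2 form)
    radians = math.atan2(c[1] - b[1], c[0] - b[0]) - math.atan2(a[1] - b[1], a[0] - b[0])
    angle = abs(radians * 180.0 / math.pi)  # same degree conversion as in the diff
    return angle if angle <= 180.0 else (360 - angle)

print(estimate_pose_angle((0, 1), (0, 0), (1, 0)))  # -> 90.0 for a right angle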
@@ -389,20 +438,26 @@ class SolutionAnnotator(Annotator):
         angle = abs(radians * 180.0 / math.pi)
         return angle if angle <= 180.0 else (360 - angle)

-    def draw_specific_kpts(self, keypoints, indices=None, radius=2, conf_thresh=0.25):
+    def draw_specific_kpts(
+        self,
+        keypoints: List[List[float]],
+        indices: Optional[List[int]] = None,
+        radius: int = 2,
+        conf_thresh: float = 0.25,
+    ) -> np.ndarray:
         """
         Draw specific keypoints for gym steps counting.

         Args:
             keypoints (List[List[float]]): Keypoints data to be plotted, each in format [x, y, confidence].
             indices (List[int], optional): Keypoint indices to be plotted.
-            radius (int, optional): Keypoint radius.
-            conf_thresh (float, optional): Confidence threshold for keypoints.
+            radius (int): Keypoint radius.
+            conf_thresh (float): Confidence threshold for keypoints.

         Returns:
             (np.ndarray): Image with drawn keypoints.

-        Note:
+        Notes:
             Keypoint format: [x, y] or [x, y, confidence].
             Modifies self.im in-place.
         """

@@ -419,20 +474,26 @@ class SolutionAnnotator(Annotator):

         return self.im

-    def plot_workout_information(self, display_text, position, color=(104, 31, 17), txt_color=(255, 255, 255)):
+    def plot_workout_information(
+        self,
+        display_text: str,
+        position: Tuple[int, int],
+        color: Tuple[int, int, int] = (104, 31, 17),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+    ) -> int:
         """
         Draw workout text with a background on the image.

         Args:
             display_text (str): The text to be displayed.
             position (Tuple[int, int]): Coordinates (x, y) on the image where the text will be placed.
-            color (Tuple[int, int, int], optional): Text background color.
-            txt_color (Tuple[int, int, int], optional): Text foreground color.
+            color (Tuple[int, int, int]): Text background color.
+            txt_color (Tuple[int, int, int]): Text foreground color.

         Returns:
             (int): The height of the text.
         """
-        (text_width, text_height), _ = cv2.getTextSize(display_text, 0, self.sf, self.tf)
+        (text_width, text_height), _ = cv2.getTextSize(display_text, 0, fontScale=self.sf, thickness=self.tf)

         # Draw background rectangle
         cv2.rectangle(

@@ -448,7 +509,13 @@ class SolutionAnnotator(Annotator):
         return text_height

     def plot_angle_and_count_and_stage(
-        self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255)
+        self,
+        angle_text: str,
+        count_text: str,
+        stage_text: str,
+        center_kpt: List[int],
+        color: Tuple[int, int, int] = (104, 31, 17),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
     ):
         """
         Plot the pose angle, count value, and step stage for workout monitoring.

@@ -458,8 +525,8 @@ class SolutionAnnotator(Annotator):
             count_text (str): Counts value for workout monitoring.
             stage_text (str): Stage decision for workout monitoring.
             center_kpt (List[int]): Centroid pose index for workout monitoring.
-            color (Tuple[int, int, int], optional): Text background color.
-            txt_color (Tuple[int, int, int], optional): Text foreground color.
+            color (Tuple[int, int, int]): Text background color.
+            txt_color (Tuple[int, int, int]): Text foreground color.
         """
         # Format text
         angle_text, count_text, stage_text = f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}"

@@ -476,7 +543,11 @@ class SolutionAnnotator(Annotator):
         )

     def plot_distance_and_line(
-        self, pixels_distance, centroids, line_color=(104, 31, 17), centroid_color=(255, 0, 255)
+        self,
+        pixels_distance: float,
+        centroids: List[Tuple[int, int]],
+        line_color: Tuple[int, int, int] = (104, 31, 17),
+        centroid_color: Tuple[int, int, int] = (255, 0, 255),
     ):
         """
         Plot the distance and line between two centroids on the frame.

@@ -484,8 +555,8 @@ class SolutionAnnotator(Annotator):
         Args:
             pixels_distance (float): Pixels distance between two bbox centroids.
             centroids (List[Tuple[int, int]]): Bounding box centroids data.
-            line_color (Tuple[int, int, int], optional): Distance line color.
-            centroid_color (Tuple[int, int, int], optional): Bounding box centroid color.
+            line_color (Tuple[int, int, int]): Distance line color.
+            centroid_color (Tuple[int, int, int]): Bounding box centroid color.
         """
         # Get the text size
         text = f"Pixels Distance: {pixels_distance:.2f}"

@@ -511,7 +582,16 @@ class SolutionAnnotator(Annotator):
         cv2.circle(self.im, centroids[0], 6, centroid_color, -1)
         cv2.circle(self.im, centroids[1], 6, centroid_color, -1)

-    def display_objects_labels(self, im0, text, txt_color, bg_color, x_center, y_center, margin):
+    def display_objects_labels(
+        self,
+        im0: np.ndarray,
+        text: str,
+        txt_color: Tuple[int, int, int],
+        bg_color: Tuple[int, int, int],
+        x_center: float,
+        y_center: float,
+        margin: int,
+    ):
         """
         Display the bounding boxes labels in parking management app.

@@ -551,7 +631,14 @@ class SolutionAnnotator(Annotator):
             lineType=cv2.LINE_AA,
         )

-    def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
+    def sweep_annotator(
+        self,
+        line_x: int = 0,
+        line_y: int = 0,
+        label: Optional[str] = None,
+        color: Tuple[int, int, int] = (221, 0, 186),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+    ):
         """
         Draw a sweep annotation line and an optional label.

@@ -585,7 +672,13 @@ class SolutionAnnotator(Annotator):
             self.tf,
         )

-    def visioneye(self, box, center_point, color=(235, 219, 11), pin_color=(255, 0, 255)):
+    def visioneye(
+        self,
+        box: List[float],
+        center_point: Tuple[int, int],
+        color: Tuple[int, int, int] = (235, 219, 11),
+        pin_color: Tuple[int, int, int] = (255, 0, 255),
+    ):
         """
         Perform pinpoint human-vision eye mapping and plotting.

@@ -600,7 +693,14 @@ class SolutionAnnotator(Annotator):
         cv2.circle(self.im, center_bbox, self.tf * 2, color, -1)
         cv2.line(self.im, center_point, center_bbox, color, self.tf)

-    def circle_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), margin=2):
+    def circle_label(
+        self,
+        box: Tuple[float, float, float, float],
+        label: str = "",
+        color: Tuple[int, int, int] = (128, 128, 128),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+        margin: int = 2,
+    ):
         """
         Draw a label with a background circle centered within a given bounding box.

@@ -638,7 +738,14 @@ class SolutionAnnotator(Annotator):
             lineType=cv2.LINE_AA,
         )

-    def text_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), margin=5):
+    def text_label(
+        self,
+        box: Tuple[float, float, float, float],
+        label: str = "",
+        color: Tuple[int, int, int] = (128, 128, 128),
+        txt_color: Tuple[int, int, int] = (255, 255, 255),
+        margin: int = 5,
+    ):
         """
         Draw a label with a background rectangle centered within a given bounding box.

@@ -681,7 +788,8 @@ class SolutionResults:
     A class to encapsulate the results of Ultralytics Solutions.

     This class is designed to store and manage various outputs generated by the solution pipeline, including counts,
-    angles, and workout stages.
+    angles, workout stages, and other analytics data. It provides a structured way to access and manipulate results
+    from different computer vision solutions such as object counting, pose estimation, and tracking analytics.

     Attributes:
         plot_im (np.ndarray): Processed image with counts, blurred, or other effects from solutions.

@@ -697,9 +805,10 @@ class SolutionResults:
         filled_slots (int): The number of filled slots in a monitored area.
         email_sent (bool): A flag indicating whether an email notification was sent.
         total_tracks (int): The total number of tracked objects.
-        region_counts (dict): The count of objects within a specific region.
+        region_counts (Dict): The count of objects within a specific region.
         speed_dict (Dict[str, float]): A dictionary containing speed information for tracked objects.
         total_crop_objects (int): Total number of cropped objects using ObjectCropper class.
+        speed (Dict): Performance timing information for tracking and solution processing.
     """

     def __init__(self, **kwargs):

@@ -730,7 +839,7 @@ class SolutionResults:
         # Override with user-defined values
         self.__dict__.update(kwargs)

-    def __str__(self):
+    def __str__(self) -> str:
         """
         Return a formatted string representation of the SolutionResults object.

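Note: the `SolutionResults.__init__` hunk above ends with `self.__dict__.update(kwargs)`, i.e. every attribute gets a default first and is then overridden by whatever the caller passes. A minimal sketch of that pattern with a hypothetical subset of the attributes:

class MiniResults:  # hypothetical stand-in for SolutionResults
    def __init__(self, **kwargs):
        self.total_tracks = 0  # defaults assigned first
        self.region_counts = {}
        self.__dict__.update(kwargs)  # then user-supplied values override them

    def __str__(self):
        # Report non-default attributes, in the spirit of SolutionResults.__str__
        return ", ".join(f"{k}={v}" for k, v in vars(self).items() if v)

print(MiniResults(total_tracks=3))  # -> "total_tracks=3"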
ultralytics/solutions/speed_estimation.py

@@ -12,24 +12,29 @@ class SpeedEstimator(BaseSolution):
     A class to estimate the speed of objects in a real-time video stream based on their tracks.

     This class extends the BaseSolution class and provides functionality for estimating object speeds using
-    tracking data in video streams.
+    tracking data in video streams. Speed is calculated based on pixel displacement over time and converted
+    to real-world units using a configurable meters-per-pixel scale factor.

     Attributes:
-        spd (Dict[int, float]): Dictionary storing speed data for tracked objects.
-        trk_hist (Dict[int, float]): Dictionary storing the object tracking data.
-        max_hist (int): maximum track history before computing speed
-        meters_per_pixel (float): Real-world meters represented by one pixel (e.g., 0.04 for 4m over 100px).
-        max_speed (int): Maximum allowed object speed; values above this will be capped at 120 km/h.
+        fps (float): Video frame rate for time calculations.
+        frame_count (int): Global frame counter for tracking temporal information.
+        trk_frame_ids (dict): Maps track IDs to their first frame index.
+        spd (dict): Final speed per object in km/h once locked.
+        trk_hist (dict): Maps track IDs to deque of position history.
+        locked_ids (set): Track IDs whose speed has been finalized.
+        max_hist (int): Required frame history before computing speed.
+        meter_per_pixel (float): Real-world meters represented by one pixel for scene scale conversion.
+        max_speed (int): Maximum allowed object speed; values above this will be capped.

     Methods:
-        initialize_region: Initializes the speed estimation region.
-        process: Processes input frames to estimate object speeds.
-        store_tracking_history: Stores the tracking history for an object.
-        extract_tracks: Extracts tracks from the current frame.
-        display_output: Displays the output with annotations.
+        process: Process input frames to estimate object speeds based on tracking data.
+        store_tracking_history: Store the tracking history for an object.
+        extract_tracks: Extract tracks from the current frame.
+        display_output: Display the output with annotations.

     Examples:
-        >>> estimator = SpeedEstimator()
+        Initialize speed estimator and process a frame
+        >>> estimator = SpeedEstimator(meter_per_pixel=0.04, max_speed=120)
         >>> frame = cv2.imread("frame.jpg")
         >>> results = estimator.process(frame)
         >>> cv2.imshow("Speed Estimation", results.plot_im)

@@ -44,15 +49,15 @@ class SpeedEstimator(BaseSolution):
         """
         super().__init__(**kwargs)

-        self.fps = self.CFG["fps"]  # assumed video FPS
-        self.frame_count = 0  # global frame count
+        self.fps = self.CFG["fps"]  # Video frame rate for time calculations
+        self.frame_count = 0  # Global frame counter
         self.trk_frame_ids = {}  # Track ID → first frame index
         self.spd = {}  # Final speed per object (km/h), once locked
         self.trk_hist = {}  # Track ID → deque of (time, position)
         self.locked_ids = set()  # Track IDs whose speed has been finalized
         self.max_hist = self.CFG["max_hist"]  # Required frame history before computing speed
         self.meter_per_pixel = self.CFG["meter_per_pixel"]  # Scene scale, depends on camera details
-        self.max_speed = self.CFG["max_speed"]  # max_speed adjustment
+        self.max_speed = self.CFG["max_speed"]  # Maximum speed adjustment

     def process(self, im0):
         """

@@ -65,6 +70,7 @@ class SpeedEstimator(BaseSolution):
             (SolutionResults): Contains processed image `plot_im` and `total_tracks` (number of tracked objects).

         Examples:
+            Process a frame for speed estimation
             >>> estimator = SpeedEstimator()
             >>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
             >>> results = estimator.process(image)

@@ -89,15 +95,15 @@ class SpeedEstimator(BaseSolution):
                     p0, p1 = trk_hist[0], trk_hist[-1]  # First and last points of track
                     dt = (self.frame_count - self.trk_frame_ids[track_id]) / self.fps  # Time in seconds
                     if dt > 0:
-                        dx, dy = p1[0] - p0[0], p1[1] - p0[1]  # pixel displacement
-                        pixel_distance = sqrt(dx * dx + dy * dy)  # get pixel distance
-                        meters = pixel_distance * self.meter_per_pixel  # convert to meters
+                        dx, dy = p1[0] - p0[0], p1[1] - p0[1]  # Pixel displacement
+                        pixel_distance = sqrt(dx * dx + dy * dy)  # Calculate pixel distance
+                        meters = pixel_distance * self.meter_per_pixel  # Convert to meters
                         self.spd[track_id] = int(
                             min((meters / dt) * 3.6, self.max_speed)
-                        )  # convert to km/h and store final speed
-                        self.locked_ids.add(track_id)  # prevent further updates
-                        self.trk_hist.pop(track_id, None)  # free memory
-                        self.trk_frame_ids.pop(track_id, None)  # optional: remove frame start too
+                        )  # Convert to km/h and store final speed
+                        self.locked_ids.add(track_id)  # Prevent further updates
+                        self.trk_hist.pop(track_id, None)  # Free memory
+                        self.trk_frame_ids.pop(track_id, None)  # Remove frame start reference

             if track_id in self.spd:
                 speed_label = f"{self.spd[track_id]} km/h"
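Note: the speed computation in this final hunk reduces to a few arithmetic steps. A worked example with hypothetical numbers (a track moving 200 px over 25 frames at 25 FPS, using the `meter_per_pixel` and `max_speed` values from the docstring example above):

from math import sqrt

fps, meter_per_pixel, max_speed = 25, 0.04, 120  # hypothetical configuration values
p0, p1 = (100, 300), (300, 300)  # first and last tracked positions in pixels
dt = 25 / fps  # 25 elapsed frames -> 1.0 second

dx, dy = p1[0] - p0[0], p1[1] - p0[1]  # pixel displacement
meters = sqrt(dx * dx + dy * dy) * meter_per_pixel  # 200 px -> 8.0 m
speed = int(min((meters / dt) * 3.6, max_speed))  # m/s -> km/h, capped at max_speed
print(speed)  # -> 28 km/h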