ultralytics 8.3.143__py3-none-any.whl → 8.3.145__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. tests/conftest.py +7 -24
  2. tests/test_cli.py +1 -1
  3. tests/test_cuda.py +7 -2
  4. tests/test_engine.py +7 -8
  5. tests/test_exports.py +16 -16
  6. tests/test_integrations.py +1 -1
  7. tests/test_solutions.py +11 -11
  8. ultralytics/__init__.py +1 -1
  9. ultralytics/cfg/__init__.py +16 -13
  10. ultralytics/data/annotator.py +6 -5
  11. ultralytics/data/augment.py +127 -126
  12. ultralytics/data/base.py +54 -51
  13. ultralytics/data/build.py +47 -23
  14. ultralytics/data/converter.py +47 -43
  15. ultralytics/data/dataset.py +51 -50
  16. ultralytics/data/loaders.py +77 -44
  17. ultralytics/data/split.py +22 -9
  18. ultralytics/data/split_dota.py +63 -39
  19. ultralytics/data/utils.py +59 -39
  20. ultralytics/engine/exporter.py +79 -27
  21. ultralytics/engine/model.py +52 -51
  22. ultralytics/engine/predictor.py +37 -28
  23. ultralytics/engine/results.py +191 -161
  24. ultralytics/engine/trainer.py +36 -19
  25. ultralytics/engine/tuner.py +12 -9
  26. ultralytics/engine/validator.py +7 -9
  27. ultralytics/hub/__init__.py +11 -13
  28. ultralytics/hub/auth.py +22 -2
  29. ultralytics/hub/google/__init__.py +19 -19
  30. ultralytics/hub/session.py +37 -51
  31. ultralytics/hub/utils.py +19 -5
  32. ultralytics/models/fastsam/model.py +30 -12
  33. ultralytics/models/fastsam/predict.py +5 -6
  34. ultralytics/models/fastsam/utils.py +3 -3
  35. ultralytics/models/fastsam/val.py +10 -6
  36. ultralytics/models/nas/model.py +9 -5
  37. ultralytics/models/nas/predict.py +6 -6
  38. ultralytics/models/nas/val.py +3 -3
  39. ultralytics/models/rtdetr/model.py +7 -6
  40. ultralytics/models/rtdetr/predict.py +14 -7
  41. ultralytics/models/rtdetr/train.py +10 -4
  42. ultralytics/models/rtdetr/val.py +36 -9
  43. ultralytics/models/sam/amg.py +30 -12
  44. ultralytics/models/sam/build.py +22 -22
  45. ultralytics/models/sam/model.py +10 -9
  46. ultralytics/models/sam/modules/blocks.py +76 -80
  47. ultralytics/models/sam/modules/decoders.py +6 -8
  48. ultralytics/models/sam/modules/encoders.py +23 -26
  49. ultralytics/models/sam/modules/memory_attention.py +13 -1
  50. ultralytics/models/sam/modules/sam.py +57 -26
  51. ultralytics/models/sam/modules/tiny_encoder.py +232 -237
  52. ultralytics/models/sam/modules/transformer.py +13 -13
  53. ultralytics/models/sam/modules/utils.py +11 -19
  54. ultralytics/models/sam/predict.py +114 -101
  55. ultralytics/models/utils/loss.py +98 -77
  56. ultralytics/models/utils/ops.py +116 -67
  57. ultralytics/models/yolo/classify/predict.py +5 -5
  58. ultralytics/models/yolo/classify/train.py +32 -28
  59. ultralytics/models/yolo/classify/val.py +7 -8
  60. ultralytics/models/yolo/detect/predict.py +1 -0
  61. ultralytics/models/yolo/detect/train.py +15 -14
  62. ultralytics/models/yolo/detect/val.py +37 -36
  63. ultralytics/models/yolo/model.py +106 -23
  64. ultralytics/models/yolo/obb/predict.py +3 -4
  65. ultralytics/models/yolo/obb/train.py +14 -6
  66. ultralytics/models/yolo/obb/val.py +29 -23
  67. ultralytics/models/yolo/pose/predict.py +9 -8
  68. ultralytics/models/yolo/pose/train.py +24 -16
  69. ultralytics/models/yolo/pose/val.py +44 -26
  70. ultralytics/models/yolo/segment/predict.py +5 -5
  71. ultralytics/models/yolo/segment/train.py +11 -7
  72. ultralytics/models/yolo/segment/val.py +2 -2
  73. ultralytics/models/yolo/world/train.py +33 -23
  74. ultralytics/models/yolo/world/train_world.py +11 -3
  75. ultralytics/models/yolo/yoloe/predict.py +11 -11
  76. ultralytics/models/yolo/yoloe/train.py +73 -21
  77. ultralytics/models/yolo/yoloe/train_seg.py +10 -7
  78. ultralytics/models/yolo/yoloe/val.py +42 -18
  79. ultralytics/nn/autobackend.py +59 -15
  80. ultralytics/nn/modules/__init__.py +4 -4
  81. ultralytics/nn/modules/activation.py +4 -1
  82. ultralytics/nn/modules/block.py +178 -111
  83. ultralytics/nn/modules/conv.py +6 -5
  84. ultralytics/nn/modules/head.py +469 -121
  85. ultralytics/nn/modules/transformer.py +147 -58
  86. ultralytics/nn/tasks.py +227 -20
  87. ultralytics/nn/text_model.py +30 -33
  88. ultralytics/solutions/ai_gym.py +4 -6
  89. ultralytics/solutions/analytics.py +7 -4
  90. ultralytics/solutions/config.py +10 -10
  91. ultralytics/solutions/distance_calculation.py +11 -10
  92. ultralytics/solutions/heatmap.py +2 -2
  93. ultralytics/solutions/instance_segmentation.py +7 -4
  94. ultralytics/solutions/object_blurrer.py +3 -3
  95. ultralytics/solutions/object_counter.py +15 -11
  96. ultralytics/solutions/object_cropper.py +3 -2
  97. ultralytics/solutions/parking_management.py +29 -28
  98. ultralytics/solutions/queue_management.py +6 -6
  99. ultralytics/solutions/region_counter.py +10 -3
  100. ultralytics/solutions/security_alarm.py +3 -3
  101. ultralytics/solutions/similarity_search.py +85 -24
  102. ultralytics/solutions/solutions.py +189 -79
  103. ultralytics/solutions/speed_estimation.py +28 -22
  104. ultralytics/solutions/streamlit_inference.py +17 -12
  105. ultralytics/solutions/trackzone.py +4 -4
  106. ultralytics/trackers/basetrack.py +16 -23
  107. ultralytics/trackers/bot_sort.py +30 -20
  108. ultralytics/trackers/byte_tracker.py +70 -64
  109. ultralytics/trackers/track.py +4 -8
  110. ultralytics/trackers/utils/gmc.py +31 -58
  111. ultralytics/trackers/utils/kalman_filter.py +37 -37
  112. ultralytics/trackers/utils/matching.py +1 -1
  113. ultralytics/utils/__init__.py +105 -89
  114. ultralytics/utils/autobatch.py +16 -3
  115. ultralytics/utils/autodevice.py +54 -24
  116. ultralytics/utils/benchmarks.py +45 -29
  117. ultralytics/utils/callbacks/base.py +3 -3
  118. ultralytics/utils/callbacks/clearml.py +9 -9
  119. ultralytics/utils/callbacks/comet.py +67 -25
  120. ultralytics/utils/callbacks/dvc.py +7 -10
  121. ultralytics/utils/callbacks/mlflow.py +2 -5
  122. ultralytics/utils/callbacks/neptune.py +7 -13
  123. ultralytics/utils/callbacks/raytune.py +1 -1
  124. ultralytics/utils/callbacks/tensorboard.py +5 -6
  125. ultralytics/utils/callbacks/wb.py +14 -14
  126. ultralytics/utils/checks.py +14 -13
  127. ultralytics/utils/dist.py +5 -5
  128. ultralytics/utils/downloads.py +94 -67
  129. ultralytics/utils/errors.py +5 -5
  130. ultralytics/utils/export.py +61 -47
  131. ultralytics/utils/files.py +23 -22
  132. ultralytics/utils/instance.py +48 -52
  133. ultralytics/utils/loss.py +78 -40
  134. ultralytics/utils/metrics.py +186 -130
  135. ultralytics/utils/ops.py +186 -190
  136. ultralytics/utils/patches.py +15 -17
  137. ultralytics/utils/plotting.py +71 -27
  138. ultralytics/utils/tal.py +21 -15
  139. ultralytics/utils/torch_utils.py +53 -50
  140. ultralytics/utils/triton.py +5 -4
  141. ultralytics/utils/tuner.py +5 -5
  142. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/METADATA +2 -2
  143. ultralytics-8.3.145.dist-info/RECORD +272 -0
  144. ultralytics-8.3.143.dist-info/RECORD +0 -272
  145. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/WHEEL +0 -0
  146. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/entry_points.txt +0 -0
  147. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/licenses/LICENSE +0 -0
  148. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
2
2
 
3
3
  import math
4
4
  from collections import defaultdict
5
+ from typing import Any, Dict, List, Optional, Tuple
5
6
 
6
7
  import cv2
7
8
  import numpy as np
@@ -18,25 +19,47 @@ class BaseSolution:
18
19
  A base class for managing Ultralytics Solutions.
19
20
 
20
21
  This class provides core functionality for various Ultralytics Solutions, including model loading, object tracking,
21
- and region initialization.
22
+ and region initialization. It serves as the foundation for implementing specific computer vision solutions such as
23
+ object counting, pose estimation, and analytics.
22
24
 
23
25
  Attributes:
24
- LineString (shapely.geometry.LineString): Class for creating line string geometries.
25
- Polygon (shapely.geometry.Polygon): Class for creating polygon geometries.
26
- Point (shapely.geometry.Point): Class for creating point geometries.
27
- CFG (dict): Configuration dictionary loaded from a YAML file and updated with kwargs.
28
- region (List[Tuple[int, int]]): List of coordinate tuples defining a region of interest.
26
+ LineString: Class for creating line string geometries from shapely.
27
+ Polygon: Class for creating polygon geometries from shapely.
28
+ Point: Class for creating point geometries from shapely.
29
+ prep: Prepared geometry function from shapely for optimized spatial operations.
30
+ CFG (Dict[str, Any]): Configuration dictionary loaded from YAML file and updated with kwargs.
31
+ LOGGER: Logger instance for solution-specific logging.
32
+ annotator: Annotator instance for drawing on images.
33
+ tracks: YOLO tracking results from the latest inference.
34
+ track_data: Extracted tracking data (boxes or OBB) from tracks.
35
+ boxes (List): Bounding box coordinates from tracking results.
36
+ clss (List[int]): Class indices from tracking results.
37
+ track_ids (List[int]): Track IDs from tracking results.
38
+ confs (List[float]): Confidence scores from tracking results.
39
+ track_line: Current track line for storing tracking history.
40
+ masks: Segmentation masks from tracking results.
41
+ r_s: Region or line geometry object for spatial operations.
42
+ frame_no (int): Current frame number for logging purposes.
43
+ region (List[Tuple[int, int]]): List of coordinate tuples defining region of interest.
29
44
  line_width (int): Width of lines used in visualizations.
30
- model (ultralytics.YOLO): Loaded YOLO model instance.
45
+ model (YOLO): Loaded YOLO model instance.
31
46
  names (Dict[int, str]): Dictionary mapping class indices to class names.
32
- env_check (bool): Flag indicating whether the environment supports image display.
33
- track_history (collections.defaultdict): Dictionary to store tracking history for each object.
47
+ classes (List[int]): List of class indices to track.
48
+ show_conf (bool): Flag to show confidence scores in annotations.
49
+ show_labels (bool): Flag to show class labels in annotations.
50
+ device (str): Device for model inference.
51
+ track_add_args (Dict[str, Any]): Additional arguments for tracking configuration.
52
+ env_check (bool): Flag indicating whether environment supports image display.
53
+ track_history (defaultdict): Dictionary storing tracking history for each object.
54
+ profilers (Tuple): Profiler instances for performance monitoring.
34
55
 
35
56
  Methods:
36
- extract_tracks: Apply object tracking and extract tracks from an input image.
37
- store_tracking_history: Store object tracking history for a given track ID and bounding box.
38
- initialize_region: Initialize the counting region and line segment based on configuration.
39
- display_output: Display the results of processing, including showing frames or saving results.
57
+ adjust_box_label: Generate formatted label for bounding box.
58
+ extract_tracks: Apply object tracking and extract tracks from input image.
59
+ store_tracking_history: Store object tracking history for given track ID and bounding box.
60
+ initialize_region: Initialize counting region and line segment based on configuration.
61
+ display_output: Display processing results including frames or saved results.
62
+ process: Process method to be implemented by each Solution subclass.
40
63
 
41
64
  Examples:
42
65
  >>> solution = BaseSolution(model="yolo11n.pt", region=[(0, 0), (100, 0), (100, 100), (0, 100)])
@@ -46,12 +69,12 @@ class BaseSolution:
46
69
  >>> solution.display_output(image)
47
70
  """
48
71
 
49
- def __init__(self, is_cli=False, **kwargs):
72
+ def __init__(self, is_cli: bool = False, **kwargs):
50
73
  """
51
- Initializes the BaseSolution class with configuration settings and the YOLO model.
74
+ Initialize the BaseSolution class with configuration settings and YOLO model.
52
75
 
53
76
  Args:
54
- is_cli (bool): Enables CLI mode if set to True.
77
+ is_cli (bool): Enable CLI mode if set to True.
55
78
  **kwargs (Any): Additional configuration parameters that override defaults.
56
79
  """
57
80
  self.CFG = vars(SolutionConfig().update(**kwargs))
@@ -112,9 +135,9 @@ class BaseSolution:
112
135
  ops.Profile(device=self.device), # solution
113
136
  )
114
137
 
115
- def adjust_box_label(self, cls, conf, track_id=None):
138
+ def adjust_box_label(self, cls: int, conf: float, track_id: Optional[int] = None) -> Optional[str]:
116
139
  """
117
- Generates a formatted label for a bounding box.
140
+ Generate a formatted label for a bounding box.
118
141
 
119
142
  This method constructs a label string for a bounding box using the class index and confidence score.
120
143
  Optionally includes the track ID if provided. The label format adapts based on the display settings
@@ -123,17 +146,17 @@ class BaseSolution:
123
146
  Args:
124
147
  cls (int): The class index of the detected object.
125
148
  conf (float): The confidence score of the detection.
126
- track_id (int, optional): The unique identifier for the tracked object. Defaults to None.
149
+ track_id (int, optional): The unique identifier for the tracked object.
127
150
 
128
151
  Returns:
129
- (str or None): The formatted label string if `self.show_labels` is True; otherwise, None.
152
+ (str | None): The formatted label string if `self.show_labels` is True; otherwise, None.
130
153
  """
131
154
  name = ("" if track_id is None else f"{track_id} ") + self.names[cls]
132
155
  return (f"{name} {conf:.2f}" if self.show_conf else name) if self.show_labels else None
133
156
 
134
- def extract_tracks(self, im0):
157
+ def extract_tracks(self, im0: np.ndarray):
135
158
  """
136
- Applies object tracking and extracts tracks from an input image or frame.
159
+ Apply object tracking and extract tracks from an input image or frame.
137
160
 
138
161
  Args:
139
162
  im0 (np.ndarray): The input image or frame.
@@ -146,11 +169,12 @@ class BaseSolution:
146
169
  with self.profilers[0]:
147
170
  self.tracks = self.model.track(
148
171
  source=im0, persist=True, classes=self.classes, verbose=False, **self.track_add_args
149
- )
150
- self.track_data = self.tracks[0].obb or self.tracks[0].boxes # Extract tracks for OBB or object detection
172
+ )[0]
173
+ is_obb = self.tracks.obb is not None
174
+ self.track_data = self.tracks.obb if is_obb else self.tracks.boxes # Extract tracks for OBB or object detection
151
175
 
152
- if self.track_data and self.track_data.id is not None:
153
- self.boxes = self.track_data.xyxy.cpu()
176
+ if self.track_data and self.track_data.is_track:
177
+ self.boxes = (self.track_data.xyxyxyxy if is_obb else self.track_data.xyxy).cpu()
154
178
  self.clss = self.track_data.cls.cpu().tolist()
155
179
  self.track_ids = self.track_data.id.int().cpu().tolist()
156
180
  self.confs = self.track_data.conf.cpu().tolist()
@@ -158,9 +182,9 @@ class BaseSolution:
158
182
  self.LOGGER.warning("no tracks found!")
159
183
  self.boxes, self.clss, self.track_ids, self.confs = [], [], [], []
160
184
 
161
- def store_tracking_history(self, track_id, box):
185
+ def store_tracking_history(self, track_id: int, box):
162
186
  """
163
- Stores the tracking history of an object.
187
+ Store the tracking history of an object.
164
188
 
165
189
  This method updates the tracking history for a given object by appending the center point of its
166
190
  bounding box to the track line. It maintains a maximum of 30 points in the tracking history.
@@ -187,7 +211,7 @@ class BaseSolution:
187
211
  self.Polygon(self.region) if len(self.region) >= 3 else self.LineString(self.region)
188
212
  ) # region or line
189
213
 
190
- def display_output(self, plot_im):
214
+ def display_output(self, plot_im: np.ndarray):
191
215
  """
192
216
  Display the results of the processing, which could involve showing frames, printing counts, or saving results.
193
217
 
@@ -195,7 +219,7 @@ class BaseSolution:
195
219
  the processed frame with annotations, and allows for user interaction to close the display.
196
220
 
197
221
  Args:
198
- plot_im (numpy.ndarray): The image or frame that has been processed and annotated.
222
+ plot_im (np.ndarray): The image or frame that has been processed and annotated.
199
223
 
200
224
  Examples:
201
225
  >>> solution = BaseSolution()
@@ -240,8 +264,8 @@ class SolutionAnnotator(Annotator):
240
264
  A specialized annotator class for visualizing and analyzing computer vision tasks.
241
265
 
242
266
  This class extends the base Annotator class, providing additional methods for drawing regions, centroids, tracking
243
- trails, and visual annotations for Ultralytics Solutions: https://docs.ultralytics.com/solutions/.
244
- and parking management.
267
+ trails, and visual annotations for Ultralytics Solutions. It offers comprehensive visualization capabilities for
268
+ various computer vision applications including object detection, tracking, pose estimation, and analytics.
245
269
 
246
270
  Attributes:
247
271
  im (np.ndarray): The image being annotated.
@@ -252,19 +276,19 @@ class SolutionAnnotator(Annotator):
252
276
  example (str): An example attribute for demonstration purposes.
253
277
 
254
278
  Methods:
255
- draw_region: Draws a region using specified points, colors, and thickness.
256
- queue_counts_display: Displays queue counts in the specified region.
257
- display_analytics: Displays overall statistics for parking lot management.
258
- estimate_pose_angle: Calculates the angle between three points in an object pose.
259
- draw_specific_points: Draws specific keypoints on the image.
260
- plot_workout_information: Draws a labeled text box on the image.
261
- plot_angle_and_count_and_stage: Visualizes angle, step count, and stage for workout monitoring.
262
- plot_distance_and_line: Displays the distance between centroids and connects them with a line.
263
- display_objects_labels: Annotates bounding boxes with object class labels.
264
- sweep_annotator: Visualizes a vertical sweep line and optional label.
265
- visioneye: Maps and connects object centroids to a visual "eye" point.
266
- circle_label: Draws a circular label within a bounding box.
267
- text_label: Draws a rectangular label within a bounding box.
279
+ draw_region: Draw a region using specified points, colors, and thickness.
280
+ queue_counts_display: Display queue counts in the specified region.
281
+ display_analytics: Display overall statistics for parking lot management.
282
+ estimate_pose_angle: Calculate the angle between three points in an object pose.
283
+ draw_specific_kpts: Draw specific keypoints on the image.
284
+ plot_workout_information: Draw a labeled text box on the image.
285
+ plot_angle_and_count_and_stage: Visualize angle, step count, and stage for workout monitoring.
286
+ plot_distance_and_line: Display the distance between centroids and connect them with a line.
287
+ display_objects_labels: Annotate bounding boxes with object class labels.
288
+ sweep_annotator: Visualize a vertical sweep line and optional label.
289
+ visioneye: Map and connect object centroids to a visual "eye" point.
290
+ circle_label: Draw a circular label within a bounding box.
291
+ text_label: Draw a rectangular label within a bounding box.
268
292
 
269
293
  Examples:
270
294
  >>> annotator = SolutionAnnotator(image)
@@ -274,26 +298,39 @@ class SolutionAnnotator(Annotator):
274
298
  ... )
275
299
  """
276
300
 
277
- def __init__(self, im, line_width=None, font_size=None, font="Arial.ttf", pil=False, example="abc"):
301
+ def __init__(
302
+ self,
303
+ im: np.ndarray,
304
+ line_width: Optional[int] = None,
305
+ font_size: Optional[int] = None,
306
+ font: str = "Arial.ttf",
307
+ pil: bool = False,
308
+ example: str = "abc",
309
+ ):
278
310
  """
279
- Initializes the SolutionAnnotator class with an image for annotation.
311
+ Initialize the SolutionAnnotator class with an image for annotation.
280
312
 
281
313
  Args:
282
314
  im (np.ndarray): The image to be annotated.
283
315
  line_width (int, optional): Line thickness for drawing on the image.
284
316
  font_size (int, optional): Font size for text annotations.
285
- font (str, optional): Path to the font file.
286
- pil (bool, optional): Indicates whether to use PIL for rendering text.
287
- example (str, optional): An example parameter for demonstration purposes.
317
+ font (str): Path to the font file.
318
+ pil (bool): Indicates whether to use PIL for rendering text.
319
+ example (str): An example parameter for demonstration purposes.
288
320
  """
289
321
  super().__init__(im, line_width, font_size, font, pil, example)
290
322
 
291
- def draw_region(self, reg_pts=None, color=(0, 255, 0), thickness=5):
323
+ def draw_region(
324
+ self,
325
+ reg_pts: Optional[List[Tuple[int, int]]] = None,
326
+ color: Tuple[int, int, int] = (0, 255, 0),
327
+ thickness: int = 5,
328
+ ):
292
329
  """
293
330
  Draw a region or line on the image.
294
331
 
295
332
  Args:
296
- reg_pts (List[Tuple[int, int]]): Region points (for line 2 points, for region 4+ points).
333
+ reg_pts (List[Tuple[int, int]], optional): Region points (for line 2 points, for region 4+ points).
297
334
  color (Tuple[int, int, int]): RGB color value for the region.
298
335
  thickness (int): Line thickness for drawing the region.
299
336
  """
@@ -303,13 +340,19 @@ class SolutionAnnotator(Annotator):
303
340
  for point in reg_pts:
304
341
  cv2.circle(self.im, (point[0], point[1]), thickness * 2, color, -1) # -1 fills the circle
305
342
 
306
- def queue_counts_display(self, label, points=None, region_color=(255, 255, 255), txt_color=(0, 0, 0)):
343
+ def queue_counts_display(
344
+ self,
345
+ label: str,
346
+ points: Optional[List[Tuple[int, int]]] = None,
347
+ region_color: Tuple[int, int, int] = (255, 255, 255),
348
+ txt_color: Tuple[int, int, int] = (0, 0, 0),
349
+ ):
307
350
  """
308
- Displays queue counts on an image centered at the points with customizable font size and colors.
351
+ Display queue counts on an image centered at the points with customizable font size and colors.
309
352
 
310
353
  Args:
311
354
  label (str): Queue counts label.
312
- points (List[Tuple[int, int]]): Region points for center point calculation to display text.
355
+ points (List[Tuple[int, int]], optional): Region points for center point calculation to display text.
313
356
  region_color (Tuple[int, int, int]): RGB queue region color.
314
357
  txt_color (Tuple[int, int, int]): RGB text display color.
315
358
  """
@@ -343,7 +386,14 @@ class SolutionAnnotator(Annotator):
343
386
  lineType=cv2.LINE_AA,
344
387
  )
345
388
 
346
- def display_analytics(self, im0, text, txt_color, bg_color, margin):
389
+ def display_analytics(
390
+ self,
391
+ im0: np.ndarray,
392
+ text: Dict[str, Any],
393
+ txt_color: Tuple[int, int, int],
394
+ bg_color: Tuple[int, int, int],
395
+ margin: int,
396
+ ):
347
397
  """
348
398
  Display the overall statistics for parking lots, object counter etc.
349
399
 
@@ -373,7 +423,7 @@ class SolutionAnnotator(Annotator):
373
423
  text_y_offset = rect_y2
374
424
 
375
425
  @staticmethod
376
- def estimate_pose_angle(a, b, c):
426
+ def estimate_pose_angle(a: List[float], b: List[float], c: List[float]) -> float:
377
427
  """
378
428
  Calculate the angle between three points for workout monitoring.
379
429
 
@@ -389,20 +439,26 @@ class SolutionAnnotator(Annotator):
389
439
  angle = abs(radians * 180.0 / math.pi)
390
440
  return angle if angle <= 180.0 else (360 - angle)
391
441
 
392
- def draw_specific_kpts(self, keypoints, indices=None, radius=2, conf_thresh=0.25):
442
+ def draw_specific_kpts(
443
+ self,
444
+ keypoints: List[List[float]],
445
+ indices: Optional[List[int]] = None,
446
+ radius: int = 2,
447
+ conf_thresh: float = 0.25,
448
+ ) -> np.ndarray:
393
449
  """
394
450
  Draw specific keypoints for gym steps counting.
395
451
 
396
452
  Args:
397
453
  keypoints (List[List[float]]): Keypoints data to be plotted, each in format [x, y, confidence].
398
454
  indices (List[int], optional): Keypoint indices to be plotted.
399
- radius (int, optional): Keypoint radius.
400
- conf_thresh (float, optional): Confidence threshold for keypoints.
455
+ radius (int): Keypoint radius.
456
+ conf_thresh (float): Confidence threshold for keypoints.
401
457
 
402
458
  Returns:
403
459
  (np.ndarray): Image with drawn keypoints.
404
460
 
405
- Note:
461
+ Notes:
406
462
  Keypoint format: [x, y] or [x, y, confidence].
407
463
  Modifies self.im in-place.
408
464
  """
@@ -419,20 +475,26 @@ class SolutionAnnotator(Annotator):
419
475
 
420
476
  return self.im
421
477
 
422
- def plot_workout_information(self, display_text, position, color=(104, 31, 17), txt_color=(255, 255, 255)):
478
+ def plot_workout_information(
479
+ self,
480
+ display_text: str,
481
+ position: Tuple[int, int],
482
+ color: Tuple[int, int, int] = (104, 31, 17),
483
+ txt_color: Tuple[int, int, int] = (255, 255, 255),
484
+ ) -> int:
423
485
  """
424
486
  Draw workout text with a background on the image.
425
487
 
426
488
  Args:
427
489
  display_text (str): The text to be displayed.
428
490
  position (Tuple[int, int]): Coordinates (x, y) on the image where the text will be placed.
429
- color (Tuple[int, int, int], optional): Text background color.
430
- txt_color (Tuple[int, int, int], optional): Text foreground color.
491
+ color (Tuple[int, int, int]): Text background color.
492
+ txt_color (Tuple[int, int, int]): Text foreground color.
431
493
 
432
494
  Returns:
433
495
  (int): The height of the text.
434
496
  """
435
- (text_width, text_height), _ = cv2.getTextSize(display_text, 0, self.sf, self.tf)
497
+ (text_width, text_height), _ = cv2.getTextSize(display_text, 0, fontScale=self.sf, thickness=self.tf)
436
498
 
437
499
  # Draw background rectangle
438
500
  cv2.rectangle(
@@ -448,7 +510,13 @@ class SolutionAnnotator(Annotator):
448
510
  return text_height
449
511
 
450
512
  def plot_angle_and_count_and_stage(
451
- self, angle_text, count_text, stage_text, center_kpt, color=(104, 31, 17), txt_color=(255, 255, 255)
513
+ self,
514
+ angle_text: str,
515
+ count_text: str,
516
+ stage_text: str,
517
+ center_kpt: List[int],
518
+ color: Tuple[int, int, int] = (104, 31, 17),
519
+ txt_color: Tuple[int, int, int] = (255, 255, 255),
452
520
  ):
453
521
  """
454
522
  Plot the pose angle, count value, and step stage for workout monitoring.
@@ -458,8 +526,8 @@ class SolutionAnnotator(Annotator):
458
526
  count_text (str): Counts value for workout monitoring.
459
527
  stage_text (str): Stage decision for workout monitoring.
460
528
  center_kpt (List[int]): Centroid pose index for workout monitoring.
461
- color (Tuple[int, int, int], optional): Text background color.
462
- txt_color (Tuple[int, int, int], optional): Text foreground color.
529
+ color (Tuple[int, int, int]): Text background color.
530
+ txt_color (Tuple[int, int, int]): Text foreground color.
463
531
  """
464
532
  # Format text
465
533
  angle_text, count_text, stage_text = f" {angle_text:.2f}", f"Steps : {count_text}", f" {stage_text}"
@@ -476,7 +544,11 @@ class SolutionAnnotator(Annotator):
476
544
  )
477
545
 
478
546
  def plot_distance_and_line(
479
- self, pixels_distance, centroids, line_color=(104, 31, 17), centroid_color=(255, 0, 255)
547
+ self,
548
+ pixels_distance: float,
549
+ centroids: List[Tuple[int, int]],
550
+ line_color: Tuple[int, int, int] = (104, 31, 17),
551
+ centroid_color: Tuple[int, int, int] = (255, 0, 255),
480
552
  ):
481
553
  """
482
554
  Plot the distance and line between two centroids on the frame.
@@ -484,8 +556,8 @@ class SolutionAnnotator(Annotator):
484
556
  Args:
485
557
  pixels_distance (float): Pixels distance between two bbox centroids.
486
558
  centroids (List[Tuple[int, int]]): Bounding box centroids data.
487
- line_color (Tuple[int, int, int], optional): Distance line color.
488
- centroid_color (Tuple[int, int, int], optional): Bounding box centroid color.
559
+ line_color (Tuple[int, int, int]): Distance line color.
560
+ centroid_color (Tuple[int, int, int]): Bounding box centroid color.
489
561
  """
490
562
  # Get the text size
491
563
  text = f"Pixels Distance: {pixels_distance:.2f}"
@@ -511,7 +583,16 @@ class SolutionAnnotator(Annotator):
511
583
  cv2.circle(self.im, centroids[0], 6, centroid_color, -1)
512
584
  cv2.circle(self.im, centroids[1], 6, centroid_color, -1)
513
585
 
514
- def display_objects_labels(self, im0, text, txt_color, bg_color, x_center, y_center, margin):
586
+ def display_objects_labels(
587
+ self,
588
+ im0: np.ndarray,
589
+ text: str,
590
+ txt_color: Tuple[int, int, int],
591
+ bg_color: Tuple[int, int, int],
592
+ x_center: float,
593
+ y_center: float,
594
+ margin: int,
595
+ ):
515
596
  """
516
597
  Display the bounding boxes labels in parking management app.
517
598
 
@@ -551,7 +632,14 @@ class SolutionAnnotator(Annotator):
551
632
  lineType=cv2.LINE_AA,
552
633
  )
553
634
 
554
- def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
635
+ def sweep_annotator(
636
+ self,
637
+ line_x: int = 0,
638
+ line_y: int = 0,
639
+ label: Optional[str] = None,
640
+ color: Tuple[int, int, int] = (221, 0, 186),
641
+ txt_color: Tuple[int, int, int] = (255, 255, 255),
642
+ ):
555
643
  """
556
644
  Draw a sweep annotation line and an optional label.
557
645
 
@@ -585,7 +673,13 @@ class SolutionAnnotator(Annotator):
585
673
  self.tf,
586
674
  )
587
675
 
588
- def visioneye(self, box, center_point, color=(235, 219, 11), pin_color=(255, 0, 255)):
676
+ def visioneye(
677
+ self,
678
+ box: List[float],
679
+ center_point: Tuple[int, int],
680
+ color: Tuple[int, int, int] = (235, 219, 11),
681
+ pin_color: Tuple[int, int, int] = (255, 0, 255),
682
+ ):
589
683
  """
590
684
  Perform pinpoint human-vision eye mapping and plotting.
591
685
 
@@ -600,7 +694,14 @@ class SolutionAnnotator(Annotator):
600
694
  cv2.circle(self.im, center_bbox, self.tf * 2, color, -1)
601
695
  cv2.line(self.im, center_point, center_bbox, color, self.tf)
602
696
 
603
- def circle_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), margin=2):
697
+ def circle_label(
698
+ self,
699
+ box: Tuple[float, float, float, float],
700
+ label: str = "",
701
+ color: Tuple[int, int, int] = (128, 128, 128),
702
+ txt_color: Tuple[int, int, int] = (255, 255, 255),
703
+ margin: int = 2,
704
+ ):
604
705
  """
605
706
  Draw a label with a background circle centered within a given bounding box.
606
707
 
@@ -638,7 +739,14 @@ class SolutionAnnotator(Annotator):
638
739
  lineType=cv2.LINE_AA,
639
740
  )
640
741
 
641
- def text_label(self, box, label="", color=(128, 128, 128), txt_color=(255, 255, 255), margin=5):
742
+ def text_label(
743
+ self,
744
+ box: Tuple[float, float, float, float],
745
+ label: str = "",
746
+ color: Tuple[int, int, int] = (128, 128, 128),
747
+ txt_color: Tuple[int, int, int] = (255, 255, 255),
748
+ margin: int = 5,
749
+ ):
642
750
  """
643
751
  Draw a label with a background rectangle centered within a given bounding box.
644
752
 
@@ -681,7 +789,8 @@ class SolutionResults:
681
789
  A class to encapsulate the results of Ultralytics Solutions.
682
790
 
683
791
  This class is designed to store and manage various outputs generated by the solution pipeline, including counts,
684
- angles, and workout stages.
792
+ angles, workout stages, and other analytics data. It provides a structured way to access and manipulate results
793
+ from different computer vision solutions such as object counting, pose estimation, and tracking analytics.
685
794
 
686
795
  Attributes:
687
796
  plot_im (np.ndarray): Processed image with counts, blurred, or other effects from solutions.
@@ -697,9 +806,10 @@ class SolutionResults:
697
806
  filled_slots (int): The number of filled slots in a monitored area.
698
807
  email_sent (bool): A flag indicating whether an email notification was sent.
699
808
  total_tracks (int): The total number of tracked objects.
700
- region_counts (dict): The count of objects within a specific region.
809
+ region_counts (Dict): The count of objects within a specific region.
701
810
  speed_dict (Dict[str, float]): A dictionary containing speed information for tracked objects.
702
811
  total_crop_objects (int): Total number of cropped objects using ObjectCropper class.
812
+ speed (Dict): Performance timing information for tracking and solution processing.
703
813
  """
704
814
 
705
815
  def __init__(self, **kwargs):
@@ -730,7 +840,7 @@ class SolutionResults:
730
840
  # Override with user-defined values
731
841
  self.__dict__.update(kwargs)
732
842
 
733
- def __str__(self):
843
+ def __str__(self) -> str:
734
844
  """
735
845
  Return a formatted string representation of the SolutionResults object.
736
846
 
@@ -12,24 +12,29 @@ class SpeedEstimator(BaseSolution):
12
12
  A class to estimate the speed of objects in a real-time video stream based on their tracks.
13
13
 
14
14
  This class extends the BaseSolution class and provides functionality for estimating object speeds using
15
- tracking data in video streams.
15
+ tracking data in video streams. Speed is calculated based on pixel displacement over time and converted
16
+ to real-world units using a configurable meters-per-pixel scale factor.
16
17
 
17
18
  Attributes:
18
- spd (Dict[int, float]): Dictionary storing speed data for tracked objects.
19
- trk_hist (Dict[int, float]): Dictionary storing the object tracking data.
20
- max_hist (int): maximum track history before computing speed
21
- meters_per_pixel (float): Real-world meters represented by one pixel (e.g., 0.04 for 4m over 100px).
22
- max_speed (int): Maximum allowed object speed; values above this will be capped at 120 km/h.
19
+ fps (float): Video frame rate for time calculations.
20
+ frame_count (int): Global frame counter for tracking temporal information.
21
+ trk_frame_ids (dict): Maps track IDs to their first frame index.
22
+ spd (dict): Final speed per object in km/h once locked.
23
+ trk_hist (dict): Maps track IDs to deque of position history.
24
+ locked_ids (set): Track IDs whose speed has been finalized.
25
+ max_hist (int): Required frame history before computing speed.
26
+ meter_per_pixel (float): Real-world meters represented by one pixel for scene scale conversion.
27
+ max_speed (int): Maximum allowed object speed; values above this will be capped.
23
28
 
24
29
  Methods:
25
- initialize_region: Initializes the speed estimation region.
26
- process: Processes input frames to estimate object speeds.
27
- store_tracking_history: Stores the tracking history for an object.
28
- extract_tracks: Extracts tracks from the current frame.
29
- display_output: Displays the output with annotations.
30
+ process: Process input frames to estimate object speeds based on tracking data.
31
+ store_tracking_history: Store the tracking history for an object.
32
+ extract_tracks: Extract tracks from the current frame.
33
+ display_output: Display the output with annotations.
30
34
 
31
35
  Examples:
32
- >>> estimator = SpeedEstimator()
36
+ Initialize speed estimator and process a frame
37
+ >>> estimator = SpeedEstimator(meter_per_pixel=0.04, max_speed=120)
33
38
  >>> frame = cv2.imread("frame.jpg")
34
39
  >>> results = estimator.process(frame)
35
40
  >>> cv2.imshow("Speed Estimation", results.plot_im)
@@ -44,15 +49,15 @@ class SpeedEstimator(BaseSolution):
44
49
  """
45
50
  super().__init__(**kwargs)
46
51
 
47
- self.fps = self.CFG["fps"] # assumed video FPS
48
- self.frame_count = 0 # global frame count
52
+ self.fps = self.CFG["fps"] # Video frame rate for time calculations
53
+ self.frame_count = 0 # Global frame counter
49
54
  self.trk_frame_ids = {} # Track ID → first frame index
50
55
  self.spd = {} # Final speed per object (km/h), once locked
51
56
  self.trk_hist = {} # Track ID → deque of position history
52
57
  self.locked_ids = set() # Track IDs whose speed has been finalized
53
58
  self.max_hist = self.CFG["max_hist"] # Required frame history before computing speed
54
59
  self.meter_per_pixel = self.CFG["meter_per_pixel"] # Scene scale, depends on camera details
55
- self.max_speed = self.CFG["max_speed"] # max_speed adjustment
60
+ self.max_speed = self.CFG["max_speed"] # Upper cap applied to the computed speed (km/h)
56
61
 
57
62
  def process(self, im0):
58
63
  """
@@ -65,6 +70,7 @@ class SpeedEstimator(BaseSolution):
65
70
  (SolutionResults): Contains processed image `plot_im` and `total_tracks` (number of tracked objects).
66
71
 
67
72
  Examples:
73
+ Process a frame for speed estimation
68
74
  >>> estimator = SpeedEstimator()
69
75
  >>> image = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
70
76
  >>> results = estimator.process(image)
@@ -89,15 +95,15 @@ class SpeedEstimator(BaseSolution):
89
95
  p0, p1 = trk_hist[0], trk_hist[-1] # First and last points of track
90
96
  dt = (self.frame_count - self.trk_frame_ids[track_id]) / self.fps # Time in seconds
91
97
  if dt > 0:
92
- dx, dy = p1[0] - p0[0], p1[1] - p0[1] # pixel displacement
93
- pixel_distance = sqrt(dx * dx + dy * dy) # get pixel distance
94
- meters = pixel_distance * self.meter_per_pixel # convert to meters
98
+ dx, dy = p1[0] - p0[0], p1[1] - p0[1] # Pixel displacement
99
+ pixel_distance = sqrt(dx * dx + dy * dy) # Calculate pixel distance
100
+ meters = pixel_distance * self.meter_per_pixel # Convert to meters
95
101
  self.spd[track_id] = int(
96
102
  min((meters / dt) * 3.6, self.max_speed)
97
- ) # convert to km/h and store final speed
98
- self.locked_ids.add(track_id) # prevent further updates
99
- self.trk_hist.pop(track_id, None) # free memory
100
- self.trk_frame_ids.pop(track_id, None) # optional: remove frame start too
103
+ ) # Convert to km/h and store final speed
104
+ self.locked_ids.add(track_id) # Prevent further updates
105
+ self.trk_hist.pop(track_id, None) # Free memory
106
+ self.trk_frame_ids.pop(track_id, None) # Remove frame start reference
101
107
 
102
108
  if track_id in self.spd:
103
109
  speed_label = f"{self.spd[track_id]} km/h"