ultralytics 8.3.101__py3-none-any.whl → 8.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. tests/test_exports.py +14 -5
  2. tests/test_solutions.py +140 -76
  3. ultralytics/__init__.py +1 -1
  4. ultralytics/cfg/__init__.py +1 -1
  5. ultralytics/engine/exporter.py +23 -8
  6. ultralytics/engine/tuner.py +8 -2
  7. ultralytics/hub/__init__.py +29 -2
  8. ultralytics/hub/google/__init__.py +18 -1
  9. ultralytics/models/fastsam/predict.py +12 -1
  10. ultralytics/models/nas/predict.py +21 -3
  11. ultralytics/models/rtdetr/val.py +26 -2
  12. ultralytics/models/sam/amg.py +22 -1
  13. ultralytics/models/sam/modules/encoders.py +85 -4
  14. ultralytics/models/sam/modules/memory_attention.py +61 -3
  15. ultralytics/models/sam/modules/utils.py +108 -5
  16. ultralytics/models/utils/loss.py +38 -2
  17. ultralytics/models/utils/ops.py +15 -1
  18. ultralytics/models/yolo/classify/predict.py +11 -1
  19. ultralytics/models/yolo/classify/train.py +17 -1
  20. ultralytics/models/yolo/classify/val.py +82 -6
  21. ultralytics/models/yolo/detect/predict.py +20 -1
  22. ultralytics/models/yolo/model.py +55 -4
  23. ultralytics/models/yolo/obb/predict.py +16 -1
  24. ultralytics/models/yolo/obb/train.py +35 -2
  25. ultralytics/models/yolo/obb/val.py +87 -6
  26. ultralytics/models/yolo/pose/predict.py +18 -1
  27. ultralytics/models/yolo/pose/train.py +48 -3
  28. ultralytics/models/yolo/pose/val.py +113 -8
  29. ultralytics/models/yolo/segment/predict.py +27 -2
  30. ultralytics/models/yolo/segment/train.py +61 -3
  31. ultralytics/models/yolo/segment/val.py +10 -1
  32. ultralytics/models/yolo/world/train_world.py +29 -1
  33. ultralytics/models/yolo/yoloe/train.py +47 -3
  34. ultralytics/nn/autobackend.py +9 -8
  35. ultralytics/nn/modules/activation.py +26 -3
  36. ultralytics/nn/modules/block.py +89 -0
  37. ultralytics/nn/modules/head.py +3 -92
  38. ultralytics/nn/modules/utils.py +70 -4
  39. ultralytics/nn/tasks.py +3 -0
  40. ultralytics/nn/text_model.py +93 -17
  41. ultralytics/solutions/instance_segmentation.py +15 -7
  42. ultralytics/solutions/solutions.py +2 -47
  43. ultralytics/utils/benchmarks.py +1 -1
  44. ultralytics/utils/callbacks/base.py +22 -5
  45. ultralytics/utils/callbacks/comet.py +93 -5
  46. ultralytics/utils/callbacks/dvc.py +64 -5
  47. ultralytics/utils/callbacks/neptune.py +25 -2
  48. ultralytics/utils/callbacks/tensorboard.py +30 -2
  49. ultralytics/utils/callbacks/wb.py +16 -1
  50. ultralytics/utils/dist.py +35 -2
  51. ultralytics/utils/errors.py +27 -6
  52. ultralytics/utils/metrics.py +1 -1
  53. ultralytics/utils/patches.py +33 -5
  54. ultralytics/utils/torch_utils.py +14 -6
  55. ultralytics/utils/triton.py +16 -3
  56. ultralytics/utils/tuner.py +17 -9
  57. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/METADATA +3 -4
  58. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/RECORD +62 -62
  59. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/WHEEL +0 -0
  60. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/entry_points.txt +0 -0
  61. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/licenses/LICENSE +0 -0
  62. {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/top_level.txt +0 -0

ultralytics/nn/text_model.py

@@ -6,7 +6,7 @@ from pathlib import Path
 import torch
 import torch.nn as nn
 
-from ultralytics.utils import LOGGER, checks
+from ultralytics.utils import checks
 from ultralytics.utils.torch_utils import smart_inference_mode
 
 try:
@@ -59,9 +59,10 @@ class TextModel(nn.Module):
 
 class CLIP(TextModel):
     """
-    OpenAI CLIP text encoder implementation.
+    Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.
 
-    This class implements the TextModel interface using OpenAI's CLIP model for text encoding.
+    This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors
+    that are aligned with corresponding image features in a shared embedding space.
 
     Attributes:
         model (clip.model.CLIP): The loaded CLIP model.
@@ -70,15 +71,33 @@ class CLIP(TextModel):
     Methods:
         tokenize: Convert input texts to CLIP tokens.
         encode_text: Encode tokenized texts into normalized feature vectors.
+
+    Examples:
+        >>> from ultralytics.models.sam import CLIP
+        >>> import torch
+        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        >>> clip_model = CLIP(size="ViT-B/32", device=device)
+        >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+        >>> text_features = clip_model.encode_text(tokens)
+        >>> print(text_features.shape)
     """
 
     def __init__(self, size, device):
         """
         Initialize the CLIP text encoder.
 
+        This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads
+        a pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
+
         Args:
             size (str): Model size identifier (e.g., 'ViT-B/32').
             device (torch.device): Device to load the model on.
+
+        Examples:
+            >>> import torch
+            >>> from ultralytics.models.sam.modules.clip import CLIP
+            >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
+            >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
         """
         super().__init__()
         self.model = clip.load(size, device=device)[0]
@@ -87,7 +106,20 @@ class CLIP(TextModel):
         self.eval()
 
     def tokenize(self, texts):
-        """Convert input texts to CLIP tokens."""
+        """
+        Convert input texts to CLIP tokens.
+
+        Args:
+            texts (str | List[str]): Input text or list of texts to tokenize.
+
+        Returns:
+            (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
+
+        Examples:
+            >>> model = CLIP("ViT-B/32", device="cpu")
+            >>> tokens = model.tokenize("a photo of a cat")
+            >>> print(tokens.shape)  # torch.Size([1, 77])
+        """
         return clip.tokenize(texts).to(self.device)
 
     @smart_inference_mode()
@@ -95,12 +127,22 @@
         """
         Encode tokenized texts into normalized feature vectors.
 
+        This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
+        normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+
         Args:
-            texts (torch.Tensor): Tokenized text inputs.
-            dtype (torch.dtype): Data type for output features.
+            texts (torch.Tensor): Tokenized text inputs, typically created using the tokenize() method.
+            dtype (torch.dtype, optional): Data type for output features. Default is torch.float32.
 
         Returns:
-            (torch.Tensor): Normalized text feature vectors.
+            (torch.Tensor): Normalized text feature vectors with unit length (L2 norm = 1).
+
+        Examples:
+            >>> clip_model = CLIP("ViT-B/32", device="cuda")
+            >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = clip_model.encode_text(tokens)
+            >>> features.shape
+            torch.Size([2, 512])
         """
         txt_feats = self.model.encode_text(texts).to(dtype)
         txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
@@ -109,9 +151,10 @@ class CLIP(TextModel):
 
 class MobileCLIP(TextModel):
     """
-    Apple MobileCLIP text encoder implementation.
+    Implement Apple's MobileCLIP text encoder for efficient text encoding.
 
-    This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+    This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
+    capabilities for vision-language tasks.
 
     Attributes:
         model (mobileclip.model.MobileCLIP): The loaded MobileCLIP model.
@@ -122,6 +165,12 @@ class MobileCLIP(TextModel):
     Methods:
         tokenize: Convert input texts to MobileCLIP tokens.
         encode_text: Encode tokenized texts into normalized feature vectors.
+
+    Examples:
+        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        >>> text_encoder = MobileCLIP(size="s0", device=device)
+        >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+        >>> features = text_encoder.encode_text(tokens)
     """
 
     config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}
@@ -130,9 +179,18 @@ class MobileCLIP(TextModel):
         """
         Initialize the MobileCLIP text encoder.
 
+        This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+
         Args:
            size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
            device (torch.device): Device to load the model on.
+
+        Examples:
+            >>> from ultralytics.nn.modules import MobileCLIP
+            >>> import torch
+            >>> model = MobileCLIP("s0", device=torch.device("cpu"))
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = model.encode_text(tokens)
         """
         super().__init__()
         config = self.config_size_map[size]
@@ -148,7 +206,19 @@ class MobileCLIP(TextModel):
         self.eval()
 
     def tokenize(self, texts):
-        """Convert input texts to MobileCLIP tokens."""
+        """
+        Convert input texts to MobileCLIP tokens.
+
+        Args:
+            texts (list[str]): List of text strings to tokenize.
+
+        Returns:
+            (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+        Examples:
+            >>> model = MobileCLIP("s0", "cpu")
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+        """
         return self.tokenizer(texts).to(self.device)
 
     @smart_inference_mode()
@@ -158,10 +228,17 @@ class MobileCLIP(TextModel):
 
         Args:
             texts (torch.Tensor): Tokenized text inputs.
-            dtype (torch.dtype): Data type for output features.
+            dtype (torch.dtype, optional): Data type for output features.
 
         Returns:
-            (torch.Tensor): Normalized text feature vectors.
+            (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+        Examples:
+            >>> model = MobileCLIP("s0", device="cpu")
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = model.encode_text(tokens)
+            >>> features.shape
+            torch.Size([2, 512])  # Actual dimension depends on model size
         """
         text_features = self.model.encode_text(texts).to(dtype)
         text_features /= text_features.norm(p=2, dim=-1, keepdim=True)
@@ -179,15 +256,14 @@ def build_text_model(variant, device=None):
     Returns:
         (TextModel): Instantiated text encoding model.
 
-    Raises:
-        AssertionError: If the specified variant is not supported.
+    Examples:
+        >>> model = build_text_model("clip:ViT-B/32", device=torch.device("cuda"))
+        >>> model = build_text_model("mobileclip:s0", device=torch.device("cpu"))
     """
-    LOGGER.info(f"Build text model {variant}")
    base, size = variant.split(":")
    if base == "clip":
        return CLIP(size, device)
    elif base == "mobileclip":
        return MobileCLIP(size, device)
    else:
-        print("Variant not found")
-        assert False
+        raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
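
The last hunk above replaces the silent `print` + `assert False` failure path in `build_text_model` with an explicit `ValueError`. A minimal usage sketch of the new behavior (not part of the diff; variant strings follow the `"<base>:<size>"` convention shown in the added Examples):

```python
import torch

from ultralytics.nn.text_model import build_text_model

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

try:
    # Supported bases are "clip" and "mobileclip", e.g. "clip:ViT-B/32" or "mobileclip:s0"
    text_model = build_text_model("clip:ViT-B/32", device=device)
except ValueError as err:
    # As of 8.3.103 an unrecognized base raises ValueError instead of tripping an assert
    print(f"Unsupported text model variant: {err}")
```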

ultralytics/solutions/instance_segmentation.py

@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
-from ultralytics.solutions.solutions import BaseSolution, SolutionAnnotator, SolutionResults
-from ultralytics.utils.plotting import colors
+from ultralytics.engine.results import Results
+from ultralytics.solutions.solutions import BaseSolution, SolutionResults
 
 
 class InstanceSegmentation(BaseSolution):
@@ -41,6 +41,10 @@ class InstanceSegmentation(BaseSolution):
         kwargs["model"] = kwargs.get("model", "yolo11n-seg.pt")
         super().__init__(**kwargs)
 
+        self.show_conf = self.CFG.get("show_conf", True)
+        self.show_labels = self.CFG.get("show_labels", True)
+        self.show_boxes = self.CFG.get("show_boxes", True)
+
     def process(self, im0):
         """
         Perform instance segmentation on the input image and annotate the results.
@@ -58,17 +62,21 @@ class InstanceSegmentation(BaseSolution):
             >>> print(summary)
         """
         self.extract_tracks(im0)  # Extract tracks (bounding boxes, classes, and masks)
-        annotator = SolutionAnnotator(im0, self.line_width)
 
         # Iterate over detected classes, track IDs, and segmentation masks
         if self.masks is None:
             self.LOGGER.warning("⚠️ No masks detected! Ensure you're using a supported Ultralytics segmentation model.")
+            plot_im = im0
         else:
-            for cls, t_id, mask in zip(self.clss, self.track_ids, self.masks):
-                # Annotate the image with segmentation mask, mask color, and label
-                annotator.segmentation_mask(mask=mask, mask_color=colors(t_id, True), label=self.names[cls])
+            results = Results(im0, path=None, names=self.names, boxes=self.track_data.data, masks=self.masks.data)
+            plot_im = results.plot(
+                line_width=self.line_width,
+                boxes=self.show_boxes,
+                conf=self.show_conf,
+                labels=self.show_labels,
+                color_mode="instance",
+            )
 
-        plot_im = annotator.result()
         self.display_output(plot_im)  # Display the annotated output using the base class function
 
         # Return SolutionResults
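
Because annotation now flows through `Results.plot()` with `color_mode="instance"`, the display toggles read from `CFG` in `__init__` can be controlled through the solution's keyword arguments. A hedged usage sketch (not part of the diff; the image path is a placeholder):

```python
import cv2

from ultralytics import solutions

# show_conf / show_labels / show_boxes correspond to the CFG keys read in __init__ above
isegment = solutions.InstanceSegmentation(model="yolo11n-seg.pt", show_conf=True, show_labels=True, show_boxes=True)

im0 = cv2.imread("path/to/image.jpg")  # placeholder path
results = isegment.process(im0)  # SolutionResults; annotated frame produced via Results.plot(color_mode="instance")
```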

ultralytics/solutions/solutions.py

@@ -52,7 +52,7 @@ class BaseSolution:
             is_cli (bool): Enables CLI mode if set to True.
             **kwargs (Any): Additional configuration parameters that override defaults.
         """
-        check_requirements("shapely>=2.0.0")
+        check_requirements("shapely>=2.0.0,<2.1.0")
         from shapely.geometry import LineString, Point, Polygon
         from shapely.prepared import prep
 
@@ -122,7 +122,7 @@
         self.track_data = self.tracks[0].obb or self.tracks[0].boxes  # Extract tracks for OBB or object detection
 
         self.masks = (
-            self.tracks[0].masks.xy if hasattr(self.tracks[0], "masks") and self.tracks[0].masks is not None else None
+            self.tracks[0].masks if hasattr(self.tracks[0], "masks") and self.tracks[0].masks is not None else None
         )
 
         if self.track_data and self.track_data.id is not None:
@@ -225,7 +225,6 @@ class SolutionAnnotator(Annotator):
         plot_angle_and_count_and_stage: Visualizes angle, step count, and stage for workout monitoring.
         plot_distance_and_line: Displays the distance between centroids and connects them with a line.
         display_objects_labels: Annotates bounding boxes with object class labels.
-        segmentation_mask: Draws mask for segmented objects and optionally labels them.
         sweep_annotator: Visualizes a vertical sweep line and optional label.
         visioneye: Maps and connects object centroids to a visual "eye" point.
         circle_label: Draws a circular label within a bounding box.
@@ -519,50 +518,6 @@
             lineType=cv2.LINE_AA,
         )
 
-    def segmentation_mask(self, mask, mask_color=(255, 0, 255), label=None, alpha=0.5):
-        """
-        Draw an optimized segmentation mask with smooth corners, highlighted edge, and dynamic text box size.
-
-        Args:
-            mask (np.ndarray): A 2D array of shape (N, 2) containing the object mask.
-            mask_color (Tuple[int, int, int]): RGB color for the mask.
-            label (str, optional): Text label for the object.
-            alpha (float): Transparency level (0 = fully transparent, 1 = fully opaque).
-        """
-        if mask.size == 0:
-            return
-
-        overlay = self.im.copy()
-        mask = np.int32([mask])
-
-        # Approximate polygon for smooth corners with epsilon
-        refined_mask = cv2.approxPolyDP(mask, 0.002 * cv2.arcLength(mask, True), True)
-
-        # Apply a highlighter effect by drawing a thick outer shadow
-        cv2.polylines(overlay, [refined_mask], isClosed=True, color=mask_color, thickness=self.lw * 3)
-        cv2.fillPoly(overlay, [refined_mask], mask_color)  # draw mask with primary color
-
-        # Apply an inner glow effect for extra clarity
-        cv2.polylines(overlay, [refined_mask], isClosed=True, color=mask_color, thickness=self.lw)
-
-        self.im = cv2.addWeighted(overlay, alpha, self.im, 1 - alpha, 0)  # blend overlay with the original image
-
-        # Draw label if provided
-        if label:
-            text_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf, self.tf)
-            text_x, text_y = refined_mask[0][0][0], refined_mask[0][0][1]
-            rect_start, rect_end = (text_x - 5, text_y - text_size[1] - 5), (text_x + text_size[0] + 5, text_y + 5)
-            cv2.rectangle(self.im, rect_start, rect_end, mask_color, -1)
-            cv2.putText(
-                self.im,
-                label,
-                (text_x, text_y),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                self.sf,
-                self.get_txt_color(mask_color),
-                self.tf,
-            )
-
     def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
         """
         Draw a sweep annotation line and an optional label.

ultralytics/utils/benchmarks.py

@@ -126,7 +126,7 @@ def benchmark(
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
         if i == 11:  # Paddle
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
-            assert not model.task == "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
+            assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
             assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
             assert LINUX or MACOS, "Windows Paddle exports not supported yet"
         if i == 12:  # MNN

ultralytics/utils/callbacks/base.py

@@ -176,21 +176,38 @@ default_callbacks = {
 
 def get_default_callbacks():
     """
-    Return a copy of the default_callbacks dictionary with lists as default values.
+    Get the default callbacks for Ultralytics training, validation, prediction, and export processes.
 
     Returns:
-        (defaultdict): A defaultdict with keys from default_callbacks and empty lists as default values.
+        (dict): Dictionary of default callbacks for various training events. Each key in the dictionary represents an
+            event during the training process, and the corresponding value is a list of callback functions that are
+            executed when that event occurs.
+
+    Examples:
+        >>> callbacks = get_default_callbacks()
+        >>> print(list(callbacks.keys()))  # show all available callback events
+        ['on_pretrain_routine_start', 'on_pretrain_routine_end', ...]
     """
     return defaultdict(list, deepcopy(default_callbacks))
 
 
 def add_integration_callbacks(instance):
     """
-    Add integration callbacks from various sources to the instance's callbacks.
+    Add integration callbacks to the instance's callbacks dictionary.
+
+    This function loads and adds various integration callbacks to the provided instance. The specific callbacks added
+    depend on the type of instance provided. All instances receive HUB callbacks, while Trainer instances also receive
+    additional callbacks for various integrations like ClearML, Comet, DVC, MLflow, Neptune, Ray Tune, TensorBoard,
+    and Weights & Biases.
 
     Args:
-        instance (Trainer | Predictor | Validator | Exporter): An object with a 'callbacks' attribute that is a
-            dictionary of callback lists.
+        instance (Trainer | Predictor | Validator | Exporter): The object instance to which callbacks will be added.
+            The type of instance determines which callbacks are loaded.
+
+    Examples:
+        >>> from ultralytics.engine.trainer import BaseTrainer
+        >>> trainer = BaseTrainer()
+        >>> add_integration_callbacks(trainer)
     """
     # Load HUB callbacks
     from .hub import callbacks as hub_cb
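
These docstring updates describe the callback registry itself; the hooks are unchanged. For orientation, a small sketch (not part of the diff) of registering a custom handler on one of the events enumerated by `get_default_callbacks()`:

```python
from ultralytics import YOLO


def log_epoch(trainer):
    """Custom hook: print the epoch index when each fit epoch finishes."""
    print(f"Finished epoch {trainer.epoch}")


model = YOLO("yolo11n.pt")
model.add_callback("on_fit_epoch_end", log_epoch)  # same event keys as default_callbacks
model.train(data="coco8.yaml", epochs=1)
```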

ultralytics/utils/callbacks/comet.py

@@ -155,7 +155,32 @@ def _scale_bounding_box_to_original_image_shape(
 
 
 def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_name_map=None) -> Optional[dict]:
-    """Format ground truth annotations for detection."""
+    """
+    Format ground truth annotations for object detection.
+
+    This function processes ground truth annotations from a batch of images for object detection tasks. It extracts
+    bounding boxes, class labels, and other metadata for a specific image in the batch, and formats them for
+    visualization or evaluation.
+
+    Args:
+        img_idx (int): Index of the image in the batch to process.
+        image_path (str | Path): Path to the image file.
+        batch (dict): Batch dictionary containing detection data with keys:
+            - 'batch_idx': Tensor of batch indices
+            - 'bboxes': Tensor of bounding boxes in normalized xywh format
+            - 'cls': Tensor of class labels
+            - 'ori_shape': Original image shapes
+            - 'resized_shape': Resized image shapes
+            - 'ratio_pad': Ratio and padding information
+        class_name_map (dict | None, optional): Mapping from class indices to class names.
+
+    Returns:
+        (dict | None): Formatted ground truth annotations with the following structure:
+            - 'boxes': List of box coordinates [x, y, width, height]
+            - 'label': Label string with format "gt_{class_name}"
+            - 'score': Confidence score (always 1.0, scaled by _scale_confidence_score)
+        Returns None if no bounding boxes are found for the image.
+    """
     indices = batch["batch_idx"] == img_idx
     bboxes = batch["bboxes"][indices]
     if len(bboxes) == 0:
@@ -284,7 +309,22 @@ def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch) -> None:
 
 
 def _log_images(experiment, image_paths, curr_step, annotations=None) -> None:
-    """Logs images to the experiment with optional annotations."""
+    """
+    Log images to the experiment with optional annotations.
+
+    This function logs images to a Comet ML experiment, optionally including annotation data for visualization
+    such as bounding boxes or segmentation masks.
+
+    Args:
+        experiment (comet_ml.Experiment): The Comet ML experiment to log images to.
+        image_paths (List[Path]): List of paths to images that will be logged.
+        curr_step (int): Current training step/iteration for tracking in the experiment timeline.
+        annotations (List[List[dict]], optional): Nested list of annotation dictionaries for each image. Each annotation
+            contains visualization data like bounding boxes, labels, and confidence scores.
+
+    Returns:
+        None
+    """
     if annotations:
         for image_path, annotation in zip(image_paths, annotations):
             experiment.log_image(image_path, name=image_path.stem, step=curr_step, annotations=annotation)
@@ -295,7 +335,23 @@ def _log_images(experiment, image_paths, curr_step, annotations=None) -> None:
 
 
 def _log_image_predictions(experiment, validator, curr_step) -> None:
-    """Logs predicted boxes for a single image during training."""
+    """
+    Log predicted boxes for a single image during training.
+
+    This function logs image predictions to a Comet ML experiment during model validation. It processes
+    validation data and formats both ground truth and prediction annotations for visualization in the Comet
+    dashboard. The function respects configured limits on the number of images to log.
+
+    Args:
+        experiment (comet_ml.Experiment): The Comet ML experiment to log to.
+        validator (BaseValidator): The validator instance containing validation data and predictions.
+        curr_step (int): The current training step for logging timeline.
+
+    Notes:
+        This function uses global state to track the number of logged predictions across calls.
+        It only logs predictions for supported tasks defined in COMET_SUPPORTED_TASKS.
+        The number of logged images is limited by the COMET_MAX_IMAGE_PREDICTIONS environment variable.
+    """
     global _comet_image_prediction_count
 
     task = validator.args.task
@@ -342,7 +398,22 @@ def _log_image_predictions(experiment, validator, curr_step) -> None:
 
 
 def _log_plots(experiment, trainer) -> None:
-    """Logs evaluation plots and label plots for the experiment."""
+    """
+    Log evaluation plots and label plots for the experiment.
+
+    This function logs various evaluation plots and confusion matrices to the experiment tracking system. It handles
+    different types of metrics (SegmentMetrics, PoseMetrics, DetMetrics, OBBMetrics) and logs the appropriate plots
+    for each type.
+
+    Args:
+        experiment (comet_ml.Experiment): The Comet ML experiment to log plots to.
+        trainer (ultralytics.engine.trainer.BaseTrainer): The trainer object containing validation metrics and save
+            directory information.
+
+    Examples:
+        >>> from ultralytics.utils.callbacks.comet import _log_plots
+        >>> _log_plots(experiment, trainer)
+    """
     plot_filenames = None
     if isinstance(trainer.validator.metrics, SegmentMetrics) and trainer.validator.metrics.task == "segment":
         plot_filenames = [
@@ -401,7 +472,24 @@ def on_train_epoch_end(trainer) -> None:
 
 
 def on_fit_epoch_end(trainer) -> None:
-    """Logs model assets at the end of each epoch."""
+    """
+    Log model assets at the end of each epoch during training.
+
+    This function is called at the end of each training epoch to log metrics, learning rates, and model information
+    to a Comet ML experiment. It also logs model assets, confusion matrices, and image predictions based on
+    configuration settings.
+
+    The function retrieves the current Comet ML experiment and logs various training metrics. If it's the first epoch,
+    it also logs model information. On specified save intervals, it logs the model, confusion matrix (if enabled),
+    and image predictions (if enabled).
+
+    Args:
+        trainer (BaseTrainer): The YOLO trainer object containing training state, metrics, and configuration.
+
+    Examples:
+        >>> # Inside a training loop
+        >>> on_fit_epoch_end(trainer)  # Log metrics and assets to Comet ML
+    """
     experiment = comet_ml.get_running_experiment()
     if not experiment:
         return

ultralytics/utils/callbacks/dvc.py

@@ -27,7 +27,21 @@ except (ImportError, AssertionError, TypeError):
 
 
 def _log_images(path: Path, prefix: str = "") -> None:
-    """Logs images at specified path with an optional prefix using DVCLive."""
+    """
+    Log images at specified path with an optional prefix using DVCLive.
+
+    This function logs images found at the given path to DVCLive, organizing them by batch to enable slider
+    functionality in the UI. It processes image filenames to extract batch information and restructures the path
+    accordingly.
+
+    Args:
+        path (Path): Path to the image file to be logged.
+        prefix (str): Optional prefix to add to the image name when logging.
+
+    Examples:
+        >>> from pathlib import Path
+        >>> _log_images(Path("runs/train/exp/val_batch0_pred.jpg"), prefix="validation")
+    """
     if live:
         name = path.name
 
@@ -41,7 +55,13 @@ def _log_images(path: Path, prefix: str = "") -> None:
 
 
 def _log_plots(plots: dict, prefix: str = "") -> None:
-    """Logs plot images for training progress if they have not been previously processed."""
+    """
+    Log plot images for training progress if they have not been previously processed.
+
+    Args:
+        plots (dict): Dictionary containing plot information with timestamps.
+        prefix (str, optional): Optional prefix to add to the logged image paths.
+    """
     for name, params in plots.items():
         timestamp = params["timestamp"]
         if _processed_plots.get(name) != timestamp:
@@ -50,7 +70,19 @@ def _log_plots(plots: dict, prefix: str = "") -> None:
 
 
 def _log_confusion_matrix(validator) -> None:
-    """Logs the confusion matrix for the given validator using DVCLive."""
+    """
+    Log confusion matrix for a validator using DVCLive.
+
+    This function processes the confusion matrix from a validator object and logs it to DVCLive by converting
+    the matrix into lists of target and prediction labels.
+
+    Args:
+        validator (BaseValidator): The validator object containing the confusion matrix and class names.
+            Must have attributes: confusion_matrix.matrix, confusion_matrix.task, and names.
+
+    Returns:
+        None
+    """
     targets = []
     preds = []
     matrix = validator.confusion_matrix.matrix
@@ -94,7 +126,20 @@ def on_train_epoch_start(trainer) -> None:
 
 
 def on_fit_epoch_end(trainer) -> None:
-    """Logs training metrics and model info, and advances to next step on the end of each fit epoch."""
+    """
+    Log training metrics, model info, and advance to next step at the end of each fit epoch.
+
+    This function is called at the end of each fit epoch during training. It logs various metrics including
+    training loss items, validation metrics, and learning rates. On the first epoch, it also logs model
+    information. Additionally, it logs training and validation plots and advances the DVCLive step counter.
+
+    Args:
+        trainer (BaseTrainer): The trainer object containing training state, metrics, and plots.
+
+    Notes:
+        This function only performs logging operations when DVCLive logging is active and during a training epoch.
+        The global variable _training_epoch is used to track whether the current epoch is a training epoch.
+    """
     global _training_epoch
     if live and _training_epoch:
         all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix="train"), **trainer.metrics, **trainer.lr}
@@ -115,7 +160,21 @@ def on_fit_epoch_end(trainer) -> None:
 
 
 def on_train_end(trainer) -> None:
-    """Logs the best metrics, plots, and confusion matrix at the end of training if DVCLive is active."""
+    """
+    Log best metrics, plots, and confusion matrix at the end of training.
+
+    This function is called at the conclusion of the training process to log final metrics, visualizations, and
+    model artifacts if DVCLive logging is active. It captures the best model performance metrics, training plots,
+    validation plots, and confusion matrix for later analysis.
+
+    Args:
+        trainer (BaseTrainer): The trainer object containing training state, metrics, and validation results.
+
+    Examples:
+        >>> # Inside a custom training loop
+        >>> from ultralytics.utils.callbacks.dvc import on_train_end
+        >>> on_train_end(trainer)  # Log final metrics and artifacts
+    """
     if live:
         # At the end log the best metrics. It runs validator on the best model internally.
         all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix="train"), **trainer.metrics, **trainer.lr}

ultralytics/utils/callbacks/neptune.py

@@ -19,14 +19,37 @@ except (ImportError, AssertionError):
 
 
 def _log_scalars(scalars: dict, step: int = 0) -> None:
-    """Log scalars to the NeptuneAI experiment logger."""
+    """
+    Log scalars to the NeptuneAI experiment logger.
+
+    Args:
+        scalars (dict): Dictionary of scalar values to log to NeptuneAI.
+        step (int): The current step or iteration number for logging.
+
+    Examples:
+        >>> metrics = {"mAP": 0.85, "loss": 0.32}
+        >>> _log_scalars(metrics, step=100)
+    """
     if run:
         for k, v in scalars.items():
             run[k].append(value=v, step=step)
 
 
 def _log_images(imgs_dict: dict, group: str = "") -> None:
-    """Log images to the NeptuneAI experiment logger."""
+    """
+    Log images to the NeptuneAI experiment logger.
+
+    This function logs image data to Neptune.ai when a valid Neptune run is active. Images are organized
+    under the specified group name.
+
+    Args:
+        imgs_dict (dict): Dictionary of images to log, with keys as image names and values as image data.
+        group (str, optional): Group name to organize images under in the Neptune UI.
+
+    Examples:
+        >>> # Log validation images
+        >>> _log_images({"val_batch": img_tensor}, group="validation")
+    """
     if run:
         for k, v in imgs_dict.items():
             run[f"{group}/{k}"].upload(File(v))