ultralytics 8.3.101__py3-none-any.whl → 8.3.103__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_exports.py +14 -5
- tests/test_solutions.py +140 -76
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +1 -1
- ultralytics/engine/exporter.py +23 -8
- ultralytics/engine/tuner.py +8 -2
- ultralytics/hub/__init__.py +29 -2
- ultralytics/hub/google/__init__.py +18 -1
- ultralytics/models/fastsam/predict.py +12 -1
- ultralytics/models/nas/predict.py +21 -3
- ultralytics/models/rtdetr/val.py +26 -2
- ultralytics/models/sam/amg.py +22 -1
- ultralytics/models/sam/modules/encoders.py +85 -4
- ultralytics/models/sam/modules/memory_attention.py +61 -3
- ultralytics/models/sam/modules/utils.py +108 -5
- ultralytics/models/utils/loss.py +38 -2
- ultralytics/models/utils/ops.py +15 -1
- ultralytics/models/yolo/classify/predict.py +11 -1
- ultralytics/models/yolo/classify/train.py +17 -1
- ultralytics/models/yolo/classify/val.py +82 -6
- ultralytics/models/yolo/detect/predict.py +20 -1
- ultralytics/models/yolo/model.py +55 -4
- ultralytics/models/yolo/obb/predict.py +16 -1
- ultralytics/models/yolo/obb/train.py +35 -2
- ultralytics/models/yolo/obb/val.py +87 -6
- ultralytics/models/yolo/pose/predict.py +18 -1
- ultralytics/models/yolo/pose/train.py +48 -3
- ultralytics/models/yolo/pose/val.py +113 -8
- ultralytics/models/yolo/segment/predict.py +27 -2
- ultralytics/models/yolo/segment/train.py +61 -3
- ultralytics/models/yolo/segment/val.py +10 -1
- ultralytics/models/yolo/world/train_world.py +29 -1
- ultralytics/models/yolo/yoloe/train.py +47 -3
- ultralytics/nn/autobackend.py +9 -8
- ultralytics/nn/modules/activation.py +26 -3
- ultralytics/nn/modules/block.py +89 -0
- ultralytics/nn/modules/head.py +3 -92
- ultralytics/nn/modules/utils.py +70 -4
- ultralytics/nn/tasks.py +3 -0
- ultralytics/nn/text_model.py +93 -17
- ultralytics/solutions/instance_segmentation.py +15 -7
- ultralytics/solutions/solutions.py +2 -47
- ultralytics/utils/benchmarks.py +1 -1
- ultralytics/utils/callbacks/base.py +22 -5
- ultralytics/utils/callbacks/comet.py +93 -5
- ultralytics/utils/callbacks/dvc.py +64 -5
- ultralytics/utils/callbacks/neptune.py +25 -2
- ultralytics/utils/callbacks/tensorboard.py +30 -2
- ultralytics/utils/callbacks/wb.py +16 -1
- ultralytics/utils/dist.py +35 -2
- ultralytics/utils/errors.py +27 -6
- ultralytics/utils/metrics.py +1 -1
- ultralytics/utils/patches.py +33 -5
- ultralytics/utils/torch_utils.py +14 -6
- ultralytics/utils/triton.py +16 -3
- ultralytics/utils/tuner.py +17 -9
- {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/METADATA +3 -4
- {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/RECORD +62 -62
- {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.101.dist-info → ultralytics-8.3.103.dist-info}/top_level.txt +0 -0
ultralytics/nn/text_model.py
CHANGED
@@ -6,7 +6,7 @@ from pathlib import Path
 import torch
 import torch.nn as nn

-from ultralytics.utils import
+from ultralytics.utils import checks
 from ultralytics.utils.torch_utils import smart_inference_mode

 try:
@@ -59,9 +59,10 @@ class TextModel(nn.Module):

 class CLIP(TextModel):
     """
-    OpenAI CLIP text encoder
+    Implements OpenAI's CLIP (Contrastive Language-Image Pre-training) text encoder.

-    This class
+    This class provides a text encoder based on OpenAI's CLIP model, which can convert text into feature vectors
+    that are aligned with corresponding image features in a shared embedding space.

     Attributes:
         model (clip.model.CLIP): The loaded CLIP model.
@@ -70,15 +71,33 @@ class CLIP(TextModel):
     Methods:
         tokenize: Convert input texts to CLIP tokens.
         encode_text: Encode tokenized texts into normalized feature vectors.
+
+    Examples:
+        >>> from ultralytics.models.sam import CLIP
+        >>> import torch
+        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        >>> clip_model = CLIP(size="ViT-B/32", device=device)
+        >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+        >>> text_features = clip_model.encode_text(tokens)
+        >>> print(text_features.shape)
     """

     def __init__(self, size, device):
         """
         Initialize the CLIP text encoder.

+        This class implements the TextModel interface using OpenAI's CLIP model for text encoding. It loads
+        a pre-trained CLIP model of the specified size and prepares it for text encoding tasks.
+
         Args:
             size (str): Model size identifier (e.g., 'ViT-B/32').
             device (torch.device): Device to load the model on.
+
+        Examples:
+            >>> import torch
+            >>> from ultralytics.models.sam.modules.clip import CLIP
+            >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
+            >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
         """
         super().__init__()
         self.model = clip.load(size, device=device)[0]
@@ -87,7 +106,20 @@ class CLIP(TextModel):
         self.eval()

     def tokenize(self, texts):
-        """
+        """
+        Convert input texts to CLIP tokens.
+
+        Args:
+            texts (str | List[str]): Input text or list of texts to tokenize.
+
+        Returns:
+            (torch.Tensor): Tokenized text tensor with shape (batch_size, context_length) ready for model processing.
+
+        Examples:
+            >>> model = CLIP("ViT-B/32", device="cpu")
+            >>> tokens = model.tokenize("a photo of a cat")
+            >>> print(tokens.shape)  # torch.Size([1, 77])
+        """
         return clip.tokenize(texts).to(self.device)

     @smart_inference_mode()
@@ -95,12 +127,22 @@ class CLIP(TextModel):
         """
         Encode tokenized texts into normalized feature vectors.

+        This method processes tokenized text inputs through the CLIP model to generate feature vectors, which are then
+        normalized to unit length. These normalized vectors can be used for text-image similarity comparisons.
+
         Args:
-            texts (torch.Tensor): Tokenized text inputs.
-            dtype (torch.dtype): Data type for output features.
+            texts (torch.Tensor): Tokenized text inputs, typically created using the tokenize() method.
+            dtype (torch.dtype, optional): Data type for output features. Default is torch.float32.

         Returns:
-            (torch.Tensor): Normalized text feature vectors.
+            (torch.Tensor): Normalized text feature vectors with unit length (L2 norm = 1).
+
+        Examples:
+            >>> clip_model = CLIP("ViT-B/32", device="cuda")
+            >>> tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = clip_model.encode_text(tokens)
+            >>> features.shape
+            torch.Size([2, 512])
         """
         txt_feats = self.model.encode_text(texts).to(dtype)
         txt_feats = txt_feats / txt_feats.norm(p=2, dim=-1, keepdim=True)
@@ -109,9 +151,10 @@ class CLIP(TextModel):

 class MobileCLIP(TextModel):
     """
-    Apple MobileCLIP text encoder
+    Implement Apple's MobileCLIP text encoder for efficient text encoding.

-    This class implements the TextModel interface using Apple's MobileCLIP model
+    This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
+    capabilities for vision-language tasks.

     Attributes:
         model (mobileclip.model.MobileCLIP): The loaded MobileCLIP model.
@@ -122,6 +165,12 @@ class MobileCLIP(TextModel):
     Methods:
         tokenize: Convert input texts to MobileCLIP tokens.
         encode_text: Encode tokenized texts into normalized feature vectors.
+
+    Examples:
+        >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        >>> text_encoder = MobileCLIP(size="s0", device=device)
+        >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
+        >>> features = text_encoder.encode_text(tokens)
     """

     config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}
@@ -130,9 +179,18 @@ class MobileCLIP(TextModel):
         """
         Initialize the MobileCLIP text encoder.

+        This class implements the TextModel interface using Apple's MobileCLIP model for efficient text encoding.
+
         Args:
             size (str): Model size identifier (e.g., 's0', 's1', 's2', 'b', 'blt').
             device (torch.device): Device to load the model on.
+
+        Examples:
+            >>> from ultralytics.nn.modules import MobileCLIP
+            >>> import torch
+            >>> model = MobileCLIP("s0", device=torch.device("cpu"))
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = model.encode_text(tokens)
         """
         super().__init__()
         config = self.config_size_map[size]
@@ -148,7 +206,19 @@ class MobileCLIP(TextModel):
         self.eval()

     def tokenize(self, texts):
-        """
+        """
+        Convert input texts to MobileCLIP tokens.
+
+        Args:
+            texts (list[str]): List of text strings to tokenize.
+
+        Returns:
+            (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
+
+        Examples:
+            >>> model = MobileCLIP("s0", "cpu")
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+        """
         return self.tokenizer(texts).to(self.device)

     @smart_inference_mode()
@@ -158,10 +228,17 @@ class MobileCLIP(TextModel):

         Args:
             texts (torch.Tensor): Tokenized text inputs.
-            dtype (torch.dtype): Data type for output features.
+            dtype (torch.dtype, optional): Data type for output features.

         Returns:
-            (torch.Tensor): Normalized text feature vectors.
+            (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
+
+        Examples:
+            >>> model = MobileCLIP("s0", device="cpu")
+            >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
+            >>> features = model.encode_text(tokens)
+            >>> features.shape
+            torch.Size([2, 512])  # Actual dimension depends on model size
         """
         text_features = self.model.encode_text(texts).to(dtype)
         text_features /= text_features.norm(p=2, dim=-1, keepdim=True)
@@ -179,15 +256,14 @@ def build_text_model(variant, device=None):
     Returns:
         (TextModel): Instantiated text encoding model.

-
-
+    Examples:
+        >>> model = build_text_model("clip:ViT-B/32", device=torch.device("cuda"))
+        >>> model = build_text_model("mobileclip:s0", device=torch.device("cpu"))
     """
-    LOGGER.info(f"Build text model {variant}")
     base, size = variant.split(":")
     if base == "clip":
         return CLIP(size, device)
     elif base == "mobileclip":
         return MobileCLIP(size, device)
     else:
-
-        assert False
+        raise ValueError(f"Unrecognized base model: '{base}'. Supported base models: 'clip', 'mobileclip'.")
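For context on the last hunk: build_text_model() now raises a ValueError for unrecognized variants instead of failing on a bare assert. A minimal usage sketch, assuming the optional CLIP dependency is installed; the error handling shown is illustrative, not part of the package:

from ultralytics.nn.text_model import build_text_model

try:
    encoder = build_text_model("clip:ViT-B/32", device="cpu")  # variant string is "base:size"
    tokens = encoder.tokenize(["a photo of a cat"])
    feats = encoder.encode_text(tokens)  # unit-normalized text features
except ValueError as err:
    # Bases other than "clip" or "mobileclip" now raise instead of asserting
    print(err)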
ultralytics/solutions/instance_segmentation.py
CHANGED
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-from ultralytics.
-from ultralytics.
+from ultralytics.engine.results import Results
+from ultralytics.solutions.solutions import BaseSolution, SolutionResults


 class InstanceSegmentation(BaseSolution):
@@ -41,6 +41,10 @@ class InstanceSegmentation(BaseSolution):
         kwargs["model"] = kwargs.get("model", "yolo11n-seg.pt")
         super().__init__(**kwargs)

+        self.show_conf = self.CFG.get("show_conf", True)
+        self.show_labels = self.CFG.get("show_labels", True)
+        self.show_boxes = self.CFG.get("show_boxes", True)
+
     def process(self, im0):
         """
         Perform instance segmentation on the input image and annotate the results.
@@ -58,17 +62,21 @@ class InstanceSegmentation(BaseSolution):
         >>> print(summary)
         """
         self.extract_tracks(im0)  # Extract tracks (bounding boxes, classes, and masks)
-        annotator = SolutionAnnotator(im0, self.line_width)

         # Iterate over detected classes, track IDs, and segmentation masks
         if self.masks is None:
             self.LOGGER.warning("⚠️ No masks detected! Ensure you're using a supported Ultralytics segmentation model.")
+            plot_im = im0
         else:
-
-
-
+            results = Results(im0, path=None, names=self.names, boxes=self.track_data.data, masks=self.masks.data)
+            plot_im = results.plot(
+                line_width=self.line_width,
+                boxes=self.show_boxes,
+                conf=self.show_conf,
+                labels=self.show_labels,
+                color_mode="instance",
+            )

-        plot_im = annotator.result()
         self.display_output(plot_im)  # Display the annotated output using the base class function

         # Return SolutionResults
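To show how the refactored solution is driven (annotation now goes through Results.plot with color_mode="instance" instead of SolutionAnnotator), here is a minimal usage sketch; the input image path is hypothetical and the keyword arguments assume the standard solutions config keys:

import cv2
from ultralytics.solutions.instance_segmentation import InstanceSegmentation

isegment = InstanceSegmentation(model="yolo11n-seg.pt", show=False)  # show_conf/show_labels/show_boxes default to True
frame = cv2.imread("image.jpg")  # hypothetical input frame
results = isegment.process(frame)  # SolutionResults; the annotated frame is built via Results.plot()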
ultralytics/solutions/solutions.py
CHANGED
@@ -52,7 +52,7 @@ class BaseSolution:
             is_cli (bool): Enables CLI mode if set to True.
             **kwargs (Any): Additional configuration parameters that override defaults.
         """
-        check_requirements("shapely>=2.0.0")
+        check_requirements("shapely>=2.0.0,<2.1.0")
         from shapely.geometry import LineString, Point, Polygon
         from shapely.prepared import prep

@@ -122,7 +122,7 @@ class BaseSolution:
         self.track_data = self.tracks[0].obb or self.tracks[0].boxes  # Extract tracks for OBB or object detection

         self.masks = (
-            self.tracks[0].masks
+            self.tracks[0].masks if hasattr(self.tracks[0], "masks") and self.tracks[0].masks is not None else None
         )

         if self.track_data and self.track_data.id is not None:
@@ -225,7 +225,6 @@ class SolutionAnnotator(Annotator):
         plot_angle_and_count_and_stage: Visualizes angle, step count, and stage for workout monitoring.
         plot_distance_and_line: Displays the distance between centroids and connects them with a line.
         display_objects_labels: Annotates bounding boxes with object class labels.
-        segmentation_mask: Draws mask for segmented objects and optionally labels them.
         sweep_annotator: Visualizes a vertical sweep line and optional label.
         visioneye: Maps and connects object centroids to a visual "eye" point.
         circle_label: Draws a circular label within a bounding box.
@@ -519,50 +518,6 @@ class SolutionAnnotator(Annotator):
             lineType=cv2.LINE_AA,
         )

-    def segmentation_mask(self, mask, mask_color=(255, 0, 255), label=None, alpha=0.5):
-        """
-        Draw an optimized segmentation mask with smooth corners, highlighted edge, and dynamic text box size.
-
-        Args:
-            mask (np.ndarray): A 2D array of shape (N, 2) containing the object mask.
-            mask_color (Tuple[int, int, int]): RGB color for the mask.
-            label (str, optional): Text label for the object.
-            alpha (float): Transparency level (0 = fully transparent, 1 = fully opaque).
-        """
-        if mask.size == 0:
-            return
-
-        overlay = self.im.copy()
-        mask = np.int32([mask])
-
-        # Approximate polygon for smooth corners with epsilon
-        refined_mask = cv2.approxPolyDP(mask, 0.002 * cv2.arcLength(mask, True), True)
-
-        # Apply a highlighter effect by drawing a thick outer shadow
-        cv2.polylines(overlay, [refined_mask], isClosed=True, color=mask_color, thickness=self.lw * 3)
-        cv2.fillPoly(overlay, [refined_mask], mask_color)  # draw mask with primary color
-
-        # Apply an inner glow effect for extra clarity
-        cv2.polylines(overlay, [refined_mask], isClosed=True, color=mask_color, thickness=self.lw)
-
-        self.im = cv2.addWeighted(overlay, alpha, self.im, 1 - alpha, 0)  # blend overlay with the original image
-
-        # Draw label if provided
-        if label:
-            text_size, _ = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, self.sf, self.tf)
-            text_x, text_y = refined_mask[0][0][0], refined_mask[0][0][1]
-            rect_start, rect_end = (text_x - 5, text_y - text_size[1] - 5), (text_x + text_size[0] + 5, text_y + 5)
-            cv2.rectangle(self.im, rect_start, rect_end, mask_color, -1)
-            cv2.putText(
-                self.im,
-                label,
-                (text_x, text_y),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                self.sf,
-                self.get_txt_color(mask_color),
-                self.tf,
-            )
-
     def sweep_annotator(self, line_x=0, line_y=0, label=None, color=(221, 0, 186), txt_color=(255, 255, 255)):
         """
         Draw a sweep annotation line and an optional label.
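The tightened shapely pin above goes through ultralytics' own requirement checker, which accepts pip-style specifiers and installs or updates the package only when the constraint is not met. A minimal sketch of the same call outside BaseSolution:

from ultralytics.utils.checks import check_requirements

# Same specifier BaseSolution now enforces before importing shapely
check_requirements("shapely>=2.0.0,<2.1.0")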
ultralytics/utils/benchmarks.py
CHANGED
@@ -126,7 +126,7 @@ def benchmark(
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 TensorFlow exports not supported by onnx2tf yet"
         if i == 11:  # Paddle
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
-            assert
+            assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
             assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
             assert LINUX or MACOS, "Windows Paddle exports not supported yet"
         if i == 12:  # MNN
ultralytics/utils/callbacks/base.py
CHANGED
@@ -176,21 +176,38 @@ default_callbacks = {

 def get_default_callbacks():
     """
-
+    Get the default callbacks for Ultralytics training, validation, prediction, and export processes.

     Returns:
-        (
+        (dict): Dictionary of default callbacks for various training events. Each key in the dictionary represents an
+            event during the training process, and the corresponding value is a list of callback functions that are
+            executed when that event occurs.
+
+    Examples:
+        >>> callbacks = get_default_callbacks()
+        >>> print(list(callbacks.keys()))  # show all available callback events
+        ['on_pretrain_routine_start', 'on_pretrain_routine_end', ...]
     """
     return defaultdict(list, deepcopy(default_callbacks))


 def add_integration_callbacks(instance):
     """
-    Add integration callbacks
+    Add integration callbacks to the instance's callbacks dictionary.
+
+    This function loads and adds various integration callbacks to the provided instance. The specific callbacks added
+    depend on the type of instance provided. All instances receive HUB callbacks, while Trainer instances also receive
+    additional callbacks for various integrations like ClearML, Comet, DVC, MLflow, Neptune, Ray Tune, TensorBoard,
+    and Weights & Biases.

     Args:
-        instance (Trainer | Predictor | Validator | Exporter):
-
+        instance (Trainer | Predictor | Validator | Exporter): The object instance to which callbacks will be added.
+            The type of instance determines which callbacks are loaded.
+
+    Examples:
+        >>> from ultralytics.engine.trainer import BaseTrainer
+        >>> trainer = BaseTrainer()
+        >>> add_integration_callbacks(trainer)
     """
     # Load HUB callbacks
     from .hub import callbacks as hub_cb
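To make the registry shape documented in get_default_callbacks() concrete, here is a small sketch of appending a custom hook to one of the listed events; the hook name and body are illustrative:

from ultralytics.utils.callbacks.base import get_default_callbacks

def my_hook(trainer):  # illustrative custom hook; callbacks receive the triggering object
    print("fit epoch finished:", trainer.epoch)

callbacks = get_default_callbacks()  # defaultdict(list) keyed by event name
callbacks["on_fit_epoch_end"].append(my_hook)
print(list(callbacks.keys())[:3])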
ultralytics/utils/callbacks/comet.py
CHANGED
@@ -155,7 +155,32 @@ def _scale_bounding_box_to_original_image_shape(


 def _format_ground_truth_annotations_for_detection(img_idx, image_path, batch, class_name_map=None) -> Optional[dict]:
-    """
+    """
+    Format ground truth annotations for object detection.
+
+    This function processes ground truth annotations from a batch of images for object detection tasks. It extracts
+    bounding boxes, class labels, and other metadata for a specific image in the batch, and formats them for
+    visualization or evaluation.
+
+    Args:
+        img_idx (int): Index of the image in the batch to process.
+        image_path (str | Path): Path to the image file.
+        batch (dict): Batch dictionary containing detection data with keys:
+            - 'batch_idx': Tensor of batch indices
+            - 'bboxes': Tensor of bounding boxes in normalized xywh format
+            - 'cls': Tensor of class labels
+            - 'ori_shape': Original image shapes
+            - 'resized_shape': Resized image shapes
+            - 'ratio_pad': Ratio and padding information
+        class_name_map (dict | None, optional): Mapping from class indices to class names.
+
+    Returns:
+        (dict | None): Formatted ground truth annotations with the following structure:
+            - 'boxes': List of box coordinates [x, y, width, height]
+            - 'label': Label string with format "gt_{class_name}"
+            - 'score': Confidence score (always 1.0, scaled by _scale_confidence_score)
+        Returns None if no bounding boxes are found for the image.
+    """
     indices = batch["batch_idx"] == img_idx
     bboxes = batch["bboxes"][indices]
     if len(bboxes) == 0:
@@ -284,7 +309,22 @@ def _log_confusion_matrix(experiment, trainer, curr_step, curr_epoch) -> None:


 def _log_images(experiment, image_paths, curr_step, annotations=None) -> None:
-    """
+    """
+    Log images to the experiment with optional annotations.
+
+    This function logs images to a Comet ML experiment, optionally including annotation data for visualization
+    such as bounding boxes or segmentation masks.
+
+    Args:
+        experiment (comet_ml.Experiment): The Comet ML experiment to log images to.
+        image_paths (List[Path]): List of paths to images that will be logged.
+        curr_step (int): Current training step/iteration for tracking in the experiment timeline.
+        annotations (List[List[dict]], optional): Nested list of annotation dictionaries for each image. Each annotation
+            contains visualization data like bounding boxes, labels, and confidence scores.
+
+    Returns:
+        None
+    """
     if annotations:
         for image_path, annotation in zip(image_paths, annotations):
             experiment.log_image(image_path, name=image_path.stem, step=curr_step, annotations=annotation)
@@ -295,7 +335,23 @@ def _log_images(experiment, image_paths, curr_step, annotations=None) -> None:


 def _log_image_predictions(experiment, validator, curr_step) -> None:
-    """
+    """
+    Log predicted boxes for a single image during training.
+
+    This function logs image predictions to a Comet ML experiment during model validation. It processes
+    validation data and formats both ground truth and prediction annotations for visualization in the Comet
+    dashboard. The function respects configured limits on the number of images to log.
+
+    Args:
+        experiment (comet_ml.Experiment): The Comet ML experiment to log to.
+        validator (BaseValidator): The validator instance containing validation data and predictions.
+        curr_step (int): The current training step for logging timeline.
+
+    Notes:
+        This function uses global state to track the number of logged predictions across calls.
+        It only logs predictions for supported tasks defined in COMET_SUPPORTED_TASKS.
+        The number of logged images is limited by the COMET_MAX_IMAGE_PREDICTIONS environment variable.
+    """
     global _comet_image_prediction_count

     task = validator.args.task
@@ -342,7 +398,22 @@ def _log_image_predictions(experiment, validator, curr_step) -> None:


 def _log_plots(experiment, trainer) -> None:
-    """
+    """
+    Log evaluation plots and label plots for the experiment.
+
+    This function logs various evaluation plots and confusion matrices to the experiment tracking system. It handles
+    different types of metrics (SegmentMetrics, PoseMetrics, DetMetrics, OBBMetrics) and logs the appropriate plots
+    for each type.
+
+    Args:
+        experiment (comet_ml.Experiment): The Comet ML experiment to log plots to.
+        trainer (ultralytics.engine.trainer.BaseTrainer): The trainer object containing validation metrics and save
+            directory information.
+
+    Examples:
+        >>> from ultralytics.utils.callbacks.comet import _log_plots
+        >>> _log_plots(experiment, trainer)
+    """
     plot_filenames = None
     if isinstance(trainer.validator.metrics, SegmentMetrics) and trainer.validator.metrics.task == "segment":
         plot_filenames = [
@@ -401,7 +472,24 @@ def on_train_epoch_end(trainer) -> None:


 def on_fit_epoch_end(trainer) -> None:
-    """
+    """
+    Log model assets at the end of each epoch during training.
+
+    This function is called at the end of each training epoch to log metrics, learning rates, and model information
+    to a Comet ML experiment. It also logs model assets, confusion matrices, and image predictions based on
+    configuration settings.
+
+    The function retrieves the current Comet ML experiment and logs various training metrics. If it's the first epoch,
+    it also logs model information. On specified save intervals, it logs the model, confusion matrix (if enabled),
+    and image predictions (if enabled).
+
+    Args:
+        trainer (BaseTrainer): The YOLO trainer object containing training state, metrics, and configuration.
+
+    Examples:
+        >>> # Inside a training loop
+        >>> on_fit_epoch_end(trainer)  # Log metrics and assets to Comet ML
+    """
     experiment = comet_ml.get_running_experiment()
     if not experiment:
         return
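The prediction-image cap referenced in _log_image_predictions() is read from the environment; a minimal sketch of setting it before training starts (the value shown is illustrative):

import os

# Limit how many prediction images the Comet callback uploads per run
os.environ["COMET_MAX_IMAGE_PREDICTIONS"] = "50"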
ultralytics/utils/callbacks/dvc.py
CHANGED
@@ -27,7 +27,21 @@ except (ImportError, AssertionError, TypeError):


 def _log_images(path: Path, prefix: str = "") -> None:
-    """
+    """
+    Log images at specified path with an optional prefix using DVCLive.
+
+    This function logs images found at the given path to DVCLive, organizing them by batch to enable slider
+    functionality in the UI. It processes image filenames to extract batch information and restructures the path
+    accordingly.
+
+    Args:
+        path (Path): Path to the image file to be logged.
+        prefix (str): Optional prefix to add to the image name when logging.
+
+    Examples:
+        >>> from pathlib import Path
+        >>> _log_images(Path("runs/train/exp/val_batch0_pred.jpg"), prefix="validation")
+    """
     if live:
         name = path.name

@@ -41,7 +55,13 @@ def _log_images(path: Path, prefix: str = "") -> None:


 def _log_plots(plots: dict, prefix: str = "") -> None:
-    """
+    """
+    Log plot images for training progress if they have not been previously processed.
+
+    Args:
+        plots (dict): Dictionary containing plot information with timestamps.
+        prefix (str, optional): Optional prefix to add to the logged image paths.
+    """
     for name, params in plots.items():
         timestamp = params["timestamp"]
         if _processed_plots.get(name) != timestamp:
@@ -50,7 +70,19 @@ def _log_plots(plots: dict, prefix: str = "") -> None:


 def _log_confusion_matrix(validator) -> None:
-    """
+    """
+    Log confusion matrix for a validator using DVCLive.
+
+    This function processes the confusion matrix from a validator object and logs it to DVCLive by converting
+    the matrix into lists of target and prediction labels.
+
+    Args:
+        validator (BaseValidator): The validator object containing the confusion matrix and class names.
+            Must have attributes: confusion_matrix.matrix, confusion_matrix.task, and names.
+
+    Returns:
+        None
+    """
     targets = []
     preds = []
     matrix = validator.confusion_matrix.matrix
@@ -94,7 +126,20 @@ def on_train_epoch_start(trainer) -> None:


 def on_fit_epoch_end(trainer) -> None:
-    """
+    """
+    Log training metrics, model info, and advance to next step at the end of each fit epoch.
+
+    This function is called at the end of each fit epoch during training. It logs various metrics including
+    training loss items, validation metrics, and learning rates. On the first epoch, it also logs model
+    information. Additionally, it logs training and validation plots and advances the DVCLive step counter.
+
+    Args:
+        trainer (BaseTrainer): The trainer object containing training state, metrics, and plots.
+
+    Notes:
+        This function only performs logging operations when DVCLive logging is active and during a training epoch.
+        The global variable _training_epoch is used to track whether the current epoch is a training epoch.
+    """
     global _training_epoch
     if live and _training_epoch:
         all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix="train"), **trainer.metrics, **trainer.lr}
@@ -115,7 +160,21 @@ def on_fit_epoch_end(trainer) -> None:


 def on_train_end(trainer) -> None:
-    """
+    """
+    Log best metrics, plots, and confusion matrix at the end of training.
+
+    This function is called at the conclusion of the training process to log final metrics, visualizations, and
+    model artifacts if DVCLive logging is active. It captures the best model performance metrics, training plots,
+    validation plots, and confusion matrix for later analysis.
+
+    Args:
+        trainer (BaseTrainer): The trainer object containing training state, metrics, and validation results.
+
+    Examples:
+        >>> # Inside a custom training loop
+        >>> from ultralytics.utils.callbacks.dvc import on_train_end
+        >>> on_train_end(trainer)  # Log final metrics and artifacts
+    """
     if live:
         # At the end log the best metrics. It runs validator on the best model internally.
         all_metrics = {**trainer.label_loss_items(trainer.tloss, prefix="train"), **trainer.metrics, **trainer.lr}
ultralytics/utils/callbacks/neptune.py
CHANGED
@@ -19,14 +19,37 @@ except (ImportError, AssertionError):


 def _log_scalars(scalars: dict, step: int = 0) -> None:
-    """
+    """
+    Log scalars to the NeptuneAI experiment logger.
+
+    Args:
+        scalars (dict): Dictionary of scalar values to log to NeptuneAI.
+        step (int): The current step or iteration number for logging.
+
+    Examples:
+        >>> metrics = {"mAP": 0.85, "loss": 0.32}
+        >>> _log_scalars(metrics, step=100)
+    """
     if run:
         for k, v in scalars.items():
             run[k].append(value=v, step=step)


 def _log_images(imgs_dict: dict, group: str = "") -> None:
-    """
+    """
+    Log images to the NeptuneAI experiment logger.
+
+    This function logs image data to Neptune.ai when a valid Neptune run is active. Images are organized
+    under the specified group name.
+
+    Args:
+        imgs_dict (dict): Dictionary of images to log, with keys as image names and values as image data.
+        group (str, optional): Group name to organize images under in the Neptune UI.
+
+    Examples:
+        >>> # Log validation images
+        >>> _log_images({"val_batch": img_tensor}, group="validation")
+    """
     if run:
         for k, v in imgs_dict.items():
             run[f"{group}/{k}"].upload(File(v))