ultralytics 8.2.81__py3-none-any.whl → 8.2.83__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ultralytics might be problematic.
- tests/test_solutions.py +0 -4
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +21 -21
- ultralytics/data/annotator.py +1 -1
- ultralytics/data/augment.py +58 -58
- ultralytics/data/base.py +3 -3
- ultralytics/data/converter.py +7 -8
- ultralytics/data/explorer/explorer.py +7 -23
- ultralytics/data/loaders.py +2 -2
- ultralytics/data/split_dota.py +11 -3
- ultralytics/data/utils.py +6 -10
- ultralytics/engine/exporter.py +2 -4
- ultralytics/engine/model.py +47 -47
- ultralytics/engine/predictor.py +1 -1
- ultralytics/engine/results.py +28 -28
- ultralytics/engine/trainer.py +11 -8
- ultralytics/engine/tuner.py +7 -8
- ultralytics/engine/validator.py +3 -5
- ultralytics/hub/__init__.py +5 -5
- ultralytics/hub/auth.py +6 -2
- ultralytics/hub/session.py +3 -5
- ultralytics/models/fastsam/model.py +13 -10
- ultralytics/models/fastsam/predict.py +2 -2
- ultralytics/models/fastsam/utils.py +0 -1
- ultralytics/models/nas/model.py +4 -4
- ultralytics/models/nas/predict.py +1 -2
- ultralytics/models/nas/val.py +1 -1
- ultralytics/models/rtdetr/predict.py +1 -1
- ultralytics/models/rtdetr/train.py +1 -1
- ultralytics/models/rtdetr/val.py +1 -1
- ultralytics/models/sam/model.py +11 -11
- ultralytics/models/sam/modules/decoders.py +7 -4
- ultralytics/models/sam/modules/sam.py +9 -1
- ultralytics/models/sam/modules/tiny_encoder.py +1 -1
- ultralytics/models/sam/modules/transformer.py +0 -2
- ultralytics/models/sam/modules/utils.py +1 -1
- ultralytics/models/sam/predict.py +10 -10
- ultralytics/models/utils/loss.py +29 -17
- ultralytics/models/utils/ops.py +1 -5
- ultralytics/models/yolo/classify/predict.py +1 -1
- ultralytics/models/yolo/classify/train.py +1 -1
- ultralytics/models/yolo/classify/val.py +1 -1
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +1 -1
- ultralytics/models/yolo/detect/val.py +1 -1
- ultralytics/models/yolo/model.py +6 -2
- ultralytics/models/yolo/obb/predict.py +1 -1
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/obb/val.py +2 -2
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +1 -1
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/predict.py +1 -1
- ultralytics/models/yolo/segment/train.py +1 -1
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/models/yolo/world/train.py +1 -1
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/__init__.py +2 -2
- ultralytics/nn/modules/block.py +8 -20
- ultralytics/nn/modules/conv.py +1 -3
- ultralytics/nn/modules/head.py +16 -31
- ultralytics/nn/modules/transformer.py +0 -1
- ultralytics/nn/modules/utils.py +0 -1
- ultralytics/nn/tasks.py +11 -9
- ultralytics/solutions/__init__.py +1 -0
- ultralytics/solutions/ai_gym.py +0 -2
- ultralytics/solutions/analytics.py +1 -6
- ultralytics/solutions/heatmap.py +0 -1
- ultralytics/solutions/object_counter.py +0 -2
- ultralytics/solutions/queue_management.py +0 -2
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/byte_tracker.py +2 -2
- ultralytics/trackers/utils/gmc.py +5 -5
- ultralytics/trackers/utils/kalman_filter.py +1 -1
- ultralytics/trackers/utils/matching.py +1 -5
- ultralytics/utils/__init__.py +137 -24
- ultralytics/utils/autobatch.py +7 -4
- ultralytics/utils/benchmarks.py +6 -14
- ultralytics/utils/callbacks/base.py +0 -1
- ultralytics/utils/callbacks/comet.py +0 -1
- ultralytics/utils/callbacks/tensorboard.py +0 -1
- ultralytics/utils/checks.py +15 -18
- ultralytics/utils/downloads.py +6 -7
- ultralytics/utils/files.py +3 -4
- ultralytics/utils/instance.py +17 -7
- ultralytics/utils/metrics.py +16 -16
- ultralytics/utils/ops.py +8 -8
- ultralytics/utils/plotting.py +25 -35
- ultralytics/utils/tal.py +27 -18
- ultralytics/utils/torch_utils.py +12 -13
- ultralytics/utils/tuner.py +2 -3
- {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/METADATA +4 -3
- {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/RECORD +97 -97
- {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/WHEEL +1 -1
- {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/LICENSE +0 -0
- {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.2.81.dist-info → ultralytics-8.2.83.dist-info}/top_level.txt +0 -0
ultralytics/models/sam/model.py
CHANGED
@@ -41,8 +41,8 @@ class SAM(Model):
         info: Logs information about the SAM model.
 
     Examples:
-        >>> sam = SAM(
-        >>> results = sam.predict(
+        >>> sam = SAM("sam_b.pt")
+        >>> results = sam.predict("image.jpg", points=[[500, 375]])
         >>> for r in results:
         >>> print(f"Detected {len(r.masks)} masks")
     """

@@ -58,7 +58,7 @@ class SAM(Model):
         NotImplementedError: If the model file extension is not .pt or .pth.
 
     Examples:
-        >>> sam = SAM(
+        >>> sam = SAM("sam_b.pt")
        >>> print(sam.is_sam2)
     """
     if model and Path(model).suffix not in {".pt", ".pth"}:

@@ -78,8 +78,8 @@ class SAM(Model):
         task (str | None): Task name. If provided, it specifies the particular task the model is being loaded for.
 
     Examples:
-        >>> sam = SAM(
-        >>> sam._load(
+        >>> sam = SAM("sam_b.pt")
+        >>> sam._load("path/to/custom_weights.pt")
     """
     self.model = build_sam(weights)
 
@@ -100,8 +100,8 @@ class SAM(Model):
         (List): The model predictions.
 
     Examples:
-        >>> sam = SAM(
-        >>> results = sam.predict(
+        >>> sam = SAM("sam_b.pt")
+        >>> results = sam.predict("image.jpg", points=[[500, 375]])
         >>> for r in results:
         ...     print(f"Detected {len(r.masks)} masks")
     """

@@ -130,8 +130,8 @@ class SAM(Model):
         (List): The model predictions, typically containing segmentation masks and other relevant information.
 
     Examples:
-        >>> sam = SAM(
-        >>> results = sam(
+        >>> sam = SAM("sam_b.pt")
+        >>> results = sam("image.jpg", points=[[500, 375]])
         >>> print(f"Detected {len(results[0].masks)} masks")
     """
     return self.predict(source, stream, bboxes, points, labels, **kwargs)

@@ -151,7 +151,7 @@ class SAM(Model):
         (Tuple): A tuple containing the model's information (string representations of the model).
 
     Examples:
-        >>> sam = SAM(
+        >>> sam = SAM("sam_b.pt")
         >>> info = sam.info()
         >>> print(info[0]) # Print summary information
     """

@@ -167,7 +167,7 @@ class SAM(Model):
         class. For SAM2 models, it maps to SAM2Predictor, otherwise to the standard Predictor.
 
     Examples:
-        >>> sam = SAM(
+        >>> sam = SAM("sam_b.pt")
         >>> task_map = sam.task_map
         >>> print(task_map)
         {'segment': <class 'ultralytics.models.sam.predict.Predictor'>}

ultralytics/models/sam/modules/decoders.py
CHANGED

@@ -32,8 +32,9 @@ class MaskDecoder(nn.Module):
 
     Examples:
         >>> decoder = MaskDecoder(transformer_dim=256, transformer=transformer_module)
-        >>> masks, iou_pred = decoder(
-        ...
+        >>> masks, iou_pred = decoder(
+        ...     image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, multimask_output=True
+        ... )
         >>> print(f"Predicted masks shape: {masks.shape}, IoU predictions shape: {iou_pred.shape}")
     """
 
@@ -213,7 +214,8 @@ class SAM2MaskDecoder(nn.Module):
         >>> dense_prompt_embeddings = torch.rand(1, 256, 64, 64)
         >>> decoder = SAM2MaskDecoder(256, transformer)
         >>> masks, iou_pred, sam_tokens_out, obj_score_logits = decoder.forward(
-        ...     image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
+        ...     image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
+        ... )
     """
 
     def __init__(

@@ -345,7 +347,8 @@ class SAM2MaskDecoder(nn.Module):
         >>> dense_prompt_embeddings = torch.rand(1, 256, 64, 64)
         >>> decoder = SAM2MaskDecoder(256, transformer)
         >>> masks, iou_pred, sam_tokens_out, obj_score_logits = decoder.forward(
-        ...     image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
+        ...     image_embeddings, image_pe, sparse_prompt_embeddings, dense_prompt_embeddings, True, False
+        ... )
     """
     masks, iou_pred, mask_tokens_out, object_score_logits = self.predict_masks(
         image_embeddings=image_embeddings,

ultralytics/models/sam/modules/sam.py
CHANGED

@@ -417,7 +417,15 @@ class SAM2Model(torch.nn.Module):
         >>> point_inputs = {"point_coords": torch.rand(1, 2, 2), "point_labels": torch.tensor([[1, 0]])}
         >>> mask_inputs = torch.rand(1, 1, 512, 512)
         >>> results = model._forward_sam_heads(backbone_features, point_inputs, mask_inputs)
-        >>>
+        >>> (
+        ...     low_res_multimasks,
+        ...     high_res_multimasks,
+        ...     ious,
+        ...     low_res_masks,
+        ...     high_res_masks,
+        ...     obj_ptr,
+        ...     object_score_logits,
+        ... ) = results
     """
     B = backbone_features.size(0)
     device = backbone_features.device

ultralytics/models/sam/modules/tiny_encoder.py
CHANGED

@@ -716,7 +716,7 @@ class BasicLayer(nn.Module):
 
     Examples:
         >>> layer = BasicLayer(dim=96, input_resolution=(56, 56), depth=2, num_heads=3, window_size=7)
-        >>> x = torch.randn(1, 56*56, 96)
+        >>> x = torch.randn(1, 56 * 56, 96)
         >>> output = layer(x)
         >>> print(output.shape)
     """

ultralytics/models/sam/modules/transformer.py
CHANGED

@@ -232,7 +232,6 @@ class TwoWayAttentionBlock(nn.Module):
 
     def forward(self, queries: Tensor, keys: Tensor, query_pe: Tensor, key_pe: Tensor) -> Tuple[Tensor, Tensor]:
         """Applies two-way attention to process query and key embeddings in a transformer block."""
-
         # Self attention block
         if self.skip_first_layer_pe:
             queries = self.self_attn(q=queries, k=queries, v=queries)

@@ -353,7 +352,6 @@ class Attention(nn.Module):
 
     def forward(self, q: Tensor, k: Tensor, v: Tensor) -> Tensor:
         """Applies multi-head attention to query, key, and value tensors with optional downsampling."""
-
         # Input projections
         q = self.q_proj(q)
         k = self.k_proj(k)

ultralytics/models/sam/modules/utils.py
CHANGED

@@ -22,7 +22,7 @@ def select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num
 
     Examples:
         >>> frame_idx = 5
-        >>> cond_frame_outputs = {1:
+        >>> cond_frame_outputs = {1: "a", 3: "b", 7: "c", 9: "d"}
         >>> max_cond_frame_num = 2
         >>> selected, unselected = select_closest_cond_frames(frame_idx, cond_frame_outputs, max_cond_frame_num)
         >>> print(selected)

ultralytics/models/sam/predict.py
CHANGED

@@ -69,8 +69,8 @@ class Predictor(BasePredictor):
 
     Examples:
         >>> predictor = Predictor()
-        >>> predictor.setup_model(model_path=
-        >>> predictor.set_image(
+        >>> predictor.setup_model(model_path="sam_model.pt")
+        >>> predictor.set_image("image.jpg")
         >>> masks, scores, boxes = predictor.generate()
         >>> results = predictor.postprocess((masks, scores, boxes), im, orig_img)
     """

@@ -90,8 +90,8 @@ class Predictor(BasePredictor):
 
     Examples:
         >>> predictor = Predictor(cfg=DEFAULT_CFG)
-        >>> predictor = Predictor(overrides={
-        >>> predictor = Predictor(_callbacks={
+        >>> predictor = Predictor(overrides={"imgsz": 640})
+        >>> predictor = Predictor(_callbacks={"on_predict_start": custom_callback})
     """
     if overrides is None:
         overrides = {}

@@ -188,8 +188,8 @@ class Predictor(BasePredictor):
 
     Examples:
         >>> predictor = Predictor()
-        >>> predictor.setup_model(model_path=
-        >>> predictor.set_image(
+        >>> predictor.setup_model(model_path="sam_model.pt")
+        >>> predictor.set_image("image.jpg")
         >>> masks, scores, logits = predictor.inference(im, bboxes=[[0, 0, 100, 100]])
     """
     # Override prompts if any stored in self.prompts

@@ -475,8 +475,8 @@ class Predictor(BasePredictor):
 
     Examples:
         >>> predictor = Predictor()
-        >>> predictor.setup_source(
-        >>> predictor.setup_source(
+        >>> predictor.setup_source("path/to/images")
+        >>> predictor.setup_source("video.mp4")
         >>> predictor.setup_source(None) # Uses default source if available
 
     Notes:

@@ -504,8 +504,8 @@ class Predictor(BasePredictor):
 
     Examples:
         >>> predictor = Predictor()
-        >>> predictor.set_image(
-        >>> predictor.set_image(cv2.imread(
+        >>> predictor.set_image("path/to/image.jpg")
+        >>> predictor.set_image(cv2.imread("path/to/image.jpg"))
 
     Notes:
         - This method should be called before performing inference on a new image.
ultralytics/models/utils/loss.py
CHANGED
@@ -34,15 +34,19 @@ class DETRLoss(nn.Module):
         self, nc=80, loss_gain=None, aux_loss=True, use_fl=True, use_vfl=False, use_uni_match=False, uni_match_ind=0
     ):
         """
-        DETR loss function.
+        Initialize DETR loss function with customizable components and gains.
+
+        Uses default loss_gain if not provided. Initializes HungarianMatcher with
+        preset cost gains. Supports auxiliary losses and various loss types.
 
         Args:
-            nc (int):
-            loss_gain (dict):
-            aux_loss (bool):
-
-
-
+            nc (int): Number of classes.
+            loss_gain (dict): Coefficients for different loss components.
+            aux_loss (bool): Use auxiliary losses from each decoder layer.
+            use_fl (bool): Use FocalLoss.
+            use_vfl (bool): Use VarifocalLoss.
+            use_uni_match (bool): Use fixed layer for auxiliary branch label assignment.
+            uni_match_ind (int): Index of fixed layer for uni_match.
         """
         super().__init__()
 
@@ -82,9 +86,7 @@ class DETRLoss(nn.Module):
         return {name_class: loss_cls.squeeze() * self.loss_gain["class"]}
 
     def _get_loss_bbox(self, pred_bboxes, gt_bboxes, postfix=""):
-        """
-        boxes.
-        """
+        """Computes bounding box and GIoU losses for predicted and ground truth bounding boxes."""
         # Boxes: [b, query, 4], gt_bbox: list[[n, 4]]
         name_bbox = f"loss_bbox{postfix}"
         name_giou = f"loss_giou{postfix}"

@@ -250,14 +252,24 @@ class DETRLoss(nn.Module):
 
     def forward(self, pred_bboxes, pred_scores, batch, postfix="", **kwargs):
         """
+        Calculate loss for predicted bounding boxes and scores.
+
         Args:
-            pred_bboxes (torch.Tensor): [l, b, query, 4]
-            pred_scores (torch.Tensor): [l, b, query, num_classes]
-            batch (dict):
-
-
-            gt_groups (List
-            postfix (str):
+            pred_bboxes (torch.Tensor): Predicted bounding boxes, shape [l, b, query, 4].
+            pred_scores (torch.Tensor): Predicted class scores, shape [l, b, query, num_classes].
+            batch (dict): Batch information containing:
+                cls (torch.Tensor): Ground truth classes, shape [num_gts].
+                bboxes (torch.Tensor): Ground truth bounding boxes, shape [num_gts, 4].
+                gt_groups (List[int]): Number of ground truths for each image in the batch.
+            postfix (str): Postfix for loss names.
+            **kwargs (Any): Additional arguments, may include 'match_indices'.
+
+        Returns:
+            (dict): Computed losses, including main and auxiliary (if enabled).
+
+        Note:
+            Uses last elements of pred_bboxes and pred_scores for main loss, and the rest for auxiliary losses if
+            self.aux_loss is True.
         """
         self.device = pred_bboxes.device
         match_indices = kwargs.get("match_indices", None)
ultralytics/models/utils/ops.py
CHANGED
@@ -32,9 +32,7 @@ class HungarianMatcher(nn.Module):
     """
 
     def __init__(self, cost_gain=None, use_fl=True, with_mask=False, num_sample_points=12544, alpha=0.25, gamma=2.0):
-        """Initializes HungarianMatcher
-        gamma factors.
-        """
+        """Initializes a HungarianMatcher module for optimal assignment of predicted and ground truth bounding boxes."""
         super().__init__()
         if cost_gain is None:
             cost_gain = {"class": 1, "bbox": 5, "giou": 2, "mask": 1, "dice": 1}

@@ -70,7 +68,6 @@ class HungarianMatcher(nn.Module):
             For each batch element, it holds:
                 len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
         """
-
         bs, nq, nc = pred_scores.shape
 
         if sum(gt_groups) == 0:

@@ -175,7 +172,6 @@ def get_cdn_group(
         bounding boxes, attention mask and meta information for denoising. If not in training mode or 'num_dn'
         is less than or equal to 0, the function returns None for all elements in the tuple.
     """
-
     if (not training) or num_dn <= 0:
         return None, None, None, None
     gt_groups = batch["gt_groups"]

ultralytics/models/yolo/classify/predict.py
CHANGED

@@ -21,7 +21,7 @@ class ClassificationPredictor(BasePredictor):
         from ultralytics.utils import ASSETS
         from ultralytics.models.yolo.classify import ClassificationPredictor
 
-        args = dict(model=
+        args = dict(model="yolov8n-cls.pt", source=ASSETS)
         predictor = ClassificationPredictor(overrides=args)
         predictor.predict_cli()
         ```

ultralytics/models/yolo/classify/train.py
CHANGED

@@ -22,7 +22,7 @@ class ClassificationTrainer(BaseTrainer):
         ```python
         from ultralytics.models.yolo.classify import ClassificationTrainer
 
-        args = dict(model=
+        args = dict(model="yolov8n-cls.pt", data="imagenet10", epochs=3)
         trainer = ClassificationTrainer(overrides=args)
         trainer.train()
         ```

ultralytics/models/yolo/classify/val.py
CHANGED

@@ -20,7 +20,7 @@ class ClassificationValidator(BaseValidator):
         ```python
         from ultralytics.models.yolo.classify import ClassificationValidator
 
-        args = dict(model=
+        args = dict(model="yolov8n-cls.pt", data="imagenet10")
         validator = ClassificationValidator(args=args)
         validator()
         ```

ultralytics/models/yolo/detect/predict.py
CHANGED

@@ -14,7 +14,7 @@ class DetectionPredictor(BasePredictor):
         from ultralytics.utils import ASSETS
         from ultralytics.models.yolo.detect import DetectionPredictor
 
-        args = dict(model=
+        args = dict(model="yolov8n.pt", source=ASSETS)
         predictor = DetectionPredictor(overrides=args)
         predictor.predict_cli()
         ```

ultralytics/models/yolo/detect/train.py
CHANGED

@@ -24,7 +24,7 @@ class DetectionTrainer(BaseTrainer):
         ```python
         from ultralytics.models.yolo.detect import DetectionTrainer
 
-        args = dict(model=
+        args = dict(model="yolov8n.pt", data="coco8.yaml", epochs=3)
         trainer = DetectionTrainer(overrides=args)
         trainer.train()
         ```

ultralytics/models/yolo/detect/val.py
CHANGED

@@ -22,7 +22,7 @@ class DetectionValidator(BaseValidator):
         ```python
         from ultralytics.models.yolo.detect import DetectionValidator
 
-        args = dict(model=
+        args = dict(model="yolov8n.pt", data="coco8.yaml")
         validator = DetectionValidator(args=args)
         validator()
         ```
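
A minimal sketch of the matcher initialized above, with random stand-in tensors; the shapes follow the class docstring, and the values are illustrative only:

```python
import torch

from ultralytics.models.utils.ops import HungarianMatcher

matcher = HungarianMatcher(cost_gain={"class": 1, "bbox": 5, "giou": 2, "mask": 1, "dice": 1})
pred_bboxes = torch.rand(1, 100, 4)  # [bs, num_queries, 4]
pred_scores = torch.rand(1, 100, 80)  # [bs, num_queries, num_classes]
gt_bboxes, gt_cls, gt_groups = torch.rand(3, 4), torch.randint(0, 80, (3,)), [3]
indices = matcher(pred_bboxes, pred_scores, gt_bboxes, gt_cls, gt_groups)
# indices: one (pred_idx, gt_idx) pair per batch element, each of length
# min(num_queries, num_target_boxes) as stated in the docstring
```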
ultralytics/models/yolo/model.py
CHANGED
@@ -64,10 +64,14 @@ class YOLOWorld(Model):
 
     def __init__(self, model="yolov8s-world.pt", verbose=False) -> None:
         """
-
+        Initialize YOLOv8-World model with a pre-trained model file.
+
+        Loads a YOLOv8-World model for object detection. If no custom class names are provided, it assigns default
+        COCO class names.
 
         Args:
-            model (str | Path): Path to the pre-trained model.
+            model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
+            verbose (bool): If True, prints additional information during initialization.
         """
         super().__init__(model=model, task="detect", verbose=verbose)
 

ultralytics/models/yolo/obb/predict.py
CHANGED

@@ -16,7 +16,7 @@ class OBBPredictor(DetectionPredictor):
         from ultralytics.utils import ASSETS
         from ultralytics.models.yolo.obb import OBBPredictor
 
-        args = dict(model=
+        args = dict(model="yolov8n-obb.pt", source=ASSETS)
         predictor = OBBPredictor(overrides=args)
         predictor.predict_cli()
         ```

ultralytics/models/yolo/obb/train.py
CHANGED

@@ -15,7 +15,7 @@ class OBBTrainer(yolo.detect.DetectionTrainer):
         ```python
         from ultralytics.models.yolo.obb import OBBTrainer
 
-        args = dict(model=
+        args = dict(model="yolov8n-obb.pt", data="dota8.yaml", epochs=3)
         trainer = OBBTrainer(overrides=args)
         trainer.train()
         ```

ultralytics/models/yolo/obb/val.py
CHANGED

@@ -18,9 +18,9 @@ class OBBValidator(DetectionValidator):
         ```python
         from ultralytics.models.yolo.obb import OBBValidator
 
-        args = dict(model=
+        args = dict(model="yolov8n-obb.pt", data="dota8.yaml")
         validator = OBBValidator(args=args)
-        validator(model=args[
+        validator(model=args["model"])
         ```
     """
 

ultralytics/models/yolo/pose/predict.py
CHANGED

@@ -14,7 +14,7 @@ class PosePredictor(DetectionPredictor):
         from ultralytics.utils import ASSETS
         from ultralytics.models.yolo.pose import PosePredictor
 
-        args = dict(model=
+        args = dict(model="yolov8n-pose.pt", source=ASSETS)
         predictor = PosePredictor(overrides=args)
         predictor.predict_cli()
         ```

ultralytics/models/yolo/pose/train.py
CHANGED

@@ -16,7 +16,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
         ```python
         from ultralytics.models.yolo.pose import PoseTrainer
 
-        args = dict(model=
+        args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3)
         trainer = PoseTrainer(overrides=args)
         trainer.train()
         ```

ultralytics/models/yolo/pose/val.py
CHANGED

@@ -20,7 +20,7 @@ class PoseValidator(DetectionValidator):
         ```python
         from ultralytics.models.yolo.pose import PoseValidator
 
-        args = dict(model=
+        args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml")
         validator = PoseValidator(args=args)
         validator()
         ```

ultralytics/models/yolo/segment/predict.py
CHANGED

@@ -14,7 +14,7 @@ class SegmentationPredictor(DetectionPredictor):
         from ultralytics.utils import ASSETS
         from ultralytics.models.yolo.segment import SegmentationPredictor
 
-        args = dict(model=
+        args = dict(model="yolov8n-seg.pt", source=ASSETS)
         predictor = SegmentationPredictor(overrides=args)
         predictor.predict_cli()
         ```

ultralytics/models/yolo/segment/train.py
CHANGED

@@ -16,7 +16,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
         ```python
         from ultralytics.models.yolo.segment import SegmentationTrainer
 
-        args = dict(model=
+        args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml", epochs=3)
         trainer = SegmentationTrainer(overrides=args)
         trainer.train()
         ```

ultralytics/models/yolo/segment/val.py
CHANGED

@@ -22,7 +22,7 @@ class SegmentationValidator(DetectionValidator):
         ```python
         from ultralytics.models.yolo.segment import SegmentationValidator
 
-        args = dict(model=
+        args = dict(model="yolov8n-seg.pt", data="coco8-seg.yaml")
         validator = SegmentationValidator(args=args)
         validator()
         ```

ultralytics/models/yolo/world/train.py
CHANGED

@@ -29,7 +29,7 @@ class WorldTrainer(yolo.detect.DetectionTrainer):
         ```python
         from ultralytics.models.yolo.world import WorldModel
 
-        args = dict(model=
+        args = dict(model="yolov8s-world.pt", data="coco8.yaml", epochs=3)
         trainer = WorldTrainer(overrides=args)
         trainer.train()
         ```
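
All of the predictor/trainer/validator docstrings above were completed with the same three-line pattern. One of them end to end, taken verbatim from the updated PoseTrainer example:

```python
from ultralytics.models.yolo.pose import PoseTrainer

# arguments exactly as shown in the restored docstring
args = dict(model="yolov8n-pose.pt", data="coco8-pose.yaml", epochs=3)
trainer = PoseTrainer(overrides=args)
trainer.train()
```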
ultralytics/nn/autobackend.py
CHANGED
@@ -641,8 +641,8 @@ class AutoBackend(nn.Module):
     @staticmethod
     def _model_type(p="path/to/model.pt"):
         """
-
-
+        Takes a path to a model file and returns the model type. Possibles types are pt, jit, onnx, xml, engine, coreml,
+        saved_model, pb, tflite, edgetpu, tfjs, ncnn or paddle.
 
         Args:
             p: path to the model file. Defaults to path/to/model.pt

ultralytics/nn/modules/__init__.py
CHANGED

@@ -11,9 +11,9 @@ Example:
 
     x = torch.ones(1, 128, 40, 40)
     m = Conv(128, 128)
-    f = f
+    f = f"{m._get_name()}.onnx"
    torch.onnx.export(m, x, f)
-    os.system(f
+    os.system(f"onnxslim {f} {f} && open {f}")  # pip install onnxslim
     ```
 """
 
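
The module docstring example above, made self-contained; the `os`/`torch`/`Conv` imports are the obvious ones (assumed, since the docstring elides them), and onnxslim is an optional third-party tool:

```python
import os

import torch

from ultralytics.nn.modules import Conv

x = torch.ones(1, 128, 40, 40)  # dummy input for export
m = Conv(128, 128)
f = f"{m._get_name()}.onnx"
torch.onnx.export(m, x, f)
os.system(f"onnxslim {f} {f} && open {f}")  # pip install onnxslim; 'open' is macOS-specific
```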
ultralytics/nn/modules/block.py
CHANGED
@@ -204,9 +204,7 @@ class C2(nn.Module):
     """CSP Bottleneck with 2 convolutions."""
 
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
-        """Initializes
-        groups, expansion.
-        """
+        """Initializes a CSP Bottleneck with 2 convolutions and optional shortcut connection."""
         super().__init__()
         self.c = int(c2 * e) # hidden channels
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)

@@ -224,9 +222,7 @@ class C2f(nn.Module):
     """Faster Implementation of CSP Bottleneck with 2 convolutions."""
 
     def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
-        """
-        expansion.
-        """
+        """Initializes a CSP bottleneck with 2 convolutions and n Bottleneck blocks for faster processing."""
         super().__init__()
         self.c = int(c2 * e) # hidden channels
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)

@@ -335,9 +331,7 @@ class Bottleneck(nn.Module):
     """Standard bottleneck."""
 
     def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
-        """Initializes a bottleneck module with
-        expansion.
-        """
+        """Initializes a standard bottleneck module with optional shortcut connection and configurable parameters."""
         super().__init__()
         c_ = int(c2 * e) # hidden channels
         self.cv1 = Conv(c1, c_, k[0], 1)

@@ -345,7 +339,7 @@ class Bottleneck(nn.Module):
         self.add = shortcut and c1 == c2
 
     def forward(self, x):
-        """
+        """Applies the YOLO FPN to input data."""
         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 
 
@@ -449,9 +443,7 @@ class C2fAttn(nn.Module):
     """C2f module with an additional attn module."""
 
     def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
-        """
-        expansion.
-        """
+        """Initializes C2f module with attention mechanism for enhanced feature extraction and processing."""
         super().__init__()
         self.c = int(c2 * e) # hidden channels
         self.cv1 = Conv(c1, 2 * self.c, 1, 1)

@@ -521,9 +513,7 @@ class ImagePoolingAttn(nn.Module):
 
 
 class ContrastiveHead(nn.Module):
-    """
-    features.
-    """
+    """Implements contrastive learning head for region-text similarity in vision-language models."""
 
     def __init__(self):
         """Initializes ContrastiveHead with specified region-text similarity parameters."""

@@ -569,16 +559,14 @@ class RepBottleneck(Bottleneck):
     """Rep bottleneck."""
 
     def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
-        """Initializes a RepBottleneck module with customizable in/out channels,
-        ratio.
-        """
+        """Initializes a RepBottleneck module with customizable in/out channels, shortcuts, groups and expansion."""
         super().__init__(c1, c2, shortcut, g, k, e)
         c_ = int(c2 * e) # hidden channels
         self.cv1 = RepConv(c1, c_, k[0], 1)
 
 
 class RepCSP(C3):
-    """
+    """Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""
 
     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
         """Initializes RepCSP layer with given channels, repetitions, shortcut, groups and expansion ratio."""
ultralytics/nn/modules/conv.py
CHANGED
@@ -158,9 +158,7 @@ class GhostConv(nn.Module):
     """Ghost Convolution https://github.com/huawei-noah/ghostnet."""
 
     def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
-        """Initializes
-        activation.
-        """
+        """Initializes Ghost Convolution module with primary and cheap operations for efficient feature learning."""
         super().__init__()
         c_ = c2 // 2 # hidden channels
         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)