ultralytics-opencv-headless 8.4.3__py3-none-any.whl → 8.4.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_cli.py +10 -3
- tests/test_exports.py +64 -43
- tests/test_python.py +40 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +6 -5
- ultralytics/cfg/datasets/coco12-formats.yaml +101 -0
- ultralytics/cfg/default.yaml +2 -1
- ultralytics/data/augment.py +8 -0
- ultralytics/data/converter.py +32 -9
- ultralytics/data/utils.py +2 -2
- ultralytics/engine/exporter.py +11 -8
- ultralytics/engine/predictor.py +5 -0
- ultralytics/engine/results.py +8 -3
- ultralytics/engine/trainer.py +6 -4
- ultralytics/engine/tuner.py +2 -2
- ultralytics/engine/validator.py +5 -0
- ultralytics/models/sam/predict.py +2 -2
- ultralytics/models/yolo/classify/train.py +14 -1
- ultralytics/models/yolo/detect/train.py +8 -4
- ultralytics/models/yolo/pose/train.py +2 -1
- ultralytics/models/yolo/segment/predict.py +1 -1
- ultralytics/models/yolo/segment/val.py +1 -3
- ultralytics/models/yolo/world/train_world.py +21 -1
- ultralytics/models/yolo/yoloe/train.py +1 -2
- ultralytics/nn/autobackend.py +2 -2
- ultralytics/nn/modules/head.py +13 -2
- ultralytics/nn/tasks.py +18 -0
- ultralytics/solutions/security_alarm.py +1 -1
- ultralytics/trackers/byte_tracker.py +7 -7
- ultralytics/utils/benchmarks.py +3 -9
- ultralytics/utils/callbacks/platform.py +2 -1
- ultralytics/utils/callbacks/tensorboard.py +2 -0
- ultralytics/utils/callbacks/wb.py +6 -1
- ultralytics/utils/dist.py +1 -0
- ultralytics/utils/export/imx.py +21 -9
- ultralytics/utils/loss.py +18 -9
- ultralytics/utils/patches.py +42 -0
- ultralytics/utils/tal.py +15 -5
- ultralytics/utils/torch_utils.py +1 -1
- {ultralytics_opencv_headless-8.4.3.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/METADATA +12 -13
- {ultralytics_opencv_headless-8.4.3.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/RECORD +45 -44
- {ultralytics_opencv_headless-8.4.3.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/WHEEL +1 -1
- {ultralytics_opencv_headless-8.4.3.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/entry_points.txt +0 -0
- {ultralytics_opencv_headless-8.4.3.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/licenses/LICENSE +0 -0
- {ultralytics_opencv_headless-8.4.3.dist-info → ultralytics_opencv_headless-8.4.8.dist-info}/top_level.txt +0 -0
ultralytics/engine/predictor.py
CHANGED
@@ -387,6 +387,11 @@ class BasePredictor:
             model (str | Path | torch.nn.Module, optional): Model to load or use.
             verbose (bool): Whether to print verbose output.
         """
+        if hasattr(model, "end2end"):
+            if self.args.end2end is not None:
+                model.end2end = self.args.end2end
+            if model.end2end:
+                model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
         self.model = AutoBackend(
             model=model or self.args.model,
             device=select_device(self.args.device, verbose=verbose),
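Together with the head and task changes further down, this lets end-to-end behavior be toggled per call. A minimal usage sketch, assuming a checkpoint whose head exposes end2end (model and image names illustrative; the end2end, max_det, and agnostic_nms argument names are taken from this hunk):

    from ultralytics import YOLO

    model = YOLO("yolo26n.pt")  # assumed end-to-end-capable checkpoint
    # Per this hunk, these arguments are forwarded to the detection head
    results = model.predict("bus.jpg", end2end=False, max_det=100, agnostic_nms=True)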
ultralytics/engine/results.py
CHANGED
@@ -803,12 +803,17 @@ class Results(SimpleClass, DataExportMixin):
                     "y": (self.masks.xy[i][:, 1] / h).round(decimals).tolist(),
                 }
             if self.keypoints is not None:
-
+                kpt = self.keypoints[i]
+                if kpt.has_visible:
+                    x, y, visible = kpt.data[0].cpu().unbind(dim=1)
+                else:
+                    x, y = kpt.data[0].cpu().unbind(dim=1)
                 result["keypoints"] = {
-                    "x": (x / w).numpy().round(decimals).tolist(),
+                    "x": (x / w).numpy().round(decimals).tolist(),
                     "y": (y / h).numpy().round(decimals).tolist(),
-                    "visible": visible.numpy().round(decimals).tolist(),
                 }
+                if kpt.has_visible:
+                    result["keypoints"]["visible"] = visible.numpy().round(decimals).tolist()
             results.append(result)

         return results
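With this change, summary() emits a "visible" key only when the keypoints actually carry a visibility channel. A consumption sketch (model and image names illustrative):

    from ultralytics import YOLO

    results = YOLO("yolo11n-pose.pt")("bus.jpg")
    for det in results[0].summary(normalize=True, decimals=3):
        kpts = det.get("keypoints", {})
        print(sorted(kpts))  # ['x', 'y'] or ['visible', 'x', 'y'] after this change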
ultralytics/engine/trainer.py
CHANGED
@@ -948,7 +948,7 @@ class BaseTrainer:
         )
         nc = self.data.get("nc", 10)  # number of classes
         lr_fit = round(0.002 * 5 / (4 + nc), 6)  # lr0 fit equation to 6 decimal places
-        name, lr, momentum = ("MuSGD", 0.01 if iterations > 10000 else lr_fit, 0.9)
+        name, lr, momentum = ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)
         self.args.warmup_bias_lr = 0.0  # no higher than 0.01 for Adam

         use_muon = name == "MuSGD"

@@ -981,16 +981,18 @@
                 "Request support for addition optimizers at https://github.com/ultralytics/ultralytics."
             )

+        num_params = [len(g[0]), len(g[1]), len(g[2])]  # number of param groups
         g[2] = {"params": g[2], **optim_args, "param_group": "bias"}
         g[0] = {"params": g[0], **optim_args, "weight_decay": decay, "param_group": "weight"}
         g[1] = {"params": g[1], **optim_args, "weight_decay": 0.0, "param_group": "bn"}
-        muon, sgd = (0.
+        muon, sgd = (0.2, 1.0)
         if use_muon:
+            num_params[0] = len(g[3])  # update number of params
             g[3] = {"params": g[3], **optim_args, "weight_decay": decay, "use_muon": True, "param_group": "muon"}
             import re

             # higher lr for certain parameters in MuSGD when funetuning
-            pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg
+            pattern = re.compile(r"(?=.*23)(?=.*cv3)|proto\.semseg")
             g_ = []  # new param groups
             for x in g:
                 p = x.pop("params")

@@ -1002,6 +1004,6 @@

         LOGGER.info(
             f"{colorstr('optimizer:')} {type(optimizer).__name__}(lr={lr}, momentum={momentum}) with parameter groups "
-            f"{
+            f"{num_params[1]} weight(decay=0.0), {num_params[0]} weight(decay={decay}), {num_params[2]} bias(decay=0.0)"
         )
         return optimizer
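The auto-optimizer rule now switches between MuSGD and AdamW on the iteration count instead of only adjusting the learning rate. A standalone sketch of the selection logic as it reads in this hunk (helper name illustrative):

    def pick_optimizer(nc: int, iterations: int):
        """Reproduce the selection rule from this hunk (illustrative helper)."""
        lr_fit = round(0.002 * 5 / (4 + nc), 6)  # e.g. nc=80 -> 0.000119
        return ("MuSGD", 0.01, 0.9) if iterations > 10000 else ("AdamW", lr_fit, 0.9)

    print(pick_optimizer(nc=80, iterations=5000))   # ('AdamW', 0.000119, 0.9)
    print(pick_optimizer(nc=80, iterations=20000))  # ('MuSGD', 0.01, 0.9)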
ultralytics/engine/tuner.py
CHANGED
@@ -26,7 +26,7 @@ from datetime import datetime
 import numpy as np
 import torch

-from ultralytics.cfg import get_cfg, get_save_dir
+from ultralytics.cfg import CFG_INT_KEYS, get_cfg, get_save_dir
 from ultralytics.utils import DEFAULT_CFG, LOGGER, YAML, callbacks, colorstr, remove_colorstr
 from ultralytics.utils.checks import check_requirements
 from ultralytics.utils.patches import torch_load

@@ -448,7 +448,7 @@ class Tuner:
             f"{self.prefix}Best fitness model is {best_save_dir}"
         )
         LOGGER.info("\n" + header)
-        data = {k:
+        data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in zip(self.space.keys(), x[best_idx, 1:])}
         YAML.save(
             self.tune_dir / "best_hyperparameters.yaml",
             data=data,
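best_hyperparameters.yaml now round-trips integer-valued settings as ints rather than floats. A minimal sketch of the casting; the exact contents of CFG_INT_KEYS are assumed (it is imported from ultralytics.cfg in this hunk):

    CFG_INT_KEYS = {"epochs", "patience", "batch", "max_det"}  # illustrative subset

    best = {"lr0": 0.00125, "max_det": 300.0, "momentum": 0.937}
    data = {k: int(v) if k in CFG_INT_KEYS else float(v) for k, v in best.items()}
    print(data)  # {'lr0': 0.00125, 'max_det': 300, 'momentum': 0.937}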
ultralytics/engine/validator.py
CHANGED
@@ -156,6 +156,11 @@ class BaseValidator:
         if str(self.args.model).endswith(".yaml") and model is None:
             LOGGER.warning("validating an untrained model YAML will result in 0 mAP.")
         callbacks.add_integration_callbacks(self)
+        if hasattr(model, "end2end"):
+            if self.args.end2end is not None:
+                model.end2end = self.args.end2end
+            if model.end2end:
+                model.set_head_attr(max_det=self.args.max_det, agnostic_nms=self.args.agnostic_nms)
         model = AutoBackend(
             model=model or self.args.model,
             device=select_device(self.args.device) if RANK == -1 else torch.device("cuda", RANK),
ultralytics/models/sam/predict.py
CHANGED

@@ -2619,6 +2619,7 @@ class SAM3VideoSemanticPredictor(SAM3SemanticPredictor):
         if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

+        names = []
         if len(curr_obj_ids) == 0:
             pred_masks, pred_boxes = None, torch.zeros((0, 7), device=self.device)
         else:

@@ -2656,9 +2657,8 @@
                     background_value=0,
                 ).squeeze(1)
             ) > 0
+        names = self.model.names or dict(enumerate(str(i) for i in range(pred_boxes[:, 6].int().max())))

-        # names = getattr(self.model, "names", [str(i) for i in range(pred_scores.shape[0])])
-        names = dict(enumerate(str(i) for i in range(pred_boxes.shape[0])))
         results = []
         for masks, boxes, orig_img, img_path in zip([pred_masks], [pred_boxes], orig_imgs, self.batch[0]):
             results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=boxes))
ultralytics/models/yolo/classify/train.py
CHANGED

@@ -11,7 +11,7 @@ from ultralytics.data import ClassificationDataset, build_dataloader
 from ultralytics.engine.trainer import BaseTrainer
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import ClassificationModel
-from ultralytics.utils import DEFAULT_CFG, RANK
+from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
 from ultralytics.utils.plotting import plot_images
 from ultralytics.utils.torch_utils import is_parallel, torch_distributed_zero_first

@@ -138,6 +138,19 @@ class ClassificationTrainer(BaseTrainer):
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode)

+        # Filter out samples with class indices >= nc (prevents CUDA assertion errors)
+        nc = self.data.get("nc", 0)
+        dataset_nc = len(dataset.base.classes)
+        if nc and dataset_nc > nc:
+            extra_classes = dataset.base.classes[nc:]
+            original_count = len(dataset.samples)
+            dataset.samples = [s for s in dataset.samples if s[1] < nc]
+            skipped = original_count - len(dataset.samples)
+            LOGGER.warning(
+                f"{mode} split has {dataset_nc} classes but model expects {nc}. "
+                f"Skipping {skipped} samples from extra classes: {extra_classes}"
+            )
+
         loader = build_dataloader(dataset, batch_size, self.args.workers, rank=rank, drop_last=self.args.compile)
         # Attach inference transforms
         if mode != "train":
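The new guard drops every sample whose label index falls outside the model's nc before the dataloader is built. The same filter in isolation (sample tuples are (path, class_index), torchvision-style; values illustrative):

    nc = 10  # model expects 10 classes
    samples = [("img0.jpg", 3), ("img1.jpg", 9), ("img2.jpg", 12)]
    kept = [s for s in samples if s[1] < nc]
    print(len(samples) - len(kept), "samples skipped")  # 1 samples skipped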
ultralytics/models/yolo/detect/train.py
CHANGED

@@ -92,7 +92,7 @@ class DetectionTrainer(BaseTrainer):
         with torch_distributed_zero_first(rank):  # init dataset *.cache only once if DDP
             dataset = self.build_dataset(dataset_path, mode, batch_size)
         shuffle = mode == "train"
-        if getattr(dataset, "rect", False) and shuffle:
+        if getattr(dataset, "rect", False) and shuffle and not np.all(dataset.batch_shapes == dataset.batch_shapes[0]):
             LOGGER.warning("'rect=True' is incompatible with DataLoader shuffle, setting shuffle=False")
             shuffle = False
         return build_dataloader(

@@ -117,11 +117,13 @@
             if isinstance(v, torch.Tensor):
                 batch[k] = v.to(self.device, non_blocking=self.device.type == "cuda")
         batch["img"] = batch["img"].float() / 255
-
-        if random.random() < multi_scale:
+        if self.args.multi_scale > 0.0:
             imgs = batch["img"]
             sz = (
-                random.randrange(
+                random.randrange(
+                    int(self.args.imgsz * (1.0 - self.args.multi_scale)),
+                    int(self.args.imgsz * (1.0 + self.args.multi_scale) + self.stride),
+                )
                 // self.stride
                 * self.stride
             )  # size

@@ -143,6 +145,8 @@
         self.model.nc = self.data["nc"]  # attach number of classes to model
         self.model.names = self.data["names"]  # attach class names to model
         self.model.args = self.args  # attach hyperparameters to model
+        if getattr(self.model, "end2end"):
+            self.model.set_head_attr(max_det=self.args.max_det)
         # TODO: self.model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc

     def get_model(self, cfg: str | None = None, weights: str | None = None, verbose: bool = True):
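Multi-scale resizing is now gated on the multi_scale hyperparameter itself rather than on a per-batch random draw, and the sampled size is snapped to a stride multiple. The size computation in isolation (values illustrative):

    import random

    imgsz, stride, multi_scale = 640, 32, 0.5
    if multi_scale > 0.0:
        sz = (
            random.randrange(int(imgsz * (1.0 - multi_scale)), int(imgsz * (1.0 + multi_scale) + stride))
            // stride
            * stride
        )
        print(sz)  # a multiple of 32, roughly in [320, 960]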
ultralytics/models/yolo/pose/train.py
CHANGED

@@ -9,6 +9,7 @@ from typing import Any
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import PoseModel
 from ultralytics.utils import DEFAULT_CFG
+from ultralytics.utils.torch_utils import unwrap_model


 class PoseTrainer(yolo.detect.DetectionTrainer):

@@ -91,7 +92,7 @@ class PoseTrainer(yolo.detect.DetectionTrainer):
     def get_validator(self):
         """Return an instance of the PoseValidator class for validation."""
         self.loss_names = "box_loss", "pose_loss", "kobj_loss", "cls_loss", "dfl_loss"
-        if getattr(self.model.model[-1], "flow_model", None) is not None:
+        if getattr(unwrap_model(self.model).model[-1], "flow_model", None) is not None:
             self.loss_names += ("rle_loss",)
         return yolo.pose.PoseValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
ultralytics/models/yolo/segment/predict.py
CHANGED

@@ -60,7 +60,7 @@ class SegmentationPredictor(DetectionPredictor):
         >>> results = predictor.postprocess(preds, img, orig_img)
         """
         # Extract protos - tuple if PyTorch model or array if exported
-        protos = preds[0][
+        protos = preds[0][1] if isinstance(preds[0], tuple) else preds[1]
         return super().postprocess(preds[0], img, orig_imgs, protos=protos)

     def construct_results(self, preds, img, orig_imgs, protos):
ultralytics/models/yolo/segment/val.py
CHANGED

@@ -99,9 +99,7 @@ class SegmentationValidator(DetectionValidator):
         Returns:
             list[dict[str, torch.Tensor]]: Processed detection predictions with masks.
         """
-        proto = (
-            preds[0][-1] if isinstance(preds[0], tuple) else preds[-1]
-        )  # second output is len 3 if pt, but only 1 if exported
+        proto = preds[0][1] if isinstance(preds[0], tuple) else preds[1]
         preds = super().postprocess(preds[0])
         imgsz = [4 * x for x in proto.shape[2:]]  # get image size from proto
         for i, pred in enumerate(preds):
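Both the segmentation predictor and validator now take the prototype tensor from index 1 instead of -1, keeping PyTorch (tuple) and exported (flat) outputs consistent. A minimal sketch with dummy shapes (the nesting is assumed from the comments in these hunks):

    import torch

    det = torch.zeros(1, 116, 8400)
    proto = torch.zeros(1, 32, 160, 160)

    for preds in ((det, proto), ((det, proto), proto)):  # exported vs PyTorch-style nesting
        p = preds[0][1] if isinstance(preds[0], tuple) else preds[1]
        assert p is proto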
ultralytics/models/yolo/world/train_world.py
CHANGED

@@ -1,11 +1,14 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

+from __future__ import annotations
+
 from pathlib import Path

 from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
 from ultralytics.utils import DATASETS_DIR, DEFAULT_CFG, LOGGER
+from ultralytics.utils.checks import check_file
 from ultralytics.utils.torch_utils import unwrap_model

@@ -100,6 +103,23 @@ class WorldTrainerFromScratch(WorldTrainer):
         self.set_text_embeddings(datasets, batch)  # cache text embeddings to accelerate training
         return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]

+    @staticmethod
+    def check_data_config(data: dict | str | Path) -> dict:
+        """Check and load the data configuration from a YAML file or dictionary.
+
+        Args:
+            data (dict | str | Path): Data configuration as a dictionary or path to a YAML file.
+
+        Returns:
+            (dict): Data configuration dictionary loaded from YAML file or passed directly.
+        """
+        # If string, load from YAML file
+        if not isinstance(data, dict):
+            from ultralytics.utils import YAML
+
+            return YAML.load(check_file(data))
+        return data
+
     def get_dataset(self):
         """Get train and validation paths from data dictionary.

@@ -114,7 +134,7 @@
             AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
         """
         final_data = {}
-        data_yaml = self.args.data
+        self.args.data = data_yaml = self.check_data_config(self.args.data)
         assert data_yaml.get("train", False), "train dataset not found"  # object365.yaml
         assert data_yaml.get("val", False), "validation dataset not found"  # lvis.yaml
         data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
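get_dataset() previously assumed self.args.data was already a dict; the new static helper also accepts a YAML path. A usage sketch (config contents illustrative; per this diff, the helper exists from 8.4.8):

    from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

    cfg = {"train": {"yolo_data": ["Objects365.yaml"]}, "val": {"yolo_data": ["lvis.yaml"]}}
    assert WorldTrainerFromScratch.check_data_config(cfg) is cfg  # dicts pass through unchanged
    # A str/Path argument is instead resolved with check_file() and loaded via YAML.load()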
ultralytics/models/yolo/yoloe/train.py
CHANGED

@@ -196,7 +196,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         Returns:
             (dict): Dictionary mapping text samples to their embeddings.
         """
-        model =
+        model = unwrap_model(self.model).text_model
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
         if cache_path.exists():
             LOGGER.info(f"Reading existed cache from '{cache_path}'")

@@ -204,7 +204,6 @@
         if sorted(txt_map.keys()) == sorted(texts):
             return txt_map
         LOGGER.info(f"Caching text embeddings to '{cache_path}'")
-        assert self.model is not None
         txt_feats = unwrap_model(self.model).get_text_pe(texts, batch, without_reprta=True, cache_clip_model=False)
         txt_map = dict(zip(texts, txt_feats.squeeze(0)))
         torch.save(txt_map, cache_path)
ultralytics/nn/autobackend.py
CHANGED
@@ -648,7 +648,7 @@ class AutoBackend(nn.Module):
         for k, v in metadata.items():
             if k in {"stride", "batch", "channels"}:
                 metadata[k] = int(v)
-            elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args"} and isinstance(v, str):
+            elif k in {"imgsz", "names", "kpt_shape", "kpt_names", "args", "end2end"} and isinstance(v, str):
                 metadata[k] = ast.literal_eval(v)
         stride = metadata["stride"]
         task = metadata["task"]

@@ -887,7 +887,7 @@
                     x[:, 6::3] *= h
                 y.append(x)
             # TF segment fixes: export is reversed vs ONNX export and protos are transposed
-            if
+            if self.task == "segment":  # segment with (det, proto) output order reversed
                 if len(y[1].shape) != 4:
                     y = list(reversed(y))  # should be y = (1, 116, 8400), (1, 160, 160, 32)
                 if y[1].shape[-1] == 6:  # end-to-end model
ultralytics/nn/modules/head.py
CHANGED
@@ -69,6 +69,7 @@ class Detect(nn.Module):
     export = False  # export mode
     format = None  # export format
     max_det = 300  # max_det
+    agnostic_nms = False
     shape = None
     anchors = torch.empty(0)  # init
     strides = torch.empty(0)  # init

@@ -125,7 +126,12 @@
     @property
     def end2end(self):
         """Checks if the model has one2one for v5/v5/v8/v9/11 backward compatibility."""
-        return hasattr(self, "one2one")
+        return getattr(self, "_end2end", True) and hasattr(self, "one2one")
+
+    @end2end.setter
+    def end2end(self, value):
+        """Override the end-to-end detection mode."""
+        self._end2end = value

     def forward_head(
         self, x: list[torch.Tensor], box_head: torch.nn.Module = None, cls_head: torch.nn.Module = None

@@ -230,6 +236,11 @@
         # Use max_det directly during export for TensorRT compatibility (requires k to be constant),
         # otherwise use min(max_det, anchors) for safety with small inputs during Python inference
         k = max_det if self.export else min(max_det, anchors)
+        if self.agnostic_nms:
+            scores, labels = scores.max(dim=-1, keepdim=True)
+            scores, indices = scores.topk(k, dim=1)
+            labels = labels.gather(1, indices)
+            return scores, labels, indices
         ori_index = scores.max(dim=-1)[0].topk(k)[1].unsqueeze(-1)
         scores = scores.gather(dim=1, index=ori_index.repeat(1, 1, nc))
         scores, index = scores.flatten(1).topk(k)

@@ -1098,7 +1109,7 @@ class YOLOEDetect(Detect):
         boxes, scores, index = [], [], []
         bs = x[0].shape[0]
         cv2 = self.cv2 if not self.end2end else self.one2one_cv2
-        cv3 = self.cv3 if not self.end2end else self.
+        cv3 = self.cv3 if not self.end2end else self.one2one_cv3
         for i in range(self.nl):
             cls_feat = cv3[i](x[i])
             loc_feat = cv2[i](x[i])
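The new agnostic_nms branch replaces per-class top-k with a single class-agnostic top-k: take each anchor's best class, then keep the k highest-scoring anchors overall. The tensor logic in isolation (shapes illustrative):

    import torch

    bs, anchors, nc, k = 1, 8400, 80, 300
    scores_all = torch.rand(bs, anchors, nc)

    scores, labels = scores_all.max(dim=-1, keepdim=True)  # best class per anchor: (bs, anchors, 1)
    scores, indices = scores.topk(k, dim=1)                # top-k anchors overall: (bs, k, 1)
    labels = labels.gather(1, indices)                     # class labels of the kept anchors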
ultralytics/nn/tasks.py
CHANGED
@@ -425,6 +425,24 @@ class DetectionModel(BaseModel):
         """Return whether the model uses end-to-end NMS-free detection."""
         return getattr(self.model[-1], "end2end", False)

+    @end2end.setter
+    def end2end(self, value):
+        """Override the end-to-end detection mode."""
+        self.set_head_attr(end2end=value)
+
+    def set_head_attr(self, **kwargs):
+        """Set attributes of the model head (last layer).
+
+        Args:
+            **kwargs: Arbitrary keyword arguments representing attributes to set.
+        """
+        head = self.model[-1]
+        for k, v in kwargs.items():
+            if not hasattr(head, k):
+                LOGGER.warning(f"Head has no attribute '{k}'.")
+                continue
+            setattr(head, k, v)
+
     def _predict_augment(self, x):
         """Perform augmentations on input image x and return augmented inference and train outputs.
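set_head_attr gives trainers, validators, and predictors one place to push settings such as max_det or agnostic_nms into the detection head, warning on unknown names instead of raising. A usage sketch (model name illustrative):

    from ultralytics import YOLO

    det_model = YOLO("yolo26n.pt").model  # the underlying DetectionModel
    det_model.set_head_attr(max_det=100, agnostic_nms=True)
    det_model.set_head_attr(not_an_attr=1)  # logs a warning and skips the attribute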
ultralytics/solutions/security_alarm.py
CHANGED

@@ -62,7 +62,7 @@ class SecurityAlarm(BaseSolution):
         """
         import smtplib

-        self.server = smtplib.SMTP("smtp.gmail.com
+        self.server = smtplib.SMTP("smtp.gmail.com", 587)
         self.server.starttls()
         self.server.login(from_email, password)
         self.to_email = to_email
ultralytics/trackers/byte_tracker.py
CHANGED

@@ -270,9 +270,9 @@ class BYTETracker:
             args (Namespace): Command-line arguments containing tracking parameters.
             frame_rate (int): Frame rate of the video sequence.
         """
-        self.tracked_stracks
-        self.lost_stracks
-        self.removed_stracks
+        self.tracked_stracks: list[STrack] = []
+        self.lost_stracks: list[STrack] = []
+        self.removed_stracks: list[STrack] = []

         self.frame_id = 0
         self.args = args

@@ -304,7 +304,7 @@
         detections = self.init_track(results, feats_keep)
         # Add newly detected tracklets to tracked_stracks
         unconfirmed = []
-        tracked_stracks
+        tracked_stracks: list[STrack] = []
         for track in self.tracked_stracks:
             if not track.is_activated:
                 unconfirmed.append(track)

@@ -423,9 +423,9 @@
     def reset(self):
         """Reset the tracker by clearing all tracked, lost, and removed tracks and reinitializing the Kalman filter."""
-        self.tracked_stracks
-        self.lost_stracks
-        self.removed_stracks
+        self.tracked_stracks: list[STrack] = []
+        self.lost_stracks: list[STrack] = []
+        self.removed_stracks: list[STrack] = []
         self.frame_id = 0
         self.kalman_filter = self.get_kalmanfilter()
         self.reset_id()
ultralytics/utils/benchmarks.py
CHANGED
@@ -36,6 +36,7 @@ import platform
 import re
 import shutil
 import time
+from copy import deepcopy
 from pathlib import Path

 import numpy as np

@@ -101,7 +102,6 @@ def benchmark(
     device = select_device(device, verbose=False)
     if isinstance(model, (str, Path)):
         model = YOLO(model)
-    is_end2end = getattr(model.model.model[-1], "end2end", False)
     data = data or TASK2DATA[model.task]  # task to dataset, i.e. coco8.yaml for task=detect
     key = TASK2METRIC[model.task]  # task to metric, i.e. metrics/mAP50-95(B) for task=detect

@@ -135,14 +135,12 @@
         if format == "paddle":
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 Paddle exports not supported yet"
             assert model.task != "obb", "Paddle OBB bug https://github.com/PaddlePaddle/Paddle/issues/72024"
-            assert not is_end2end, "End-to-end models not supported by PaddlePaddle yet"
             assert (LINUX and not IS_JETSON) or MACOS, "Windows and Jetson Paddle exports not supported yet"
         if format == "mnn":
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 MNN exports not supported yet"
         if format == "ncnn":
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 NCNN exports not supported yet"
         if format == "imx":
-            assert not is_end2end
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 IMX exports not supported"
             assert model.task in {"detect", "classify", "pose"}, (
                 "IMX export is only supported for detection, classification and pose estimation tasks"

@@ -150,12 +148,10 @@
             assert "C2f" in model.__str__(), "IMX only supported for YOLOv8n and YOLO11n"
         if format == "rknn":
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 RKNN exports not supported yet"
-            assert not is_end2end, "End-to-end models not supported by RKNN yet"
             assert LINUX, "RKNN only supported on Linux"
             assert not is_rockchip(), "RKNN Inference only supported on Rockchip devices"
         if format == "executorch":
             assert not isinstance(model, YOLOWorld), "YOLOWorldv2 ExecuTorch exports not supported yet"
-            assert not is_end2end, "End-to-end models not supported by ExecuTorch yet"
         if "cpu" in device.type:
             assert cpu, "inference not supported on CPU"
         if "cuda" in device.type:

@@ -164,9 +160,9 @@
         # Export
         if format == "-":
             filename = model.pt_path or model.ckpt_path or model.model_name
-            exported_model = model  # PyTorch format
+            exported_model = deepcopy(model)  # PyTorch format
         else:
-            filename = model.export(
+            filename = deepcopy(model).export(
                 imgsz=imgsz, format=format, half=half, int8=int8, data=data, device=device, verbose=False, **kwargs
             )
             exported_model = YOLO(filename, task=model.task)

@@ -178,8 +174,6 @@
         assert model.task != "pose" or format != "executorch", "ExecuTorch Pose inference is not supported"
         assert format not in {"edgetpu", "tfjs"}, "inference not supported"
         assert format != "coreml" or platform.system() == "Darwin", "inference only supported on macOS>=10.13"
-        if format == "ncnn":
-            assert not is_end2end, "End-to-end torch.topk operation is not supported for NCNN prediction yet"
         exported_model.predict(ASSETS / "bus.jpg", imgsz=imgsz, device=device, half=half, verbose=False)

         # Validate
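Benchmarking now exports from a deepcopy of the model, so fusing or format-specific mutation during one export can no longer leak into the next format's run, and the end2end pre-checks are dropped since end-to-end models now export. A sketch of the pattern (model name and format list illustrative):

    from copy import deepcopy

    from ultralytics import YOLO

    model = YOLO("yolo11n.pt")
    for fmt in ("onnx", "torchscript"):
        filename = deepcopy(model).export(format=fmt)  # original `model` stays untouched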
ultralytics/utils/callbacks/platform.py
CHANGED

@@ -89,7 +89,8 @@ def resolve_platform_uri(uri, hard=True):
         raise ValueError(f"Invalid platform URI: {uri}. Use ul://user/datasets/name or ul://user/project/model")

     try:
-
+        timeout = 3600 if "/datasets/" in url else 90  # NDJSON generation can be slow for large datasets
+        r = requests.head(url, headers=headers, allow_redirects=False, timeout=timeout)

         # Handle redirect responses (301, 302, 303, 307, 308)
         if 300 <= r.status_code < 400 and "location" in r.headers:
ultralytics/utils/callbacks/tensorboard.py
CHANGED

@@ -1,6 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 from ultralytics.utils import LOGGER, SETTINGS, TESTS_RUNNING, colorstr, torch_utils
+from ultralytics.utils.torch_utils import smart_inference_mode

 try:
     assert not TESTS_RUNNING  # do not log pytest

@@ -38,6 +39,7 @@ def _log_scalars(scalars: dict, step: int = 0) -> None:
         WRITER.add_scalar(k, v, step)


+@smart_inference_mode()
 def _log_tensorboard_graph(trainer) -> None:
     """Log model graph to TensorBoard.
ultralytics/utils/callbacks/wb.py
CHANGED

@@ -128,10 +128,15 @@ def _log_plots(plots, step):
 def on_pretrain_routine_start(trainer):
     """Initialize and start wandb project if module is present."""
     if not wb.run:
+        from datetime import datetime
+
+        name = str(trainer.args.name).replace("/", "-").replace(" ", "_")
         wb.init(
             project=str(trainer.args.project).replace("/", "-") if trainer.args.project else "Ultralytics",
-            name=
+            name=name,
             config=vars(trainer.args),
+            id=f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}",  # add unique id
+            dir=str(trainer.save_dir),
         )
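Run names are now sanitized and given a timestamp-suffixed id, so repeated runs with the same name no longer collide in W&B. The id construction in isolation (run name illustrative):

    from datetime import datetime

    name = "exp/run 1".replace("/", "-").replace(" ", "_")
    run_id = f"{name}_{datetime.now().strftime('%Y%m%d_%H%M%S')}"
    print(run_id)  # e.g. exp-run_1_20250101_120000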
ultralytics/utils/dist.py
CHANGED
@@ -49,6 +49,7 @@ def generate_ddp_file(trainer):

     content = f"""
# Ultralytics Multi-GPU training temp file (should be automatically deleted after use)
+from pathlib import Path, PosixPath  # For model arguments stored as Path instead of str
overrides = {vars(trainer.args)}

if __name__ == "__main__":
ultralytics/utils/export/imx.py
CHANGED
@@ -23,25 +23,37 @@ MCT_CONFIG = {
         "detect": {
             "layer_names": ["sub", "mul_2", "add_14", "cat_19"],
             "weights_memory": 2585350.2439,
-            "n_layers": 238,
+            "n_layers": {238, 239},
         },
         "pose": {
             "layer_names": ["sub", "mul_2", "add_14", "cat_21", "cat_22", "mul_4", "add_15"],
             "weights_memory": 2437771.67,
-            "n_layers": 257,
+            "n_layers": {257, 258},
+        },
+        "classify": {"layer_names": [], "weights_memory": np.inf, "n_layers": {112}},
+        "segment": {
+            "layer_names": ["sub", "mul_2", "add_14", "cat_21"],
+            "weights_memory": 2466604.8,
+            "n_layers": {265, 266},
         },
-        "classify": {"layer_names": [], "weights_memory": np.inf, "n_layers": 112},
-        "segment": {"layer_names": ["sub", "mul_2", "add_14", "cat_21"], "weights_memory": 2466604.8, "n_layers": 265},
     },
     "YOLOv8": {
-        "detect": {
+        "detect": {
+            "layer_names": ["sub", "mul", "add_6", "cat_15"],
+            "weights_memory": 2550540.8,
+            "n_layers": {168, 169},
+        },
         "pose": {
             "layer_names": ["add_7", "mul_2", "cat_17", "mul", "sub", "add_6", "cat_18"],
             "weights_memory": 2482451.85,
-            "n_layers": 187,
+            "n_layers": {187, 188},
+        },
+        "classify": {"layer_names": [], "weights_memory": np.inf, "n_layers": {73}},
+        "segment": {
+            "layer_names": ["sub", "mul", "add_6", "cat_17"],
+            "weights_memory": 2580060.0,
+            "n_layers": {195, 196},
         },
-        "classify": {"layer_names": [], "weights_memory": np.inf, "n_layers": 73},
-        "segment": {"layer_names": ["sub", "mul", "add_6", "cat_17"], "weights_memory": 2580060.0, "n_layers": 195},
     },
 }

@@ -251,7 +263,7 @@ def torch2imx(
     mct_config = MCT_CONFIG["YOLO11" if "C2PSA" in model.__str__() else "YOLOv8"][model.task]

     # Check if the model has the expected number of layers
-    if len(list(model.modules()))
+    if len(list(model.modules())) not in mct_config["n_layers"]:
         raise ValueError("IMX export only supported for YOLOv8n and YOLO11n models.")

     for layer_name in mct_config["layer_names"]:
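The n_layers entries are now sets, so each task accepts both the classic and the new (one extra module) layer counts with a single membership test. The check in isolation, with a stand-in module (the real check runs against the model being exported):

    import torch.nn as nn

    model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.SiLU())  # stand-in for the export model
    mct_config = {"n_layers": {238, 239}}  # YOLO11 detect entry from this diff
    n = len(list(model.modules()))
    print(n in mct_config["n_layers"])  # False for this stand-in, True for YOLO11n detect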
|