ultralytics 8.3.118__py3-none-any.whl → 8.3.120__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +1 -0
- ultralytics/cfg/default.yaml +1 -0
- ultralytics/data/augment.py +125 -32
- ultralytics/data/dataset.py +7 -5
- ultralytics/engine/exporter.py +9 -87
- ultralytics/engine/tuner.py +2 -1
- ultralytics/models/rtdetr/val.py +1 -0
- ultralytics/models/yolo/world/train_world.py +17 -5
- ultralytics/models/yolo/yoloe/__init__.py +2 -1
- ultralytics/models/yolo/yoloe/train.py +12 -113
- ultralytics/models/yolo/yoloe/train_seg.py +1 -17
- ultralytics/trackers/bot_sort.py +5 -5
- ultralytics/utils/loss.py +4 -3
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/tuner.py +2 -1
- {ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/METADATA +6 -7
- {ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/RECORD +22 -22
- {ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/WHEEL +1 -1
- {ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py
CHANGED
ultralytics/cfg/__init__.py
CHANGED
ultralytics/cfg/default.yaml
CHANGED
@@ -114,6 +114,7 @@ fliplr: 0.5 # (float) image flip left-right (probability)
 bgr: 0.0 # (float) image channel BGR (probability)
 mosaic: 1.0 # (float) image mosaic (probability)
 mixup: 0.0 # (float) image mixup (probability)
+cutmix: 0.0 # (float) image cutmix (probability)
 copy_paste: 0.0 # (float) segment copy-paste (probability)
 copy_paste_mode: "flip" # (str) the method to do copy_paste augmentation (flip, mixup)
 auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
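The new `cutmix` hyperparameter is a plain probability and can be overridden per run like `mosaic` or `mixup`. A minimal sketch, assuming a placeholder checkpoint and the bundled coco8 demo dataset:

from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # placeholder checkpoint
model.train(data="coco8.yaml", epochs=3, cutmix=0.3)  # probability in [0, 1]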
ultralytics/data/augment.py
CHANGED
@@ -317,7 +317,7 @@ class Compose:
 
 class BaseMixTransform:
     """
-    Base class for mix transformations like MixUp and Mosaic.
+    Base class for mix transformations like Cutmix, MixUp and Mosaic.
 
     This class provides a foundation for implementing mix transformations on datasets. It handles the
     probability-based application of transforms and manages the mixing of multiple images and labels.
@@ -348,7 +348,7 @@ class BaseMixTransform:
 
     def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
         """
-        Initializes the BaseMixTransform object for mix transformations like MixUp and Mosaic.
+        Initializes the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
 
         This class serves as a base for implementing mix transformations in image processing pipelines.
 
@@ -368,7 +368,7 @@ class BaseMixTransform:
 
     def __call__(self, labels):
         """
-        Applies pre-processing transforms and mixup/mosaic transforms to labels data.
+        Applies pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
 
         This method determines whether to apply the mix transform based on a probability factor. If applied, it
         selects additional images, applies pre-transforms if specified, and then performs the mix transform.
@@ -391,7 +391,7 @@ class BaseMixTransform:
         if isinstance(indexes, int):
             indexes = [indexes]
 
-        # Get images information will be used for Mosaic or MixUp
+        # Get images information will be used for Mosaic, CutMix or MixUp
         mix_labels = [self.dataset.get_image_and_label(i) for i in indexes]
 
         if self.pre_transform is not None:
@@ -401,16 +401,16 @@ class BaseMixTransform:
 
         # Update cls and texts
         labels = self._update_label_text(labels)
-        # Mosaic or MixUp
+        # Mosaic, CutMix or MixUp
         labels = self._mix_transform(labels)
         labels.pop("mix_labels", None)
         return labels
 
     def _mix_transform(self, labels):
         """
-        Applies MixUp or Mosaic augmentation to the label dictionary.
+        Applies CutMix, MixUp or Mosaic augmentation to the label dictionary.
 
-        This method should be implemented by subclasses to perform specific mix transformations like MixUp or
+        This method should be implemented by subclasses to perform specific mix transformations like CutMix, MixUp or
         Mosaic. It modifies the input label dictionary in-place with the augmented data.
 
         Args:
@@ -439,7 +439,7 @@ class BaseMixTransform:
         >>> indexes = transform.get_indexes()
         >>> print(indexes)  # [3, 18, 7, 2]
         """
-        raise NotImplementedError
+        return random.randint(0, len(self.dataset) - 1)
 
     @staticmethod
     def _update_label_text(labels):
@@ -877,7 +877,6 @@ class MixUp(BaseMixTransform):
         p (float): Probability of applying MixUp augmentation.
 
     Methods:
-        get_indexes: Returns a random index from the dataset.
         _mix_transform: Applies MixUp augmentation to the input labels.
 
     Examples:
@@ -906,24 +905,6 @@ class MixUp(BaseMixTransform):
         """
         super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
 
-    def get_indexes(self):
-        """
-        Get a random index from the dataset.
-
-        This method returns a single random index from the dataset, which is used to select an image for MixUp
-        augmentation.
-
-        Returns:
-            (int): A random integer index within the range of the dataset length.
-
-        Examples:
-            >>> mixup = MixUp(dataset)
-            >>> index = mixup.get_indexes()
-            >>> print(index)
-            42
-        """
-        return random.randint(0, len(self.dataset) - 1)
-
     def _mix_transform(self, labels):
         """
         Applies MixUp augmentation to the input labels.
@@ -949,6 +930,122 @@ class MixUp(BaseMixTransform):
         return labels
 
 
+class CutMix(BaseMixTransform):
+    """
+    Applies CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
+
+    CutMix combines two images by replacing a random rectangular region of one image with the corresponding
+    region from another image, and adjusts the labels proportionally to the area of the mixed region.
+
+    Attributes:
+        dataset (Any): The dataset to which CutMix augmentation will be applied.
+        pre_transform (Callable | None): Optional transform to apply before CutMix.
+        p (float): Probability of applying CutMix augmentation.
+        beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
+        num_areas (int): Number of areas to try to cut and mix (default=3).
+
+    Methods:
+        _mix_transform: Applies CutMix augmentation to the input labels.
+        _rand_bbox: Generates random bounding box coordinates for the cut region.
+
+    Examples:
+        >>> from ultralytics.data.augment import CutMix
+        >>> dataset = YourDataset(...)  # Your image dataset
+        >>> cutmix = CutMix(dataset, p=0.5)
+        >>> augmented_labels = cutmix(original_labels)
+    """
+
+    def __init__(self, dataset, pre_transform=None, p=0.0, beta=1.0, num_areas=3) -> None:
+        """
+        Initializes the CutMix augmentation object.
+
+        Args:
+            dataset (Any): The dataset to which CutMix augmentation will be applied.
+            pre_transform (Callable | None): Optional transform to apply before CutMix.
+            p (float): Probability of applying CutMix augmentation.
+            beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
+            num_areas (int): Number of areas to try to cut and mix (default=3).
+        """
+        super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
+        self.beta = beta
+        self.num_areas = num_areas
+
+    def _rand_bbox(self, width, height):
+        """
+        Generates random bounding box coordinates for the cut region.
+
+        Args:
+            width (int): Width of the image.
+            height (int): Height of the image.
+
+        Returns:
+            (tuple): (x1, y1, x2, y2) coordinates of the bounding box.
+        """
+        # Sample mixing ratio from Beta distribution
+        lam = np.random.beta(self.beta, self.beta)
+
+        cut_ratio = np.sqrt(1.0 - lam)
+        cut_w = int(width * cut_ratio)
+        cut_h = int(height * cut_ratio)
+
+        # Random center
+        cx = np.random.randint(width)
+        cy = np.random.randint(height)
+
+        # Bounding box coordinates
+        x1 = np.clip(cx - cut_w // 2, 0, width)
+        y1 = np.clip(cy - cut_h // 2, 0, height)
+        x2 = np.clip(cx + cut_w // 2, 0, width)
+        y2 = np.clip(cy + cut_h // 2, 0, height)
+
+        return x1, y1, x2, y2
+
+    def _mix_transform(self, labels):
+        """
+        Applies CutMix augmentation to the input labels.
+
+        Args:
+            labels (dict): A dictionary containing the original image and label information.
+
+        Returns:
+            (dict): A dictionary containing the mixed image and adjusted labels.
+
+        Examples:
+            >>> cutter = CutMix(dataset)
+            >>> mixed_labels = cutter._mix_transform(labels)
+        """
+        # Get a random second image
+        h, w = labels["img"].shape[:2]
+
+        cut_areas = np.asarray([self._rand_bbox(w, h) for _ in range(self.num_areas)], dtype=np.float32)
+        ioa1 = bbox_ioa(cut_areas, labels["instances"].bboxes)  # (self.num_areas, num_boxes)
+        idx = np.nonzero(ioa1.sum(axis=1) <= 0)[0]
+        if len(idx) == 0:
+            return labels
+
+        labels2 = labels.pop("mix_labels")[0]
+        area = cut_areas[np.random.choice(idx)]  # randomly select one
+        ioa2 = bbox_ioa(area[None], labels2["instances"].bboxes).squeeze(0)
+        indexes2 = np.nonzero(ioa2 >= (0.01 if len(labels["instances"].segments) else 0.1))[0]
+
+        instances2 = labels2["instances"][indexes2]
+        instances2.convert_bbox("xyxy")
+        instances2.denormalize(w, h)
+
+        # Apply CutMix
+        x1, y1, x2, y2 = area.astype(np.int32)
+        labels["img"][y1:y2, x1:x2] = labels2["img"][y1:y2, x1:x2]
+
+        # Restrain instances2 to the random bounding border
+        instances2.add_padding(-x1, -y1)
+        instances2.clip(x2 - x1, y2 - y1)
+        instances2.add_padding(x1, y1)
+
+        labels["cls"] = np.concatenate([labels["cls"], labels2["cls"][indexes2]], axis=0)
+        labels["instances"] = Instances.concatenate([labels["instances"], instances2], axis=0)
+        return labels
+
+
 class RandomPerspective:
     """
     Implements random perspective and affine transformations on images and corresponding annotations.
@@ -1655,7 +1752,6 @@ class CopyPaste(BaseMixTransform):
         p (float): Probability of applying Copy-Paste augmentation.
 
     Methods:
-        get_indexes: Returns a random index from the dataset.
         _mix_transform: Applies Copy-Paste augmentation to the input labels.
         __call__: Applies the Copy-Paste transformation to images and annotations.
 
@@ -1672,10 +1768,6 @@ class CopyPaste(BaseMixTransform):
         assert mode in {"flip", "mixup"}, f"Expected `mode` to be `flip` or `mixup`, but got {mode}."
         self.mode = mode
 
-    def get_indexes(self):
-        """Returns a list of random indexes from the dataset for CopyPaste augmentation."""
-        return random.randint(0, len(self.dataset) - 1)
-
     def _mix_transform(self, labels):
         """Applies Copy-Paste augmentation to combine objects from another image into the current image."""
         labels2 = labels["mix_labels"][0]
@@ -2445,6 +2537,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
         [
             pre_transform,
             MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
+            CutMix(dataset, pre_transform=pre_transform, p=hyp.cutmix),
             Albumentations(p=1.0),
             RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
             RandomFlip(direction="vertical", p=hyp.flipud),
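For intuition on `_rand_bbox`: the cut side ratio is sqrt(1 - lam), so the pasted region covers roughly a (1 - lam) fraction of the image, with lam drawn from Beta(beta, beta). A self-contained sketch of the same sampling (names are local to this example, not library API):

import numpy as np

def rand_cut_region(width, height, beta=1.0):
    """Sample a CutMix rectangle whose area fraction is about 1 - lam, lam ~ Beta(beta, beta)."""
    lam = np.random.beta(beta, beta)
    cut_ratio = np.sqrt(1.0 - lam)  # side ratio; squaring it gives the area fraction
    cut_w, cut_h = int(width * cut_ratio), int(height * cut_ratio)
    cx, cy = np.random.randint(width), np.random.randint(height)  # random center, clipped at borders
    x1 = np.clip(cx - cut_w // 2, 0, width)
    y1 = np.clip(cy - cut_h // 2, 0, height)
    x2 = np.clip(cx + cut_w // 2, 0, width)
    y2 = np.clip(cy + cut_h // 2, 0, height)
    return x1, y1, x2, y2

img_a = np.zeros((480, 640, 3), dtype=np.uint8)    # stand-in images
img_b = np.full((480, 640, 3), 255, dtype=np.uint8)
x1, y1, x2, y2 = rand_cut_region(640, 480)
img_a[y1:y2, x1:x2] = img_b[y1:y2, x1:x2]          # paste the region from the second image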
ultralytics/data/dataset.py
CHANGED
@@ -215,6 +215,7 @@ class YOLODataset(BaseDataset):
         if self.augment:
             hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
             hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
+            hyp.cutmix = hyp.cutmix if self.augment and not self.rect else 0.0
             transforms = v8_transforms(self, self.imgsz, hyp)
         else:
             transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
@@ -235,14 +236,15 @@ class YOLODataset(BaseDataset):
 
     def close_mosaic(self, hyp):
         """
-        Disable mosaic, copy_paste and mixup augmentations by setting their probabilities to 0.0.
+        Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.
 
         Args:
             hyp (dict): Hyperparameters for transforms.
         """
-        hyp.mosaic = 0.0
-        hyp.copy_paste = 0.0
-        hyp.mixup = 0.0
+        hyp.mosaic = 0.0
+        hyp.copy_paste = 0.0
+        hyp.mixup = 0.0
+        hyp.cutmix = 0.0
         self.transforms = self.build_transforms(hyp)
 
     def update_labels_info(self, label):
@@ -441,7 +443,7 @@ class GroundingDataset(YOLODataset):
         """
         assert task in {"detect", "segment"}, "GroundingDataset currently only supports `detect` and `segment` tasks"
         self.json_file = json_file
-        super().__init__(*args, task=task, data={}, **kwargs)
+        super().__init__(*args, task=task, data={"channels": 3}, **kwargs)
 
     def get_img_files(self, img_path):
         """
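Note that `close_mosaic` now covers the new augmentation too, so the usual end-of-training shutdown of heavy mix transforms needs no extra flags. A sketch, assuming a placeholder checkpoint:

from ultralytics import YOLO

# For the final close_mosaic epochs the trainer calls dataset.close_mosaic(hyp),
# which now zeroes mosaic, copy_paste, mixup and cutmix together.
YOLO("yolo11n.pt").train(data="coco8.yaml", epochs=10, close_mosaic=3, cutmix=0.2)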
ultralytics/engine/exporter.py
CHANGED
@@ -95,7 +95,6 @@ from ultralytics.utils import (
     yaml_save,
 )
 from ultralytics.utils.checks import (
-    IS_PYTHON_MINIMUM_3_12,
     check_imgsz,
     check_is_path_safe,
     check_requirements,
@@ -549,7 +548,7 @@ class Exporter:
         """YOLO ONNX export."""
         requirements = ["onnx>=1.12.0"]
         if self.args.simplify:
-            requirements += ["onnxslim", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
+            requirements += ["onnxslim>=0.1.46", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
         check_requirements(requirements)
         import onnx  # noqa
 
@@ -569,12 +568,6 @@ class Exporter:
             dynamic["output0"].pop(2)
         if self.args.nms and self.model.task == "obb":
             self.args.opset = opset_version  # for NMSModel
-            # OBB error https://github.com/pytorch/pytorch/issues/110859#issuecomment-1757841865
-            try:
-                torch.onnx.register_custom_op_symbolic("aten::lift_fresh", lambda g, x: x, opset_version)
-            except RuntimeError:  # it will fail if it's already registered
-                pass
-            check_requirements("onnxslim>=0.1.46")  # Older versions has bug with OBB
 
         with arange_patch(self.args):
             export_onnx(
@@ -650,7 +643,7 @@ class Exporter:
             """Quantization transform function."""
             data_item: torch.Tensor = data_item["img"] if isinstance(data_item, dict) else data_item
             assert data_item.dtype == torch.uint8, "Input image must be uint8 for the quantization preprocessing"
-            im = data_item.numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0
+            im = data_item.numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0-255 to 0.0-1.0
             return np.expand_dims(im, 0) if im.ndim == 3 else im
 
         # Generate calibration data for integer quantization
@@ -914,14 +907,13 @@ class Exporter:
         import tensorflow as tf  # noqa
         check_requirements(
             (
-                "keras",  # required by 'onnx2tf' package
                 "tf_keras",  # required by 'onnx2tf' package
                 "sng4onnx>=1.0.1",  # required by 'onnx2tf' package
                 "onnx_graphsurgeon>=0.3.26",  # required by 'onnx2tf' package
                 "ai-edge-litert>=1.2.0",  # required by 'onnx2tf' package
                 "onnx>=1.12.0",
                 "onnx2tf>=1.26.3",
-                "onnxslim>=0.1.
+                "onnxslim>=0.1.46",
                 "onnxruntime-gpu" if cuda else "onnxruntime",
                 "protobuf>=5",
             ),
@@ -1027,8 +1019,6 @@ class Exporter:
     @try_export
     def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
         """YOLO Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
-        LOGGER.warning(f"{prefix} Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185")
-
         cmd = "edgetpu_compiler --version"
         help_url = "https://coral.ai/docs/edgetpu/compiler/"
         assert LINUX, f"export only supported on Linux. See {help_url}"
@@ -1126,7 +1116,8 @@ class Exporter:
         """YOLO IMX export."""
         gptq = False
         assert LINUX, (
-            "export only supported on Linux. See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
+            "export only supported on Linux. "
+            "See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
         )
         if getattr(self.model, "end2end", False):
             raise ValueError("IMX export is not supported for end2end models.")
@@ -1274,81 +1265,12 @@ class Exporter:
 
         return f, None
 
-    def _add_tflite_metadata(self, file, use_flatbuffers=False):
+    def _add_tflite_metadata(self, file):
         """Add metadata to *.tflite models per https://ai.google.dev/edge/litert/models/metadata."""
-        if not use_flatbuffers:
-            import zipfile
-
-            with zipfile.ZipFile(file, "a", zipfile.ZIP_DEFLATED) as zf:
-                zf.writestr("metadata.json", json.dumps(self.metadata, indent=2))
-            return
+        import zipfile
 
-        # Update old 'flatbuffers' included inside tensorflow package
-        check_requirements(("tflite_support", "flatbuffers>=23.5.26,<100; platform_machine == 'aarch64'"))
-        import flatbuffers
-
-        try:
-            # TFLite Support bug https://github.com/tensorflow/tflite-support/issues/954#issuecomment-2108570845
-            from tensorflow_lite_support.metadata import metadata_schema_py_generated as schema  # noqa
-            from tensorflow_lite_support.metadata.python import metadata  # noqa
-        except ImportError:  # ARM64 systems may not have the 'tensorflow_lite_support' package available
-            from tflite_support import metadata  # noqa
-            from tflite_support import metadata_schema_py_generated as schema  # noqa
-
-        # Create model info
-        model_meta = schema.ModelMetadataT()
-        model_meta.name = self.metadata["description"]
-        model_meta.version = self.metadata["version"]
-        model_meta.author = self.metadata["author"]
-        model_meta.license = self.metadata["license"]
-
-        # Label file
-        tmp_file = Path(file).parent / "temp_meta.txt"
-        with open(tmp_file, "w", encoding="utf-8") as f:
-            f.write(str(self.metadata))
-
-        label_file = schema.AssociatedFileT()
-        label_file.name = tmp_file.name
-        label_file.type = schema.AssociatedFileType.TENSOR_AXIS_LABELS
-
-        # Create input info
-        input_meta = schema.TensorMetadataT()
-        input_meta.name = "image"
-        input_meta.description = "Input image to be detected."
-        input_meta.content = schema.ContentT()
-        input_meta.content.contentProperties = schema.ImagePropertiesT()
-        input_meta.content.contentProperties.colorSpace = schema.ColorSpaceType.RGB
-        input_meta.content.contentPropertiesType = schema.ContentProperties.ImageProperties
-
-        # Create output info
-        output1 = schema.TensorMetadataT()
-        output1.name = "output"
-        output1.description = "Coordinates of detected objects, class labels, and confidence score"
-        output1.associatedFiles = [label_file]
-        if self.model.task == "segment":
-            output2 = schema.TensorMetadataT()
-            output2.name = "output"
-            output2.description = "Mask protos"
-            output2.associatedFiles = [label_file]
-
-        # Create subgraph info
-        subgraph = schema.SubGraphMetadataT()
-        subgraph.inputTensorMetadata = [input_meta]
-        subgraph.outputTensorMetadata = [output1, output2] if self.model.task == "segment" else [output1]
-        model_meta.subgraphMetadata = [subgraph]
-
-        b = flatbuffers.Builder(0)
-        b.Finish(model_meta.Pack(b), metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
-        metadata_buf = b.Output()
-
-        populator = metadata.MetadataPopulator.with_model_file(str(file))
-        populator.load_metadata_buffer(metadata_buf)
-        populator.load_associated_files([str(tmp_file)])
-        populator.populate()
-        tmp_file.unlink()
+        with zipfile.ZipFile(file, "a", zipfile.ZIP_DEFLATED) as zf:
+            zf.writestr("metadata.json", json.dumps(self.metadata, indent=2))
 
     def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
         """YOLO CoreML pipeline."""
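Because the exporter now simply appends `metadata.json` to the `.tflite` file (a zip record whose central directory sits at the end of the file), the metadata can be read back with the standard library alone. A sketch; the file name is a placeholder:

import json
import zipfile

with zipfile.ZipFile("yolo11n_float32.tflite") as zf:
    meta = json.loads(zf.read("metadata.json"))
print(meta.get("description"), meta.get("version"))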
ultralytics/engine/tuner.py
CHANGED
@@ -88,8 +88,9 @@ class Tuner:
             "flipud": (0.0, 1.0),  # image flip up-down (probability)
             "fliplr": (0.0, 1.0),  # image flip left-right (probability)
             "bgr": (0.0, 1.0),  # image channel bgr (probability)
-            "mosaic": (0.0, 1.0),  # image
+            "mosaic": (0.0, 1.0),  # image mosaic (probability)
             "mixup": (0.0, 1.0),  # image mixup (probability)
+            "cutmix": (0.0, 1.0),  # image cutmix (probability)
             "copy_paste": (0.0, 1.0),  # segment copy-paste (probability)
         }
         self.args = get_cfg(overrides=args)
ultralytics/models/rtdetr/val.py
CHANGED
@@ -63,6 +63,7 @@ class RTDETRDataset(YOLODataset):
         if self.augment:
             hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
             hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
+            hyp.cutmix = hyp.cutmix if self.augment and not self.rect else 0.0
             transforms = v8_transforms(self, self.imgsz, hyp, stretch=True)
         else:
             # transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), auto=False, scale_fill=True)])
ultralytics/models/yolo/world/train_world.py
CHANGED
@@ -3,7 +3,7 @@
 from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
 from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.world import WorldTrainer
-from ultralytics.utils import DEFAULT_CFG
+from ultralytics.utils import DEFAULT_CFG, LOGGER
 from ultralytics.utils.torch_utils import de_parallel
 
 
@@ -93,14 +93,14 @@ class WorldTrainerFromScratch(WorldTrainer):
         """
         gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
         if mode != "train":
-            return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs)
-        datasets = [
-            build_yolo_dataset(self.args, im_path, batch, self.data, stride=gs, multi_modal=True)
+            return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=False, stride=gs)
+        datasets = [
+            build_yolo_dataset(self.args, im_path, batch, self.training_data[im_path], stride=gs, multi_modal=True)
             if isinstance(im_path, str)
             else build_grounding(self.args, im_path["img_path"], im_path["json_file"], batch, stride=gs)
             for im_path in img_path
         ]
-        return YOLOConcatDataset(datasets)
+        return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]
 
     def get_dataset(self):
         """
@@ -140,8 +140,20 @@ class WorldTrainerFromScratch(WorldTrainer):
         # NOTE: to make training work properly, set `nc` and `names`
         final_data["nc"] = data["val"][0]["nc"]
         final_data["names"] = data["val"][0]["names"]
+        # NOTE: add path with lvis path
+        final_data["path"] = data["val"][0]["path"]
         final_data["channels"] = data["val"][0]["channels"]
         self.data = final_data
+        if self.args.single_cls:  # consistent with base trainer
+            LOGGER.info("Overriding class names with single class.")
+            self.data["names"] = {0: "object"}
+            self.data["nc"] = 1
+        self.training_data = {}
+        for d in data["train"]:
+            if self.args.single_cls:
+                d["names"] = {0: "object"}
+                d["nc"] = 1
+            self.training_data[d["train"]] = d
         return final_data["train"], final_data["val"][0]
 
     def plot_training_labels(self):
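For reference, `WorldTrainerFromScratch.get_dataset()` consumes a nested data dict mixing YOLO detection datasets with grounding datasets; a sketch of the expected shape (dataset names and paths are illustrative):

from ultralytics import YOLOWorld
from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch

data = dict(
    train=dict(
        yolo_data=["Objects365.yaml"],
        grounding_data=[
            dict(img_path="flickr30k/images", json_file="flickr30k/final_flickr_separateGT_train.json"),
        ],
    ),
    val=dict(yolo_data=["lvis.yaml"]),  # exactly one validation dataset is supported
)
model = YOLOWorld("yolov8s-worldv2.yaml")
model.train(data=data, epochs=100, trainer=WorldTrainerFromScratch)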
ultralytics/models/yolo/yoloe/__init__.py
CHANGED
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 from .predict import YOLOEVPDetectPredictor, YOLOEVPSegPredictor
-from .train import YOLOEPEFreeTrainer, YOLOEPETrainer, YOLOETrainer, YOLOEVPTrainer
+from .train import YOLOEPEFreeTrainer, YOLOEPETrainer, YOLOETrainer, YOLOETrainerFromScratch, YOLOEVPTrainer
 from .train_seg import YOLOEPESegTrainer, YOLOESegTrainer, YOLOESegTrainerFromScratch, YOLOESegVPTrainer
 from .val import YOLOEDetectValidator, YOLOESegValidator
 
@@ -18,4 +18,5 @@ __all__ = [
     "YOLOEPEFreeTrainer",
     "YOLOEVPDetectPredictor",
     "YOLOEVPSegPredictor",
+    "YOLOETrainerFromScratch",
 ]
ultralytics/models/yolo/yoloe/train.py
CHANGED
@@ -6,14 +6,14 @@ from pathlib import Path
 
 import torch
 
-from ultralytics.data import YOLOConcatDataset, build_grounding, build_yolo_dataset
+from ultralytics.data import YOLOConcatDataset, build_yolo_dataset
 from ultralytics.data.augment import LoadVisualPrompt
-from ultralytics.data.utils import check_det_dataset
 from ultralytics.models.yolo.detect import DetectionTrainer, DetectionValidator
 from ultralytics.nn.tasks import YOLOEModel
 from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
 from ultralytics.utils.torch_utils import de_parallel
 
+from ..world.train_world import WorldTrainerFromScratch
 from .val import YOLOEDetectValidator
 
 
@@ -92,11 +92,6 @@ class YOLOETrainer(DetectionTrainer):
             self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs, multi_modal=mode == "train"
         )
 
-    def preprocess_batch(self, batch):
-        """Process batch for training, moving text features to the appropriate device."""
-        batch = super().preprocess_batch(batch)
-        return batch
-
 
 class YOLOEPETrainer(DetectionTrainer):
     """Fine-tune YOLOE model in linear probing way."""
@@ -144,30 +139,9 @@ class YOLOEPETrainer(DetectionTrainer):
         return model
 
 
-class YOLOETrainerFromScratch(YOLOETrainer):
+class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
     """Train YOLOE models from scratch."""
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """
-        Initialize the YOLOETrainerFromScratch class.
-
-        This class extends YOLOETrainer to train YOLOE models from scratch. It inherits all functionality from
-        the parent class while providing specialized initialization for training without pre-trained weights.
-
-        Args:
-            cfg (dict, optional): Configuration dictionary with training parameters. Defaults to DEFAULT_CFG.
-            overrides (dict, optional): Dictionary of parameter overrides for configuration.
-            _callbacks (list, optional): List of callback functions to be executed during training.
-
-        Examples:
-            >>> from ultralytics.models.yoloe.train import YOLOETrainerFromScratch
-            >>> trainer = YOLOETrainerFromScratch()
-            >>> trainer.train()
-        """
-        if overrides is None:
-            overrides = {}
-        super().__init__(cfg, overrides, _callbacks)
-
     def build_dataset(self, img_path, mode="train", batch=None):
         """
         Build YOLO Dataset for training or validation.
@@ -183,17 +157,12 @@ class YOLOETrainerFromScratch(YOLOETrainer):
         Returns:
             (YOLOConcatDataset | Dataset): The constructed dataset for training or validation.
         """
-        gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
-        if mode != "train":
-            return build_yolo_dataset(self.args, img_path, batch, self.data, mode=mode, rect=mode == "val", stride=gs)
-        datasets = [
-            build_yolo_dataset(self.args, im_path, batch, self.training_data[im_path], stride=gs, multi_modal=True)
-            if isinstance(im_path, str)
-            else build_grounding(self.args, im_path["img_path"], im_path["json_file"], batch, stride=gs)
-            for im_path in img_path
-        ]
-        self.set_text_embeddings(datasets, batch)  # cache text embeddings to accelerate training
-        return YOLOConcatDataset(datasets) if len(datasets) > 1 else datasets[0]
+        datasets = WorldTrainerFromScratch.build_dataset(self, img_path, mode, batch)
+        if mode == "train":
+            self.set_text_embeddings(
+                datasets.datasets if hasattr(datasets, "datasets") else [datasets], batch
+            )  # cache text embeddings to accelerate training
+        return datasets
 
     def set_text_embeddings(self, datasets, batch):
         """
@@ -225,7 +194,7 @@ class YOLOETrainerFromScratch(YOLOETrainer):
 
     def preprocess_batch(self, batch):
         """Process batch for training, moving text features to the appropriate device."""
-        batch = super().preprocess_batch(batch)
+        batch = DetectionTrainer.preprocess_batch(self, batch)
 
         texts = list(itertools.chain(*batch["texts"]))
         txt_feats = torch.stack([self.text_embeddings[text] for text in texts]).to(self.device)
@@ -246,6 +215,7 @@ class YOLOETrainerFromScratch(YOLOETrainer):
             (dict): Dictionary mapping text samples to their embeddings.
         """
         if cache_path.exists():
+            LOGGER.info(f"Reading existed cache from '{cache_path}'")
             return torch.load(cache_path)
         assert self.model is not None
         txt_feats = self.model.get_text_pe(texts, batch, without_reprta=True)
@@ -253,77 +223,6 @@ class YOLOETrainerFromScratch(YOLOETrainer):
         torch.save(txt_map, cache_path)
         return txt_map
 
-    def get_dataset(self):
-        """
-        Get train and validation paths from data dictionary.
-
-        Processes the data configuration to extract paths for training and validation datasets,
-        handling both YOLO detection datasets and grounding datasets.
-
-        Returns:
-            (str): Train dataset path.
-            (str): Validation dataset path.
-
-        Raises:
-            AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
-        """
-        final_data = {}
-        data_yaml = self.args.data
-        assert data_yaml.get("train", False), "train dataset not found"  # object365.yaml
-        assert data_yaml.get("val", False), "validation dataset not found"  # lvis.yaml
-        data = {k: [check_det_dataset(d) for d in v.get("yolo_data", [])] for k, v in data_yaml.items()}
-        assert len(data["val"]) == 1, f"Only support validating on 1 dataset for now, but got {len(data['val'])}."
-        val_split = "minival" if "lvis" in data["val"][0]["val"] else "val"
-        for d in data["val"]:
-            if d.get("minival") is None:  # for lvis dataset
-                continue
-            d["minival"] = str(d["path"] / d["minival"])
-        for s in ["train", "val"]:
-            final_data[s] = [d["train" if s == "train" else val_split] for d in data[s]]
-            # save grounding data if there's one
-            grounding_data = data_yaml[s].get("grounding_data")
-            if grounding_data is None:
-                continue
-            grounding_data = grounding_data if isinstance(grounding_data, list) else [grounding_data]
-            for g in grounding_data:
-                assert isinstance(g, dict), f"Grounding data should be provided in dict format, but got {type(g)}"
-            final_data[s] += grounding_data
-        # NOTE: to make training work properly, set `nc` and `names`
-        final_data["nc"] = data["val"][0]["nc"]
-        final_data["names"] = data["val"][0]["names"]
-        # NOTE: add path with lvis path
-        final_data["path"] = data["val"][0]["path"]
-        self.data = final_data
-        if self.args.single_cls:  # consistent with base trainer
-            LOGGER.info("Overriding class names with single class.")
-            self.data["names"] = {0: "object"}
-            self.data["nc"] = 1
-        self.training_data = {}
-        for d in data["train"]:
-            if self.args.single_cls:
-                d["names"] = {0: "object"}
-                d["nc"] = 1
-            self.training_data[d["train"]] = d
-        return final_data["train"], final_data["val"][0]
-
-    def plot_training_labels(self):
-        """Do not plot labels for YOLO-World training."""
-        pass
-
-    def final_eval(self):
-        """
-        Perform final evaluation on the validation dataset.
-
-        Configures the validator with the appropriate dataset and split before running evaluation.
-
-        Returns:
-            (dict): Evaluation metrics.
-        """
-        val = self.args.data["val"]["yolo_data"][0]
-        self.validator.args.data = val
-        self.validator.args.split = "minival" if isinstance(val, str) and "lvis" in val else "val"
-        return super().final_eval()
-
 
 class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
     """Train prompt-free YOLOE model."""
@@ -337,7 +236,7 @@ class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
 
     def preprocess_batch(self, batch):
         """Preprocesses a batch of images for YOLOE training, adjusting formatting and dimensions as needed."""
-        batch = super().preprocess_batch(batch)
+        batch = DetectionTrainer.preprocess_batch(self, batch)
         return batch
 
     def set_text_embeddings(self, datasets, batch):
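With `YOLOETrainerFromScratch` now deriving from both `YOLOETrainer` and `WorldTrainerFromScratch`, the duplicated dataset and evaluation plumbing collapses into the shared World base, and explicit calls like `DetectionTrainer.preprocess_batch(self, batch)` pick one branch of the diamond. A quick way to inspect the resulting method resolution order (a sketch, not library code):

from ultralytics.models.yolo.yoloe import YOLOETrainerFromScratch  # newly re-exported via __all__

print([cls.__name__ for cls in YOLOETrainerFromScratch.__mro__])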
ultralytics/models/yolo/yoloe/train_seg.py
CHANGED
@@ -5,7 +5,7 @@ from copy import copy, deepcopy
 
 from ultralytics.models.yolo.segment import SegmentationTrainer
 from ultralytics.nn.tasks import YOLOESegModel
-from ultralytics.utils import DEFAULT_CFG, RANK
+from ultralytics.utils import RANK
 
 from .train import YOLOETrainer, YOLOETrainerFromScratch, YOLOEVPTrainer
 from .val import YOLOESegValidator
@@ -24,22 +24,6 @@ class YOLOESegTrainer(YOLOETrainer, SegmentationTrainer):
         _callbacks (list): List of callback functions for training events.
     """
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
-        """
-        Initialize the YOLOESegTrainer class.
-
-        This class combines YOLOETrainer and SegmentationTrainer to provide training functionality
-        specifically for YOLOE segmentation models.
-
-        Args:
-            cfg (Dict): Configuration dictionary with training parameters.
-            overrides (Dict, optional): Dictionary with parameter overrides.
-            _callbacks (List, optional): List of callback functions for training events.
-        """
-        if overrides is None:
-            overrides = {}
-        super().__init__(cfg, overrides, _callbacks)
-
     def get_model(self, cfg=None, weights=None, verbose=True):
         """
         Return YOLOESegModel initialized with specified config and weights.
ultralytics/trackers/bot_sort.py
CHANGED
@@ -17,7 +17,7 @@ from .utils.kalman_filter import KalmanFilterXYWH
 
 class BOTrack(STrack):
     """
-    An extended version of the STrack class for YOLOv8, adding object tracking features.
+    An extended version of the STrack class for YOLO, adding object tracking features.
 
     This class extends the STrack class to include additional functionalities for object tracking, such as feature
     smoothing, Kalman filter prediction, and reactivation of tracks.
@@ -150,7 +150,7 @@ class BOTrack(STrack):
 
 class BOTSORT(BYTETracker):
     """
-    An extended version of the BYTETracker class for YOLOv8, designed for object tracking with ReID and GMC algorithm.
+    An extended version of the BYTETracker class for YOLO, designed for object tracking with ReID and GMC algorithm.
 
     Attributes:
         proximity_thresh (float): Threshold for spatial proximity (IoU) between tracks and detections.
@@ -163,7 +163,7 @@ class BOTSORT(BYTETracker):
         get_kalmanfilter: Return an instance of KalmanFilterXYWH for object tracking.
         init_track: Initialize track with detections, scores, and classes.
         get_dists: Get distances between tracks and detections using IoU and (optionally) ReID.
-        multi_predict: Predict and track multiple objects with a YOLOv8 model.
+        multi_predict: Predict and track multiple objects with a YOLO model.
         reset: Reset the BOTSORT tracker to its initial state.
 
     Examples:
@@ -173,7 +173,7 @@ class BOTSORT(BYTETracker):
         >>> bot_sort.multi_predict(tracks)
 
     Note:
-        The class is designed to work with a YOLOv8 object detection model and supports ReID only if enabled via args.
+        The class is designed to work with a YOLO object detection model and supports ReID only if enabled via args.
     """
 
     def __init__(self, args, frame_rate=30):
@@ -197,7 +197,7 @@ class BOTSORT(BYTETracker):
         self.appearance_thresh = args.appearance_thresh
         self.encoder = (
             (lambda feats, s: [f.cpu().numpy() for f in feats])  # native features do not require any model
-            if self.args.model == "auto"
+            if args.with_reid and self.args.model == "auto"
             else ReID(args.model)
             if args.with_reid
             else None
CHANGED
@@ -794,15 +794,16 @@ class TVPSegmentLoss(TVPDetectLoss):
|
|
794
794
|
|
795
795
|
def __init__(self, model):
|
796
796
|
"""Initialize TVPSegmentLoss with task-prompt and visual-prompt criteria using the provided model."""
|
797
|
+
super().__init__(model)
|
797
798
|
self.vp_criterion = v8SegmentationLoss(model)
|
798
799
|
|
799
800
|
def __call__(self, preds, batch):
|
800
801
|
"""Calculate the loss for text-visual prompt segmentation."""
|
801
802
|
feats, pred_masks, proto = preds if len(preds) == 3 else preds[1]
|
802
|
-
assert self.
|
803
|
+
assert self.ori_reg_max == self.vp_criterion.reg_max # TODO: remove it
|
803
804
|
|
804
|
-
if self.
|
805
|
-
loss = torch.zeros(4, device=self.
|
805
|
+
if self.ori_reg_max * 4 + self.ori_nc == feats[0].shape[1]:
|
806
|
+
loss = torch.zeros(4, device=self.vp_criterion.device, requires_grad=True)
|
806
807
|
return loss, loss.detach()
|
807
808
|
|
808
809
|
vp_feats = self._get_vp_features(feats)
|
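The added `super().__init__(model)` is what gives `TVPSegmentLoss` the `ori_*` attributes its `__call__` asserts on. A minimal, hypothetical illustration of the failure mode (class names invented for this example):

class Parent:
    def __init__(self):
        self.reg_max = 16  # attribute the subclass later depends on

class Broken(Parent):
    def __init__(self):
        self.criterion = object()  # no super().__init__() -> self.reg_max never exists

class Fixed(Parent):
    def __init__(self):
        super().__init__()  # mirrors the added super().__init__(model)
        self.criterion = object()

assert hasattr(Fixed(), "reg_max") and not hasattr(Broken(), "reg_max")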
ultralytics/utils/ops.py
CHANGED
@@ -213,7 +213,7 @@ def non_max_suppression(
         multi_label (bool): If True, each box may have multiple labels.
         labels (List[List[Union[int, float, torch.Tensor]]]): A list of lists, where each inner
             list contains the apriori labels for a given image. The list should be in the format
-            output by a dataloader, with each label being a tuple of (class_index,
+            output by a dataloader, with each label being a tuple of (class_index, x, y, w, h).
         max_det (int): The maximum number of boxes to keep after NMS.
         nc (int): The number of classes output by the model. Any indices after this will be considered masks.
         max_time_img (float): The maximum time (seconds) for processing one image.
ultralytics/utils/tuner.py
CHANGED
@@ -77,8 +77,9 @@ def run_ray_tune(
         "flipud": tune.uniform(0.0, 1.0),  # image flip up-down (probability)
         "fliplr": tune.uniform(0.0, 1.0),  # image flip left-right (probability)
         "bgr": tune.uniform(0.0, 1.0),  # image channel BGR (probability)
-        "mosaic": tune.uniform(0.0, 1.0),  # image
+        "mosaic": tune.uniform(0.0, 1.0),  # image mosaic (probability)
         "mixup": tune.uniform(0.0, 1.0),  # image mixup (probability)
+        "cutmix": tune.uniform(0.0, 1.0),  # image cutmix (probability)
         "copy_paste": tune.uniform(0.0, 1.0),  # segment copy-paste (probability)
     }
 
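Both tuning entry points, the built-in `Tuner` and the Ray Tune integration, now include `cutmix` in their search spaces. A sketch; checkpoint and budgets are placeholders:

from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.tune(data="coco8.yaml", epochs=5, iterations=10)        # built-in Tuner
# model.tune(data="coco8.yaml", use_ray=True, iterations=10)  # Ray Tune (requires "ray[tune]")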
{ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ultralytics
-Version: 8.3.118
+Version: 8.3.120
 Summary: Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
 Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
 Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -32,7 +32,7 @@ Classifier: Operating System :: Microsoft :: Windows
 Requires-Python: >=3.8
 Description-Content-Type: text/markdown
 License-File: LICENSE
-Requires-Dist: numpy
+Requires-Dist: numpy>=1.23.0
 Requires-Dist: matplotlib>=3.3.0
 Requires-Dist: opencv-python>=4.6.0
 Requires-Dist: pillow>=7.1.2
@@ -61,20 +61,19 @@ Requires-Dist: mkdocs-macros-plugin>=1.0.5; extra == "dev"
 Provides-Extra: export
 Requires-Dist: onnx>=1.12.0; extra == "export"
 Requires-Dist: coremltools>=8.0; (platform_system != "Windows" and python_version <= "3.13") and extra == "export"
-Requires-Dist: scikit-learn>=1.3.2; (platform_system != "Windows" and python_version <= "3.
+Requires-Dist: scikit-learn>=1.3.2; (platform_system != "Windows" and python_version <= "3.13") and extra == "export"
 Requires-Dist: openvino>=2024.0.0; extra == "export"
 Requires-Dist: tensorflow>=2.0.0; extra == "export"
 Requires-Dist: tensorflowjs>=2.0.0; extra == "export"
 Requires-Dist: tensorstore>=0.1.63; (platform_machine == "aarch64" and python_version >= "3.9") and extra == "export"
-Requires-Dist: keras; extra == "export"
 Requires-Dist: h5py!=3.11.0; platform_machine == "aarch64" and extra == "export"
 Provides-Extra: solutions
 Requires-Dist: shapely<2.1.0,>=2.0.0; extra == "solutions"
 Requires-Dist: streamlit<1.44.0,>=1.29.0; extra == "solutions"
 Provides-Extra: logging
-Requires-Dist:
-Requires-Dist: tensorboard
-Requires-Dist:
+Requires-Dist: wandb; extra == "logging"
+Requires-Dist: tensorboard; extra == "logging"
+Requires-Dist: mlflow; extra == "logging"
 Provides-Extra: extra
 Requires-Dist: hub-sdk>=0.0.12; extra == "extra"
 Requires-Dist: ipython; extra == "extra"
{ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/RECORD
CHANGED
@@ -7,11 +7,11 @@ tests/test_exports.py,sha256=dhZn86LdbapW15RthQF870LGxDjC1MUZhlGdBgPmgIQ,9716
 tests/test_integrations.py,sha256=dQteeRsRVuT_p5-T88-7jqT65Zm9iAXkyKg-KQ1_TQ8,6341
 tests/test_python.py,sha256=ok2xp7zwPOwcyl4yNawlx1uJ5HETn9eU-jyTPYzA0fI,25491
 tests/test_solutions.py,sha256=BIvg9zW0a_ggEmrPKgB_Y0MncveH-eYuN5KlqdJ6nHs,5726
-ultralytics/__init__.py,sha256=
+ultralytics/__init__.py,sha256=s0vdAaSAbSlu_5DON0IAjl8rb6whRMR18hW9WkNVsfw,730
 ultralytics/assets/bus.jpg,sha256=wCAZxJecGR63Od3ZRERe9Aja1Weayrb9Ug751DS_vGM,137419
 ultralytics/assets/zidane.jpg,sha256=Ftc4aeMmen1O0A3o6GCDO9FlfBslLpTAw0gnetx7bts,50427
-ultralytics/cfg/__init__.py,sha256=
-ultralytics/cfg/default.yaml,sha256=
+ultralytics/cfg/__init__.py,sha256=eZ7exHSsrTLY72atmmHKatJgJYLjfZDPXMWVmpZF9Qw,39683
+ultralytics/cfg/default.yaml,sha256=zSiCmQp_HRlh0gZe_AZSjNQNe1aNDoX2vcNUo5oJs2Q,8306
 ultralytics/cfg/datasets/Argoverse.yaml,sha256=_xlEDIJ9XkUo0v_iNL7FW079BoSeZtKSuLteKTtGbA8,3275
 ultralytics/cfg/datasets/DOTAv1.5.yaml,sha256=SHND_CFkojxw5iQD5Mcgju2kCZIl0gW2ajuzv1cqoL0,1224
 ultralytics/cfg/datasets/DOTAv1.yaml,sha256=j_DvXVQzZ4dQmf8I7oPX4v9xO3WZXztxV4Xo9VhUTsM,1194
@@ -104,11 +104,11 @@ ultralytics/cfg/trackers/botsort.yaml,sha256=8fM3y4TXKKT_5aWsqmQw5JEgwNlBGlRaf8L
 ultralytics/cfg/trackers/bytetrack.yaml,sha256=6u-tiZlk16EqEwkNXaMrza6PAQmWj_ypgv26LGCtPDg,886
 ultralytics/data/__init__.py,sha256=nAXaL1puCc7z_NjzQNlJnhbVhT9Fla2u7Dsqo7q1dAc,644
 ultralytics/data/annotator.py,sha256=VEwb11FsEZm75qlEp8XDHFGKW0_rGsEaFDaBVd771Kw,2902
-ultralytics/data/augment.py,sha256=
+ultralytics/data/augment.py,sha256=m0haieHkMrpe-nEApapfR4mEuOnCw8I4k-tvTVJpOnU,129172
 ultralytics/data/base.py,sha256=uMh_xzs6ci1hciDLpbVW2ZQr7js0o8jctE8KhL2T7Z4,19015
 ultralytics/data/build.py,sha256=FVIkgLGv5n1C7SRDrQiKOMDcI7V59WmEihKslzvEISg,9651
 ultralytics/data/converter.py,sha256=znXH2XTdo0Q4NDHMny1ydVBvrxKn2kbbwI-X5bn1MlQ,26890
-ultralytics/data/dataset.py,sha256=
+ultralytics/data/dataset.py,sha256=hbsjhmZBO-T1_gkUAm128kKowdwsLNwnK2lhnzmxJB8,34826
 ultralytics/data/loaders.py,sha256=o844tZlfZEhXop16t-hwaEQHhbfP3_bQMS0whF_NSos,28531
 ultralytics/data/split.py,sha256=6LHB1z8woXurWjXfM-Zm2thRr1KXvzR18CFJA-SDUvE,4677
 ultralytics/data/split_dota.py,sha256=p8eVGht9tABSVbf9vwvxA_AQYEva3IGHePKlMeNrn64,11872
@@ -118,12 +118,12 @@ ultralytics/data/scripts/get_coco.sh,sha256=UuJpJeo3qQpTHVINeOpmP0NYmg8PhEFE3A8J
 ultralytics/data/scripts/get_coco128.sh,sha256=qmRQl_hOKrsdHrTrnyQuFIH01oDz3lfaz138OgGfLt8,650
 ultralytics/data/scripts/get_imagenet.sh,sha256=hr42H16bM47iT27rgS7MpEo-GeOZAYUQXgr0B2cwn48,1705
 ultralytics/engine/__init__.py,sha256=lm6MckFYCPTbqIoX7w0s_daxdjNeBeKW6DXppv1-QUM,70
-ultralytics/engine/exporter.py,sha256=
+ultralytics/engine/exporter.py,sha256=d-L46TSA2U36k6LowP1t1DJqXWndsvNVxXR54a70V8Y,69771
 ultralytics/engine/model.py,sha256=wS1cwgv0iyhsslMAZYMGlYDWitDIRW96d7MxwW-Sw5o,52817
 ultralytics/engine/predictor.py,sha256=YJ5l-0qIpr6JAJxowswtZ0IqmXBqVTvAA9vR40v0sCM,21752
 ultralytics/engine/results.py,sha256=MZkhI0CCOkBQPR-EzswymVqvqeyk35EkESGUQ_08r8k,79738
 ultralytics/engine/trainer.py,sha256=fdB8H6brnnQAL-ZFP6nmNmKMze0_qy0OT3jJg1B5uhQ,38864
-ultralytics/engine/tuner.py,sha256=
+ultralytics/engine/tuner.py,sha256=IyFKsh4Q4a1DsjfK02DdN9cufAiBDhdhIq7F7ddguys,12646
 ultralytics/engine/validator.py,sha256=jfV81wuFDgrVVXEcPzgOpxAPrAZn-1LgpKwu9l_1-ts,17050
 ultralytics/hub/__init__.py,sha256=wDtAUKdfqob95tfFHgDJFXcsNSDSdoIQkJTm-CfIUTI,6616
 ultralytics/hub/auth.py,sha256=_bGQVLTgP-ina4fQxq2M7qkj9zKKfxb99_VWgN3S_4k,5549
@@ -144,7 +144,7 @@ ultralytics/models/rtdetr/__init__.py,sha256=_jEHmOjI_QP_nT3XJXLgYHQ6bXG4EL8Gnvn
 ultralytics/models/rtdetr/model.py,sha256=zx9UKpReYCRL7Is2DXIX9ZcJE25KE_fPZ-NYx5vF6E4,2119
 ultralytics/models/rtdetr/predict.py,sha256=5VNvyULxegg_NfGo7ugfIKHrtKhpaspJZdagU1haQmo,3942
 ultralytics/models/rtdetr/train.py,sha256=-c0DZNRscWXRNHddwHHY_OH5nLUb4LLoLyn2yIohGTg,3395
-ultralytics/models/rtdetr/val.py,sha256=
+ultralytics/models/rtdetr/val.py,sha256=4KsGuWOsik7JXpU8mUY6ts7_wWuPvcNSxiAGIiGSuxA,7380
 ultralytics/models/sam/__init__.py,sha256=iR7B06rAEni21eptg8n4rLOP0Z_qV9y9PL-L93n4_7s,266
 ultralytics/models/sam/amg.py,sha256=r_duG0DCeCyTYfhcVh-ti10FPMl4VGL4SKc8yvbQpNU,11050
 ultralytics/models/sam/build.py,sha256=Vhml3zBGDcRO-efauNdM0ZlKTV10ADAj_aT823lPJv8,12515
@@ -186,11 +186,11 @@ ultralytics/models/yolo/segment/train.py,sha256=EIyIAjYp127Mb-DomyjPORaONu57OY_g
 ultralytics/models/yolo/segment/val.py,sha256=cXJM1JNuzDraU0SJQRIdzNxabd0bfcxiRE8wozHZChY,18415
 ultralytics/models/yolo/world/__init__.py,sha256=nlh8I6t8hMGz_vZg8QSlsUW1R-2eKvn9CGUoPPQEGhA,131
 ultralytics/models/yolo/world/train.py,sha256=HUJ0XiJIGx_FA9kqNYnSFsaKWMiZUDxgkpfGoBH6UNc,4896
-ultralytics/models/yolo/world/train_world.py,sha256=
-ultralytics/models/yolo/yoloe/__init__.py,sha256=
+ultralytics/models/yolo/world/train_world.py,sha256=-o_-85zoczEvXZHWHJaVhXQ_hAIGTFtPlgSWJmUf5AU,8287
+ultralytics/models/yolo/yoloe/__init__.py,sha256=6SLytdJtwu37qewf7CobG7C7Wl1m-xtNdvCXEasfPDE,760
 ultralytics/models/yolo/yoloe/predict.py,sha256=pjvQ8TKlAe_KIFo70qiNdOrSTITU3pcJ4VE_k7uJjDk,6994
-ultralytics/models/yolo/yoloe/train.py,sha256=
-ultralytics/models/yolo/yoloe/train_seg.py,sha256=
+ultralytics/models/yolo/yoloe/train.py,sha256=St3zw_XWRol9pODWU4lvKlJnWYr1lmWQNuhLFwWMge4,12989
+ultralytics/models/yolo/yoloe/train_seg.py,sha256=l0SOMQQd0Y_EBBHhTNekgrQsftqhYyK4oWTdCg1dLrE,4633
 ultralytics/models/yolo/yoloe/val.py,sha256=oA8cVT3pBXF6aPZy7ITq0mDcktRuIgks8tTtqMRISyY,8431
 ultralytics/nn/__init__.py,sha256=rjociYD9lo_K-d-1s6TbdWklPLjTcEHk7OIlRDJstIE,615
 ultralytics/nn/autobackend.py,sha256=tnYxzboWGBgNvUYrz2zokPH1Bw__GD2ZQro1gO-ZIF8,39298
@@ -223,7 +223,7 @@ ultralytics/solutions/trackzone.py,sha256=efko4U8zT8lyNLLo9zF543rTXHefeYthxf9GV3
 ultralytics/solutions/vision_eye.py,sha256=DHf3pQzNqP71oYx3QXflvcGsg4nEYJCD1SOdSOxiWBk,2965
 ultralytics/trackers/__init__.py,sha256=Zlu_Ig5osn7hqch_g5Be_e4pwZUkeeTQiesJCi0pFGI,255
 ultralytics/trackers/basetrack.py,sha256=LYvWB5d7Woyrz_RlxaopjV07RQKH3sff_lZJfMcMxcA,4450
-ultralytics/trackers/bot_sort.py,sha256=
+ultralytics/trackers/bot_sort.py,sha256=iDYLk3VAfsdCHisy4l9_JKxJw3YEBMgGdK0E6HthoXo,11307
 ultralytics/trackers/byte_tracker.py,sha256=D7JQ_6V8OUMQryxTrAr010UXMSaboQnI7T1xppzHXYg,20921
 ultralytics/trackers/track.py,sha256=mu6L9RWAW8Nq0vJanX-hTTUST-OmLq49d8VV96-J9u8,4817
 ultralytics/trackers/utils/__init__.py,sha256=lm6MckFYCPTbqIoX7w0s_daxdjNeBeKW6DXppv1-QUM,70
@@ -240,15 +240,15 @@ ultralytics/utils/errors.py,sha256=vY9h2evFSrHnZdHJVVrmm8Zzw4qVDLyo9DeYW5g0dFk,1
 ultralytics/utils/export.py,sha256=mTkebwilsT1jwIfTLgAQdkbrnZr9Sm96W-Vi7B1j5wQ,8817
 ultralytics/utils/files.py,sha256=0K4O1cgqRiXaDw7EQK13TqA5SME_RrvfDVQSPetNr5w,8042
 ultralytics/utils/instance.py,sha256=UOEsXR9V-bXNRk6BTonASBEgeMqvzzAk4S7VdXZJUAM,18090
-ultralytics/utils/loss.py,sha256=
+ultralytics/utils/loss.py,sha256=s9LT-zz1zc81Kak0xt1O5HQlv8z0Br-EvudklYs6A6E,37501
 ultralytics/utils/metrics.py,sha256=uv5O-2Ft8wYfTvDedFxiUqMZ6Nr2CL6I9ybGZiK3e2s,53773
-ultralytics/utils/ops.py,sha256=
+ultralytics/utils/ops.py,sha256=YFwPrKlPcgEmgAWqnJVR0Ccx5NQgp5e3P-YYHwVSP0k,34779
 ultralytics/utils/patches.py,sha256=6rVT-l8WDp_Py3O-gZdv9t3PnrYRRkrX_lF3mZ1XS8c,4928
 ultralytics/utils/plotting.py,sha256=5QPK1y-gm4T1mK3sjfRZhIUJAyP05D1cJ7h9wHPTifU,46616
 ultralytics/utils/tal.py,sha256=P5nPoR9qNnFuDIda0fsn8WP6m1V8r7EbvXUuhNRFFTA,20805
 ultralytics/utils/torch_utils.py,sha256=KUt2qoud3O2bb_cWv1TDjZloNKuLbWk0XJU97wlEdU4,39028
 ultralytics/utils/triton.py,sha256=xK9Db_ZUVDnIK1u76S2G-6ulIBsLfj9HN_YOaSrnMuU,5304
-ultralytics/utils/tuner.py,sha256=
+ultralytics/utils/tuner.py,sha256=0Bp7l5dWZe1RzdvAIa11wQoX6eoAaoNRcA-EAnpofbk,6755
 ultralytics/utils/callbacks/__init__.py,sha256=hzL63Rce6VkZhP4Lcim9LKjadixaQG86nKqPhk7IkS0,242
 ultralytics/utils/callbacks/base.py,sha256=p8YCeYDp4GLcyHWFZxC2Wxr2IXLw_MfIE5ef1fOQcWk,6848
 ultralytics/utils/callbacks/clearml.py,sha256=z-MmCALz1FcNSec8CmDiFHkRd_zTzzuPDCidq_xkUXY,5990
@@ -260,9 +260,9 @@ ultralytics/utils/callbacks/neptune.py,sha256=JaI95Cj2kIjUhlEEOiDN0-Drc-fDelLhNI
 ultralytics/utils/callbacks/raytune.py,sha256=A8amUGpux7dYES-L1iSeMoMXBySGWCD1aUqT7vcG-pU,1284
 ultralytics/utils/callbacks/tensorboard.py,sha256=jgYnym3cUQFAgN1GzTyO7l3jINtfAh8zhrllDvnLuVQ,5339
 ultralytics/utils/callbacks/wb.py,sha256=iDRFXI4IIDm8R5OI89DMTmjs8aHLo1HRCLkOFKdaMG4,7507
-ultralytics-8.3.118.dist-info/licenses/LICENSE,sha256=
-ultralytics-8.3.118.dist-info/METADATA,sha256=
-ultralytics-8.3.118.dist-info/WHEEL,sha256=
-ultralytics-8.3.118.dist-info/entry_points.txt,sha256=
-ultralytics-8.3.118.dist-info/top_level.txt,sha256=
-ultralytics-8.3.118.dist-info/RECORD,,
+ultralytics-8.3.120.dist-info/licenses/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
+ultralytics-8.3.120.dist-info/METADATA,sha256=mP-Pkx1G9KDrCfpRBMWAJ9MlL1o-sF7NQHVUdXH-xas,37195
+ultralytics-8.3.120.dist-info/WHEEL,sha256=ck4Vq1_RXyvS4Jt6SI0Vz6fyVs4GWg7AINwpsaGEgPE,91
+ultralytics-8.3.120.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
+ultralytics-8.3.120.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
+ultralytics-8.3.120.dist-info/RECORD,,
{ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/entry_points.txt
File without changes
{ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/licenses/LICENSE
File without changes
{ultralytics-8.3.118.dist-info → ultralytics-8.3.120.dist-info}/top_level.txt
File without changes