ultralytics 8.3.117__py3-none-any.whl → 8.3.119__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. tests/__init__.py +22 -0
  2. tests/conftest.py +83 -0
  3. tests/test_cli.py +128 -0
  4. tests/test_cuda.py +164 -0
  5. tests/test_engine.py +131 -0
  6. tests/test_exports.py +231 -0
  7. tests/test_integrations.py +154 -0
  8. tests/test_python.py +695 -0
  9. tests/test_solutions.py +176 -0
  10. ultralytics/__init__.py +1 -1
  11. ultralytics/cfg/__init__.py +1 -0
  12. ultralytics/cfg/default.yaml +1 -0
  13. ultralytics/data/augment.py +122 -7
  14. ultralytics/data/base.py +9 -2
  15. ultralytics/data/dataset.py +7 -5
  16. ultralytics/engine/exporter.py +10 -91
  17. ultralytics/engine/tuner.py +2 -1
  18. ultralytics/models/rtdetr/val.py +1 -0
  19. ultralytics/models/yolo/detect/predict.py +1 -1
  20. ultralytics/models/yolo/model.py +2 -3
  21. ultralytics/models/yolo/obb/train.py +1 -1
  22. ultralytics/models/yolo/pose/predict.py +1 -1
  23. ultralytics/models/yolo/pose/train.py +1 -1
  24. ultralytics/models/yolo/pose/val.py +1 -1
  25. ultralytics/models/yolo/segment/train.py +3 -3
  26. ultralytics/nn/autobackend.py +2 -5
  27. ultralytics/nn/text_model.py +97 -13
  28. ultralytics/utils/benchmarks.py +1 -1
  29. ultralytics/utils/downloads.py +1 -0
  30. ultralytics/utils/ops.py +1 -1
  31. ultralytics/utils/tuner.py +2 -1
  32. {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/METADATA +6 -7
  33. {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/RECORD +37 -28
  34. {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/WHEEL +1 -1
  35. {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/entry_points.txt +0 -0
  36. {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/licenses/LICENSE +0 -0
  37. {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/top_level.txt +0 -0
tests/test_solutions.py ADDED
@@ -0,0 +1,176 @@
+ # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+
+ # Tests Ultralytics Solutions: https://docs.ultralytics.com/solutions/,
+ # including every solution excluding DistanceCalculation and Security Alarm System.
+
+ import cv2
+ import pytest
+
+ from tests import MODEL, TMP
+ from ultralytics import solutions
+ from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, LINUX, checks
+ from ultralytics.utils.downloads import safe_download
+
+ # Pre-defined arguments values
+ SHOW = False
+ DEMO_VIDEO = "solutions_ci_demo.mp4"  # for all the solutions, except workout, object cropping and parking management
+ CROP_VIDEO = "decelera_landscape_min.mov"  # for object cropping solution
+ POSE_VIDEO = "solution_ci_pose_demo.mp4"  # only for workouts monitoring solution
+ PARKING_VIDEO = "solution_ci_parking_demo.mp4"  # only for parking management solution
+ PARKING_AREAS_JSON = "solution_ci_parking_areas.json"  # only for parking management solution
+ PARKING_MODEL = "solutions_ci_parking_model.pt"  # only for parking management solution
+ REGION = [(10, 200), (540, 200), (540, 180), (10, 180)]  # for object counting, speed estimation and queue management
+
+ # Test configs for each solution : (name, class, needs_frame_count, video, kwargs)
+ SOLUTIONS = [
+     (
+         "ObjectCounter",
+         solutions.ObjectCounter,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "Heatmap",
+         solutions.Heatmap,
+         False,
+         DEMO_VIDEO,
+         {"colormap": cv2.COLORMAP_PARULA, "model": MODEL, "show": SHOW, "region": None},
+     ),
+     (
+         "HeatmapWithRegion",
+         solutions.Heatmap,
+         False,
+         DEMO_VIDEO,
+         {"colormap": cv2.COLORMAP_PARULA, "region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "SpeedEstimator",
+         solutions.SpeedEstimator,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "QueueManager",
+         solutions.QueueManager,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "LineAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "line", "model": MODEL, "show": SHOW},
+     ),
+     (
+         "PieAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "pie", "model": MODEL, "show": SHOW},
+     ),
+     (
+         "BarAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "bar", "model": MODEL, "show": SHOW},
+     ),
+     (
+         "AreaAnalytics",
+         solutions.Analytics,
+         True,
+         DEMO_VIDEO,
+         {"analytics_type": "area", "model": MODEL, "show": SHOW},
+     ),
+     ("TrackZone", solutions.TrackZone, False, DEMO_VIDEO, {"region": REGION, "model": MODEL, "show": SHOW}),
+     (
+         "ObjectCropper",
+         solutions.ObjectCropper,
+         False,
+         CROP_VIDEO,
+         {"crop_dir": str(TMP / "cropped-detections"), "model": MODEL, "show": SHOW},
+     ),
+     (
+         "ObjectBlurrer",
+         solutions.ObjectBlurrer,
+         False,
+         DEMO_VIDEO,
+         {"blur_ratio": 0.5, "model": MODEL, "show": SHOW},
+     ),
+     (
+         "InstanceSegmentation",
+         solutions.InstanceSegmentation,
+         False,
+         DEMO_VIDEO,
+         {"model": "yolo11n-seg.pt", "show": SHOW},
+     ),
+     ("VisionEye", solutions.VisionEye, False, DEMO_VIDEO, {"model": MODEL, "show": SHOW}),
+     (
+         "RegionCounter",
+         solutions.RegionCounter,
+         False,
+         DEMO_VIDEO,
+         {"region": REGION, "model": MODEL, "show": SHOW},
+     ),
+     ("AIGym", solutions.AIGym, False, POSE_VIDEO, {"kpts": [6, 8, 10], "show": SHOW}),
+     (
+         "ParkingManager",
+         solutions.ParkingManagement,
+         False,
+         PARKING_VIDEO,
+         {"model": str(TMP / PARKING_MODEL), "show": SHOW, "json_file": str(TMP / PARKING_AREAS_JSON)},
+     ),
+     (
+         "StreamlitInference",
+         solutions.Inference,
+         False,
+         None,  # streamlit application don't require video file
+         {},  # streamlit application don't accept arguments
+     ),
+ ]
+
+
+ def process_video(solution, video_path, needs_frame_count=False):
+     """Process video with solution, feeding frames and optional frame count."""
+     cap = cv2.VideoCapture(video_path)
+     assert cap.isOpened(), f"Error reading video file {video_path}"
+
+     frame_count = 0
+     while cap.isOpened():
+         success, im0 = cap.read()
+         if not success:
+             break
+         frame_count += 1
+         im_copy = im0.copy()
+         args = [im_copy, frame_count] if needs_frame_count else [im_copy]
+         _ = solution(*args)
+
+     cap.release()
+
+
+ @pytest.mark.skipif(
+     (LINUX and checks.IS_PYTHON_3_11) or IS_RASPBERRYPI,
+     reason="Disabled for testing due to --slow test errors after YOLOE PR.",
+ )
+ @pytest.mark.parametrize("name, solution_class, needs_frame_count, video, kwargs", SOLUTIONS)
+ def test_solution(name, solution_class, needs_frame_count, video, kwargs):
+     """Test individual Ultralytics solution."""
+     if video:
+         safe_download(url=f"{ASSETS_URL}/{video}", dir=TMP)
+     if name == "ParkingManager":
+         safe_download(url=f"{ASSETS_URL}/{PARKING_AREAS_JSON}", dir=TMP)
+         safe_download(url=f"{ASSETS_URL}/{PARKING_MODEL}", dir=TMP)
+     elif name == "StreamlitInference":
+         if checks.check_imshow():  # do not merge with elif above
+             solution_class(**kwargs).inference()  # requires interactive GUI environment
+         return
+
+     process_video(
+         solution=solution_class(**kwargs),
+         video_path=str(TMP / video),
+         needs_frame_count=needs_frame_count,
+     )
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
  # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
- __version__ = "8.3.117"
+ __version__ = "8.3.119"
 
  import os
 
ultralytics/cfg/__init__.py CHANGED
@@ -181,6 +181,7 @@ CFG_FRACTION_KEYS = frozenset(
          "bgr",
          "mosaic",
          "mixup",
+         "cutmix",
          "copy_paste",
          "conf",
          "iou",
ultralytics/cfg/default.yaml CHANGED
@@ -114,6 +114,7 @@ fliplr: 0.5 # (float) image flip left-right (probability)
  bgr: 0.0 # (float) image channel BGR (probability)
  mosaic: 1.0 # (float) image mosaic (probability)
  mixup: 0.0 # (float) image mixup (probability)
+ cutmix: 0.0 # (float) image cutmix (probability)
  copy_paste: 0.0 # (float) segment copy-paste (probability)
  copy_paste_mode: "flip" # (str) the method to do copy_paste augmentation (flip, mixup)
  auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
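
Because cutmix joins mosaic and mixup in CFG_FRACTION_KEYS and default.yaml, it can be overridden per run like any other augmentation probability. A minimal sketch of enabling it from the Python API (the model weights and dataset names are illustrative placeholders, not part of this diff):

from ultralytics import YOLO

# Minimal sketch: train with CutMix applied to roughly 30% of batches.
model = YOLO("yolo11n.pt")  # placeholder model
model.train(data="coco8.yaml", epochs=3, imgsz=640, cutmix=0.3)  # placeholder dataset
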
ultralytics/data/augment.py CHANGED
@@ -317,7 +317,7 @@ class Compose:
 
  class BaseMixTransform:
      """
-     Base class for mix transformations like MixUp and Mosaic.
+     Base class for mix transformations like Cutmix, MixUp and Mosaic.
 
      This class provides a foundation for implementing mix transformations on datasets. It handles the
      probability-based application of transforms and manages the mixing of multiple images and labels.
@@ -348,7 +348,7 @@ class BaseMixTransform:
 
      def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
          """
-         Initializes the BaseMixTransform object for mix transformations like MixUp and Mosaic.
+         Initializes the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
 
          This class serves as a base for implementing mix transformations in image processing pipelines.
 
@@ -368,7 +368,7 @@ class BaseMixTransform:
 
      def __call__(self, labels):
          """
-         Applies pre-processing transforms and mixup/mosaic transforms to labels data.
+         Applies pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
 
          This method determines whether to apply the mix transform based on a probability factor. If applied, it
          selects additional images, applies pre-transforms if specified, and then performs the mix transform.
@@ -391,7 +391,7 @@ class BaseMixTransform:
          if isinstance(indexes, int):
              indexes = [indexes]
 
-         # Get images information will be used for Mosaic or MixUp
+         # Get images information will be used for Mosaic, CutMix or MixUp
          mix_labels = [self.dataset.get_image_and_label(i) for i in indexes]
 
          if self.pre_transform is not None:
@@ -401,16 +401,16 @@ class BaseMixTransform:
 
          # Update cls and texts
          labels = self._update_label_text(labels)
-         # Mosaic or MixUp
+         # Mosaic, CutMix or MixUp
          labels = self._mix_transform(labels)
          labels.pop("mix_labels", None)
          return labels
 
      def _mix_transform(self, labels):
          """
-         Applies MixUp or Mosaic augmentation to the label dictionary.
+         Applies CutMix, MixUp or Mosaic augmentation to the label dictionary.
 
-         This method should be implemented by subclasses to perform specific mix transformations like MixUp or
+         This method should be implemented by subclasses to perform specific mix transformations like CutMix, MixUp or
          Mosaic. It modifies the input label dictionary in-place with the augmented data.
 
          Args:
@@ -949,6 +949,117 @@ class MixUp(BaseMixTransform):
          return labels
 
 
+ class CutMix(BaseMixTransform):
+     """
+     Applies CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
+
+     CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from another image,
+     and adjusts the labels proportionally to the area of the mixed region.
+
+     Attributes:
+         dataset (Any): The dataset to which CutMix augmentation will be applied.
+         pre_transform (Callable | None): Optional transform to apply before CutMix.
+         p (float): Probability of applying CutMix augmentation.
+         beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
+
+     Methods:
+         get_indexes: Returns a random index from the dataset.
+         _mix_transform: Applies CutMix augmentation to the input labels.
+         _rand_bbox: Generates random bounding box coordinates for the cut region.
+
+     Examples:
+         >>> from ultralytics.data.augment import CutMix
+         >>> dataset = YourDataset(...)  # Your image dataset
+         >>> cutmix = CutMix(dataset, p=0.5)
+         >>> augmented_labels = cutmix(original_labels)
+     """
+
+     def __init__(self, dataset, pre_transform=None, p=0.0, beta=1.0) -> None:
+         """
+         Initializes the CutMix augmentation object.
+
+         Args:
+             dataset (Any): The dataset to which CutMix augmentation will be applied.
+             pre_transform (Callable | None): Optional transform to apply before CutMix.
+             p (float): Probability of applying CutMix augmentation.
+             beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
+         """
+         super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
+         self.beta = beta
+
+     def get_indexes(self):
+         """
+         Get a random index from the dataset.
+
+         Returns:
+             (int): A random integer index within the range of the dataset length.
+         """
+         return random.randint(0, len(self.dataset) - 1)
+
+     def _rand_bbox(self, width, height, lam):
+         """
+         Generates random bounding box coordinates for the cut region.
+
+         Args:
+             width (int): Width of the image.
+             height (int): Height of the image.
+             lam (float): Mixing ratio from the Beta distribution.
+
+         Returns:
+             (tuple): (x1, y1, x2, y2) coordinates of the bounding box.
+         """
+         cut_ratio = np.sqrt(1.0 - lam)
+         cut_w = int(width * cut_ratio)
+         cut_h = int(height * cut_ratio)
+
+         # Random center
+         cx = np.random.randint(width)
+         cy = np.random.randint(height)
+
+         # Bounding box coordinates
+         x1 = np.clip(cx - cut_w // 2, 0, width)
+         y1 = np.clip(cy - cut_h // 2, 0, height)
+         x2 = np.clip(cx + cut_w // 2, 0, width)
+         y2 = np.clip(cy + cut_h // 2, 0, height)
+
+         return x1, y1, x2, y2
+
+     def _mix_transform(self, labels):
+         """
+         Applies CutMix augmentation to the input labels.
+
+         Args:
+             labels (dict): A dictionary containing the original image and label information.
+
+         Returns:
+             (dict): A dictionary containing the mixed image and adjusted labels.
+
+         Examples:
+             >>> cutter = CutMix(dataset)
+             >>> mixed_labels = cutter._mix_transform(labels)
+         """
+         # Sample mixing ratio from Beta distribution
+         lam = np.random.beta(self.beta, self.beta)
+
+         # Get a random second image
+         labels2 = labels["mix_labels"][0]
+         img2 = labels2["img"]
+         h, w = labels["img"].shape[:2]
+
+         # Generate random bounding box
+         x1, y1, x2, y2 = self._rand_bbox(w, h, lam)
+
+         # Apply CutMix
+         labels["img"][y1:y2, x1:x2] = img2[y1:y2, x1:x2]
+
+         # Adjust lambda to match the actual area ratio
+         lam = 1 - ((x2 - x1) * (y2 - y1) / (w * h))
+
+         labels["cls"] = np.concatenate([labels["cls"], labels2["cls"]], axis=0)
+         labels["instances"] = Instances.concatenate([labels["instances"], labels2["instances"]], axis=0)
+         return labels
+
+
  class RandomPerspective:
      """
      Implements random perspective and affine transformations on images and corresponding annotations.
@@ -1586,6 +1697,9 @@ class LetterBox:
 
          if shape[::-1] != new_unpad:  # resize
              img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+         if img.ndim == 2:
+             img = img[..., None]
+
          top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
          left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
          h, w, c = img.shape
@@ -2442,6 +2556,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
          [
              pre_transform,
              MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
+             CutMix(dataset, pre_transform=pre_transform, p=hyp.cutmix),
              Albumentations(p=1.0),
              RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
              RandomFlip(direction="vertical", p=hyp.flipud),
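
The heart of the new CutMix transform is the patch swap followed by the area-based lambda correction. A standalone sketch of that arithmetic on dummy arrays, mirroring the _rand_bbox and _mix_transform logic above without the Ultralytics classes:

import numpy as np

h, w = 480, 640
img1 = np.zeros((h, w, 3), dtype=np.uint8)
img2 = np.full((h, w, 3), 255, dtype=np.uint8)

lam = np.random.beta(1.0, 1.0)  # sampled mixing ratio
cut_w, cut_h = int(w * np.sqrt(1 - lam)), int(h * np.sqrt(1 - lam))
cx, cy = np.random.randint(w), np.random.randint(h)
x1, y1 = np.clip(cx - cut_w // 2, 0, w), np.clip(cy - cut_h // 2, 0, h)
x2, y2 = np.clip(cx + cut_w // 2, 0, w), np.clip(cy + cut_h // 2, 0, h)

img1[y1:y2, x1:x2] = img2[y1:y2, x1:x2]  # paste the cut region
lam = 1 - (x2 - x1) * (y2 - y1) / (w * h)  # corrected ratio after clipping at the image borders
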
ultralytics/data/base.py CHANGED
@@ -33,6 +33,7 @@ class BaseDataset(Dataset):
          single_cls (bool): Whether to treat all objects as a single class.
          prefix (str): Prefix to print in log messages.
          fraction (float): Fraction of dataset to utilize.
+         cv2_flag (int): OpenCV flag for reading images.
          im_files (List[str]): List of image file paths.
          labels (List[Dict]): List of label data dictionaries.
          ni (int): Number of images in the dataset.
@@ -79,6 +80,7 @@ class BaseDataset(Dataset):
          single_cls=False,
          classes=None,
          fraction=1.0,
+         channels=3,
      ):
          """
          Initialize BaseDataset with given configuration and options.
@@ -97,6 +99,7 @@ class BaseDataset(Dataset):
              single_cls (bool, optional): If True, single class training is used.
              classes (list, optional): List of included classes.
              fraction (float, optional): Fraction of dataset to utilize.
+             channels (int, optional): Number of channels in the images (1 for grayscale, 3 for RGB).
          """
          super().__init__()
          self.img_path = img_path
@@ -105,6 +108,8 @@ class BaseDataset(Dataset):
          self.single_cls = single_cls
          self.prefix = prefix
          self.fraction = fraction
+         self.channels = channels
+         self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR
          self.im_files = self.get_img_files(self.img_path)
          self.labels = self.get_labels()
          self.update_labels(include_class=classes)  # single_cls and include_class
@@ -224,9 +229,9 @@ class BaseDataset(Dataset):
                  except Exception as e:
                      LOGGER.warning(f"{self.prefix}Removing corrupt *.npy image file {fn} due to: {e}")
                      Path(fn).unlink(missing_ok=True)
-                     im = imread(f)  # BGR
+                     im = imread(f, flags=self.cv2_flag)  # BGR
              else:  # read image
-                 im = imread(f)  # BGR
+                 im = imread(f, flags=self.cv2_flag)  # BGR
              if im is None:
                  raise FileNotFoundError(f"Image Not Found {f}")
 
@@ -238,6 +243,8 @@ class BaseDataset(Dataset):
                      im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
              elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                  im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)
+             if im.ndim == 2:
+                 im = im[..., None]
 
              # Add to buffer if training with augmentations
              if self.augment:
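
For context on the new im[..., None] expansion: cv2.imread with IMREAD_GRAYSCALE returns a 2-D (H, W) array, so a trailing channel axis has to be restored before downstream HWC code paths (letterboxing, transforms) can treat grayscale and color images uniformly. A quick sketch (the file path is a placeholder):

import cv2

im = cv2.imread("image.jpg", cv2.IMREAD_GRAYSCALE)  # placeholder path; returns shape (H, W)
if im is not None and im.ndim == 2:
    im = im[..., None]  # shape (H, W, 1), so h, w, c = im.shape keeps working
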
ultralytics/data/dataset.py CHANGED
@@ -84,7 +84,7 @@ class YOLODataset(BaseDataset):
          self.use_obb = task == "obb"
          self.data = data
          assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
-         super().__init__(*args, **kwargs)
+         super().__init__(*args, channels=self.data["channels"], **kwargs)
 
      def cache_labels(self, path=Path("./labels.cache")):
          """
@@ -215,6 +215,7 @@ class YOLODataset(BaseDataset):
          if self.augment:
              hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
              hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
+             hyp.cutmix = hyp.cutmix if self.augment and not self.rect else 0.0
              transforms = v8_transforms(self, self.imgsz, hyp)
          else:
              transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
@@ -235,14 +236,15 @@ class YOLODataset(BaseDataset):
 
      def close_mosaic(self, hyp):
          """
-         Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
+         Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.
 
          Args:
              hyp (dict): Hyperparameters for transforms.
          """
-         hyp.mosaic = 0.0  # set mosaic ratio=0.0
-         hyp.copy_paste = 0.0  # keep the same behavior as previous v8 close-mosaic
-         hyp.mixup = 0.0  # keep the same behavior as previous v8 close-mosaic
+         hyp.mosaic = 0.0
+         hyp.copy_paste = 0.0
+         hyp.mixup = 0.0
+         hyp.cutmix = 0.0
          self.transforms = self.build_transforms(hyp)
 
      def update_labels_info(self, label):
ultralytics/engine/exporter.py CHANGED
@@ -95,7 +95,6 @@ from ultralytics.utils import (
      yaml_save,
  )
  from ultralytics.utils.checks import (
-     IS_PYTHON_MINIMUM_3_12,
      check_imgsz,
      check_is_path_safe,
      check_requirements,
@@ -238,9 +237,6 @@ class Exporter:
              _callbacks (dict, optional): Dictionary of callback functions.
          """
          self.args = get_cfg(cfg, overrides)
-         if self.args.format.lower() in {"coreml", "mlmodel"}:  # fix attempt for protobuf<3.20.x errors
-             os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"  # must run before TensorBoard callback
-
          self.callbacks = _callbacks or callbacks.get_default_callbacks()
          callbacks.add_integration_callbacks(self)
 
@@ -552,7 +548,7 @@ class Exporter:
          """YOLO ONNX export."""
          requirements = ["onnx>=1.12.0"]
          if self.args.simplify:
-             requirements += ["onnxslim", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
+             requirements += ["onnxslim>=0.1.46", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
          check_requirements(requirements)
          import onnx  # noqa
 
@@ -572,12 +568,6 @@ class Exporter:
              dynamic["output0"].pop(2)
          if self.args.nms and self.model.task == "obb":
              self.args.opset = opset_version  # for NMSModel
-             # OBB error https://github.com/pytorch/pytorch/issues/110859#issuecomment-1757841865
-             try:
-                 torch.onnx.register_custom_op_symbolic("aten::lift_fresh", lambda g, x: x, opset_version)
-             except RuntimeError:  # it will fail if it's already registered
-                 pass
-             check_requirements("onnxslim>=0.1.46")  # Older versions has bug with OBB
 
          with arange_patch(self.args):
              export_onnx(
@@ -653,7 +643,7 @@ class Exporter:
              """Quantization transform function."""
              data_item: torch.Tensor = data_item["img"] if isinstance(data_item, dict) else data_item
              assert data_item.dtype == torch.uint8, "Input image must be uint8 for the quantization preprocessing"
-             im = data_item.numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0 - 255 to 0.0 - 1.0
+             im = data_item.numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0-255 to 0.0-1.0
              return np.expand_dims(im, 0) if im.ndim == 3 else im
 
          # Generate calibration data for integer quantization
@@ -703,7 +693,7 @@ class Exporter:
 
      @try_export
      def export_mnn(self, prefix=colorstr("MNN:")):
-         """YOLOv8 MNN export using MNN https://github.com/alibaba/MNN."""
+         """YOLO MNN export using MNN https://github.com/alibaba/MNN."""
          f_onnx, _ = self.export_onnx()  # get onnx model first
 
          check_requirements("MNN>=2.9.6")
@@ -917,14 +907,13 @@ class Exporter:
          import tensorflow as tf  # noqa
          check_requirements(
              (
-                 "keras",  # required by 'onnx2tf' package
                  "tf_keras",  # required by 'onnx2tf' package
                  "sng4onnx>=1.0.1",  # required by 'onnx2tf' package
                  "onnx_graphsurgeon>=0.3.26",  # required by 'onnx2tf' package
                  "ai-edge-litert>=1.2.0",  # required by 'onnx2tf' package
                  "onnx>=1.12.0",
                  "onnx2tf>=1.26.3",
-                 "onnxslim>=0.1.31",
+                 "onnxslim>=0.1.46",
                  "onnxruntime-gpu" if cuda else "onnxruntime",
                  "protobuf>=5",
              ),
@@ -1030,8 +1019,6 @@ class Exporter:
      @try_export
      def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
          """YOLO Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
-         LOGGER.warning(f"{prefix} Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185")
-
          cmd = "edgetpu_compiler --version"
          help_url = "https://coral.ai/docs/edgetpu/compiler/"
          assert LINUX, f"export only supported on Linux. See {help_url}"
@@ -1129,7 +1116,8 @@ class Exporter:
          """YOLO IMX export."""
          gptq = False
          assert LINUX, (
-             "export only supported on Linux. See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
+             "export only supported on Linux. "
+             "See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
          )
          if getattr(self.model, "end2end", False):
              raise ValueError("IMX export is not supported for end2end models.")
@@ -1277,81 +1265,12 @@ class Exporter:
 
          return f, None
 
-     def _add_tflite_metadata(self, file, use_flatbuffers=False):
+     def _add_tflite_metadata(self, file):
          """Add metadata to *.tflite models per https://ai.google.dev/edge/litert/models/metadata."""
-         if not use_flatbuffers:
-             import zipfile
-
-             with zipfile.ZipFile(file, "a", zipfile.ZIP_DEFLATED) as zf:
-                 zf.writestr("metadata.json", json.dumps(self.metadata, indent=2))
-             return
+         import zipfile
 
-         if IS_PYTHON_MINIMUM_3_12:
-             LOGGER.warning(f"TFLite Support package may not be compatible with Python>=3.12 environments for {file}")
-
-         # Update old 'flatbuffers' included inside tensorflow package
-         check_requirements(("tflite_support", "flatbuffers>=23.5.26,<100; platform_machine == 'aarch64'"))
-         import flatbuffers
-
-         try:
-             # TFLite Support bug https://github.com/tensorflow/tflite-support/issues/954#issuecomment-2108570845
-             from tensorflow_lite_support.metadata import metadata_schema_py_generated as schema  # noqa
-             from tensorflow_lite_support.metadata.python import metadata  # noqa
-         except ImportError:  # ARM64 systems may not have the 'tensorflow_lite_support' package available
-             from tflite_support import metadata  # noqa
-             from tflite_support import metadata_schema_py_generated as schema  # noqa
-
-         # Create model info
-         model_meta = schema.ModelMetadataT()
-         model_meta.name = self.metadata["description"]
-         model_meta.version = self.metadata["version"]
-         model_meta.author = self.metadata["author"]
-         model_meta.license = self.metadata["license"]
-
-         # Label file
-         tmp_file = Path(file).parent / "temp_meta.txt"
-         with open(tmp_file, "w", encoding="utf-8") as f:
-             f.write(str(self.metadata))
-
-         label_file = schema.AssociatedFileT()
-         label_file.name = tmp_file.name
-         label_file.type = schema.AssociatedFileType.TENSOR_AXIS_LABELS
-
-         # Create input info
-         input_meta = schema.TensorMetadataT()
-         input_meta.name = "image"
-         input_meta.description = "Input image to be detected."
-         input_meta.content = schema.ContentT()
-         input_meta.content.contentProperties = schema.ImagePropertiesT()
-         input_meta.content.contentProperties.colorSpace = schema.ColorSpaceType.RGB
-         input_meta.content.contentPropertiesType = schema.ContentProperties.ImageProperties
-
-         # Create output info
-         output1 = schema.TensorMetadataT()
-         output1.name = "output"
-         output1.description = "Coordinates of detected objects, class labels, and confidence score"
-         output1.associatedFiles = [label_file]
-         if self.model.task == "segment":
-             output2 = schema.TensorMetadataT()
-             output2.name = "output"
-             output2.description = "Mask protos"
-             output2.associatedFiles = [label_file]
-
-         # Create subgraph info
-         subgraph = schema.SubGraphMetadataT()
-         subgraph.inputTensorMetadata = [input_meta]
-         subgraph.outputTensorMetadata = [output1, output2] if self.model.task == "segment" else [output1]
-         model_meta.subgraphMetadata = [subgraph]
-
-         b = flatbuffers.Builder(0)
-         b.Finish(model_meta.Pack(b), metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
-         metadata_buf = b.Output()
-
-         populator = metadata.MetadataPopulator.with_model_file(str(file))
-         populator.load_metadata_buffer(metadata_buf)
-         populator.load_associated_files([str(tmp_file)])
-         populator.populate()
-         tmp_file.unlink()
+         with zipfile.ZipFile(file, "a", zipfile.ZIP_DEFLATED) as zf:
+             zf.writestr("metadata.json", json.dumps(self.metadata, indent=2))
 
      def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
          """YOLO CoreML pipeline."""
ultralytics/engine/tuner.py CHANGED
@@ -88,8 +88,9 @@ class Tuner:
              "flipud": (0.0, 1.0),  # image flip up-down (probability)
              "fliplr": (0.0, 1.0),  # image flip left-right (probability)
              "bgr": (0.0, 1.0),  # image channel bgr (probability)
-             "mosaic": (0.0, 1.0),  # image mixup (probability)
+             "mosaic": (0.0, 1.0),  # image mosaic (probability)
              "mixup": (0.0, 1.0),  # image mixup (probability)
+             "cutmix": (0.0, 1.0),  # image cutmix (probability)
              "copy_paste": (0.0, 1.0),  # segment copy-paste (probability)
          }
          self.args = get_cfg(overrides=args)
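
Since the (0.0, 1.0) range for cutmix now sits in the Tuner search space, hyperparameter evolution should mutate it alongside mosaic and mixup with no extra configuration. A minimal sketch of kicking off a tuning run (model weights, dataset, epochs and iteration count are placeholders):

from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # placeholder model
model.tune(data="coco8.yaml", epochs=5, iterations=10, plots=False, save=False, val=False)
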