ultralytics 8.3.117__py3-none-any.whl → 8.3.119__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +128 -0
- tests/test_cuda.py +164 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +231 -0
- tests/test_integrations.py +154 -0
- tests/test_python.py +695 -0
- tests/test_solutions.py +176 -0
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +1 -0
- ultralytics/cfg/default.yaml +1 -0
- ultralytics/data/augment.py +122 -7
- ultralytics/data/base.py +9 -2
- ultralytics/data/dataset.py +7 -5
- ultralytics/engine/exporter.py +10 -91
- ultralytics/engine/tuner.py +2 -1
- ultralytics/models/rtdetr/val.py +1 -0
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/model.py +2 -3
- ultralytics/models/yolo/obb/train.py +1 -1
- ultralytics/models/yolo/pose/predict.py +1 -1
- ultralytics/models/yolo/pose/train.py +1 -1
- ultralytics/models/yolo/pose/val.py +1 -1
- ultralytics/models/yolo/segment/train.py +3 -3
- ultralytics/nn/autobackend.py +2 -5
- ultralytics/nn/text_model.py +97 -13
- ultralytics/utils/benchmarks.py +1 -1
- ultralytics/utils/downloads.py +1 -0
- ultralytics/utils/ops.py +1 -1
- ultralytics/utils/tuner.py +2 -1
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/METADATA +6 -7
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/RECORD +37 -28
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/WHEEL +1 -1
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.117.dist-info → ultralytics-8.3.119.dist-info}/top_level.txt +0 -0
tests/test_solutions.py
ADDED
@@ -0,0 +1,176 @@
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

# Tests Ultralytics Solutions: https://docs.ultralytics.com/solutions/,
# including every solution excluding DistanceCalculation and Security Alarm System.

import cv2
import pytest

from tests import MODEL, TMP
from ultralytics import solutions
from ultralytics.utils import ASSETS_URL, IS_RASPBERRYPI, LINUX, checks
from ultralytics.utils.downloads import safe_download

# Pre-defined arguments values
SHOW = False
DEMO_VIDEO = "solutions_ci_demo.mp4"  # for all the solutions, except workout, object cropping and parking management
CROP_VIDEO = "decelera_landscape_min.mov"  # for object cropping solution
POSE_VIDEO = "solution_ci_pose_demo.mp4"  # only for workouts monitoring solution
PARKING_VIDEO = "solution_ci_parking_demo.mp4"  # only for parking management solution
PARKING_AREAS_JSON = "solution_ci_parking_areas.json"  # only for parking management solution
PARKING_MODEL = "solutions_ci_parking_model.pt"  # only for parking management solution
REGION = [(10, 200), (540, 200), (540, 180), (10, 180)]  # for object counting, speed estimation and queue management

# Test configs for each solution : (name, class, needs_frame_count, video, kwargs)
SOLUTIONS = [
    (
        "ObjectCounter",
        solutions.ObjectCounter,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "Heatmap",
        solutions.Heatmap,
        False,
        DEMO_VIDEO,
        {"colormap": cv2.COLORMAP_PARULA, "model": MODEL, "show": SHOW, "region": None},
    ),
    (
        "HeatmapWithRegion",
        solutions.Heatmap,
        False,
        DEMO_VIDEO,
        {"colormap": cv2.COLORMAP_PARULA, "region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "SpeedEstimator",
        solutions.SpeedEstimator,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "QueueManager",
        solutions.QueueManager,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    (
        "LineAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "line", "model": MODEL, "show": SHOW},
    ),
    (
        "PieAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "pie", "model": MODEL, "show": SHOW},
    ),
    (
        "BarAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "bar", "model": MODEL, "show": SHOW},
    ),
    (
        "AreaAnalytics",
        solutions.Analytics,
        True,
        DEMO_VIDEO,
        {"analytics_type": "area", "model": MODEL, "show": SHOW},
    ),
    ("TrackZone", solutions.TrackZone, False, DEMO_VIDEO, {"region": REGION, "model": MODEL, "show": SHOW}),
    (
        "ObjectCropper",
        solutions.ObjectCropper,
        False,
        CROP_VIDEO,
        {"crop_dir": str(TMP / "cropped-detections"), "model": MODEL, "show": SHOW},
    ),
    (
        "ObjectBlurrer",
        solutions.ObjectBlurrer,
        False,
        DEMO_VIDEO,
        {"blur_ratio": 0.5, "model": MODEL, "show": SHOW},
    ),
    (
        "InstanceSegmentation",
        solutions.InstanceSegmentation,
        False,
        DEMO_VIDEO,
        {"model": "yolo11n-seg.pt", "show": SHOW},
    ),
    ("VisionEye", solutions.VisionEye, False, DEMO_VIDEO, {"model": MODEL, "show": SHOW}),
    (
        "RegionCounter",
        solutions.RegionCounter,
        False,
        DEMO_VIDEO,
        {"region": REGION, "model": MODEL, "show": SHOW},
    ),
    ("AIGym", solutions.AIGym, False, POSE_VIDEO, {"kpts": [6, 8, 10], "show": SHOW}),
    (
        "ParkingManager",
        solutions.ParkingManagement,
        False,
        PARKING_VIDEO,
        {"model": str(TMP / PARKING_MODEL), "show": SHOW, "json_file": str(TMP / PARKING_AREAS_JSON)},
    ),
    (
        "StreamlitInference",
        solutions.Inference,
        False,
        None,  # streamlit application don't require video file
        {},  # streamlit application don't accept arguments
    ),
]


def process_video(solution, video_path, needs_frame_count=False):
    """Process video with solution, feeding frames and optional frame count."""
    cap = cv2.VideoCapture(video_path)
    assert cap.isOpened(), f"Error reading video file {video_path}"

    frame_count = 0
    while cap.isOpened():
        success, im0 = cap.read()
        if not success:
            break
        frame_count += 1
        im_copy = im0.copy()
        args = [im_copy, frame_count] if needs_frame_count else [im_copy]
        _ = solution(*args)

    cap.release()


@pytest.mark.skipif(
    (LINUX and checks.IS_PYTHON_3_11) or IS_RASPBERRYPI,
    reason="Disabled for testing due to --slow test errors after YOLOE PR.",
)
@pytest.mark.parametrize("name, solution_class, needs_frame_count, video, kwargs", SOLUTIONS)
def test_solution(name, solution_class, needs_frame_count, video, kwargs):
    """Test individual Ultralytics solution."""
    if video:
        safe_download(url=f"{ASSETS_URL}/{video}", dir=TMP)
    if name == "ParkingManager":
        safe_download(url=f"{ASSETS_URL}/{PARKING_AREAS_JSON}", dir=TMP)
        safe_download(url=f"{ASSETS_URL}/{PARKING_MODEL}", dir=TMP)
    elif name == "StreamlitInference":
        if checks.check_imshow():  # do not merge with elif above
            solution_class(**kwargs).inference()  # requires interactive GUI environment
        return

    process_video(
        solution=solution_class(**kwargs),
        video_path=str(TMP / video),
        needs_frame_count=needs_frame_count,
    )
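The test drives every solution through the same `process_video` loop, feeding one frame per call. To try a single solution interactively outside pytest, a minimal sketch along the same lines (a local `demo.mp4` and `yolo11n.pt` are illustrative stand-ins for the CI assets):

import cv2
from ultralytics import solutions

cap = cv2.VideoCapture("demo.mp4")  # hypothetical local video
counter = solutions.ObjectCounter(
    region=[(10, 200), (540, 200), (540, 180), (10, 180)],  # same REGION shape as the test
    model="yolo11n.pt",
    show=False,
)
while cap.isOpened():
    success, im0 = cap.read()
    if not success:
        break
    counter(im0)  # one call per frame, mirroring process_video()
cap.release()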
ultralytics/__init__.py
CHANGED
ultralytics/cfg/__init__.py
CHANGED
ultralytics/cfg/default.yaml
CHANGED
@@ -114,6 +114,7 @@ fliplr: 0.5 # (float) image flip left-right (probability)
 bgr: 0.0 # (float) image channel BGR (probability)
 mosaic: 1.0 # (float) image mosaic (probability)
 mixup: 0.0 # (float) image mixup (probability)
+cutmix: 0.0 # (float) image cutmix (probability)
 copy_paste: 0.0 # (float) segment copy-paste (probability)
 copy_paste_mode: "flip" # (str) the method to do copy_paste augmentation (flip, mixup)
 auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
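Since `cutmix` now sits alongside `mosaic` and `mixup` in default.yaml, it should be overridable like any other training hyperparameter; a minimal sketch (model and dataset names are illustrative):

from ultralytics import YOLO

model = YOLO("yolo11n.pt")
# cutmix is a probability in [0, 1]; 0.0 (the new default above) disables it
model.train(data="coco8.yaml", epochs=3, cutmix=0.5)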
ultralytics/data/augment.py
CHANGED
@@ -317,7 +317,7 @@ class Compose:
 
 class BaseMixTransform:
     """
-    Base class for mix transformations like MixUp and Mosaic.
+    Base class for mix transformations like Cutmix, MixUp and Mosaic.
 
     This class provides a foundation for implementing mix transformations on datasets. It handles the
     probability-based application of transforms and manages the mixing of multiple images and labels.
@@ -348,7 +348,7 @@ class BaseMixTransform:
 
     def __init__(self, dataset, pre_transform=None, p=0.0) -> None:
         """
-        Initializes the BaseMixTransform object for mix transformations like MixUp and Mosaic.
+        Initializes the BaseMixTransform object for mix transformations like CutMix, MixUp and Mosaic.
 
         This class serves as a base for implementing mix transformations in image processing pipelines.
 
@@ -368,7 +368,7 @@ class BaseMixTransform:
 
     def __call__(self, labels):
         """
-        Applies pre-processing transforms and mixup/mosaic transforms to labels data.
+        Applies pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
 
         This method determines whether to apply the mix transform based on a probability factor. If applied, it
         selects additional images, applies pre-transforms if specified, and then performs the mix transform.
@@ -391,7 +391,7 @@ class BaseMixTransform:
         if isinstance(indexes, int):
             indexes = [indexes]
 
-        # Get images information will be used for Mosaic or MixUp
+        # Get images information will be used for Mosaic, CutMix or MixUp
         mix_labels = [self.dataset.get_image_and_label(i) for i in indexes]
 
         if self.pre_transform is not None:
@@ -401,16 +401,16 @@ class BaseMixTransform:
 
         # Update cls and texts
         labels = self._update_label_text(labels)
-        # Mosaic or MixUp
+        # Mosaic, CutMix or MixUp
         labels = self._mix_transform(labels)
         labels.pop("mix_labels", None)
         return labels
 
     def _mix_transform(self, labels):
         """
-        Applies MixUp or Mosaic augmentation to the label dictionary.
+        Applies CutMix, MixUp or Mosaic augmentation to the label dictionary.
 
-        This method should be implemented by subclasses to perform specific mix transformations like MixUp or
+        This method should be implemented by subclasses to perform specific mix transformations like CutMix, MixUp or
         Mosaic. It modifies the input label dictionary in-place with the augmented data.
 
         Args:
@@ -949,6 +949,117 @@ class MixUp(BaseMixTransform):
         return labels
 
 
+class CutMix(BaseMixTransform):
+    """
+    Applies CutMix augmentation to image datasets as described in the paper https://arxiv.org/abs/1905.04899.
+
+    CutMix combines two images by replacing a random rectangular region of one image with the corresponding region from another image,
+    and adjusts the labels proportionally to the area of the mixed region.
+
+    Attributes:
+        dataset (Any): The dataset to which CutMix augmentation will be applied.
+        pre_transform (Callable | None): Optional transform to apply before CutMix.
+        p (float): Probability of applying CutMix augmentation.
+        beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
+
+    Methods:
+        get_indexes: Returns a random index from the dataset.
+        _mix_transform: Applies CutMix augmentation to the input labels.
+        _rand_bbox: Generates random bounding box coordinates for the cut region.
+
+    Examples:
+        >>> from ultralytics.data.augment import CutMix
+        >>> dataset = YourDataset(...)  # Your image dataset
+        >>> cutmix = CutMix(dataset, p=0.5)
+        >>> augmented_labels = cutmix(original_labels)
+    """
+
+    def __init__(self, dataset, pre_transform=None, p=0.0, beta=1.0) -> None:
+        """
+        Initializes the CutMix augmentation object.
+
+        Args:
+            dataset (Any): The dataset to which CutMix augmentation will be applied.
+            pre_transform (Callable | None): Optional transform to apply before CutMix.
+            p (float): Probability of applying CutMix augmentation.
+            beta (float): Beta distribution parameter for sampling the mixing ratio (default=1.0).
+        """
+        super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
+        self.beta = beta
+
+    def get_indexes(self):
+        """
+        Get a random index from the dataset.
+
+        Returns:
+            (int): A random integer index within the range of the dataset length.
+        """
+        return random.randint(0, len(self.dataset) - 1)
+
+    def _rand_bbox(self, width, height, lam):
+        """
+        Generates random bounding box coordinates for the cut region.
+
+        Args:
+            width (int): Width of the image.
+            height (int): Height of the image.
+            lam (float): Mixing ratio from the Beta distribution.
+
+        Returns:
+            (tuple): (x1, y1, x2, y2) coordinates of the bounding box.
+        """
+        cut_ratio = np.sqrt(1.0 - lam)
+        cut_w = int(width * cut_ratio)
+        cut_h = int(height * cut_ratio)
+
+        # Random center
+        cx = np.random.randint(width)
+        cy = np.random.randint(height)
+
+        # Bounding box coordinates
+        x1 = np.clip(cx - cut_w // 2, 0, width)
+        y1 = np.clip(cy - cut_h // 2, 0, height)
+        x2 = np.clip(cx + cut_w // 2, 0, width)
+        y2 = np.clip(cy + cut_h // 2, 0, height)
+
+        return x1, y1, x2, y2
+
+    def _mix_transform(self, labels):
+        """
+        Applies CutMix augmentation to the input labels.
+
+        Args:
+            labels (dict): A dictionary containing the original image and label information.
+
+        Returns:
+            (dict): A dictionary containing the mixed image and adjusted labels.
+
+        Examples:
+            >>> cutter = CutMix(dataset)
+            >>> mixed_labels = cutter._mix_transform(labels)
+        """
+        # Sample mixing ratio from Beta distribution
+        lam = np.random.beta(self.beta, self.beta)
+
+        # Get a random second image
+        labels2 = labels["mix_labels"][0]
+        img2 = labels2["img"]
+        h, w = labels["img"].shape[:2]
+
+        # Generate random bounding box
+        x1, y1, x2, y2 = self._rand_bbox(w, h, lam)
+
+        # Apply CutMix
+        labels["img"][y1:y2, x1:x2] = img2[y1:y2, x1:x2]
+
+        # Adjust lambda to match the actual area ratio
+        lam = 1 - ((x2 - x1) * (y2 - y1) / (w * h))
+
+        labels["cls"] = np.concatenate([labels["cls"], labels2["cls"]], axis=0)
+        labels["instances"] = Instances.concatenate([labels["instances"], labels2["instances"]], axis=0)
+        return labels
+
+
 class RandomPerspective:
     """
     Implements random perspective and affine transformations on images and corresponding annotations.
@@ -1586,6 +1697,9 @@ class LetterBox:
 
         if shape[::-1] != new_unpad:  # resize
             img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+        if img.ndim == 2:
+            img = img[..., None]
+
         top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
         left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
         h, w, c = img.shape
@@ -2442,6 +2556,7 @@ def v8_transforms(dataset, imgsz, hyp, stretch=False):
         [
             pre_transform,
             MixUp(dataset, pre_transform=pre_transform, p=hyp.mixup),
+            CutMix(dataset, pre_transform=pre_transform, p=hyp.cutmix),
             Albumentations(p=1.0),
             RandomHSV(hgain=hyp.hsv_h, sgain=hyp.hsv_s, vgain=hyp.hsv_v),
             RandomFlip(direction="vertical", p=hyp.flipud),
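The CutMix geometry is worth tracing once: `_mix_transform` samples `lam` from Beta(beta, beta), cuts a box whose side scales with sqrt(1 - lam), pastes the second image into it, then recomputes `lam` from the realized area because clipping at the image border can shrink the box. A standalone sketch of that adjustment (the same numpy math as above, not the library API):

import numpy as np

def rand_bbox(width, height, lam):
    """Mirror of _rand_bbox above: box edge ~ sqrt(1 - lam), clipped to the image."""
    cut_ratio = np.sqrt(1.0 - lam)
    cut_w, cut_h = int(width * cut_ratio), int(height * cut_ratio)
    cx, cy = np.random.randint(width), np.random.randint(height)
    return (
        np.clip(cx - cut_w // 2, 0, width),
        np.clip(cy - cut_h // 2, 0, height),
        np.clip(cx + cut_w // 2, 0, width),
        np.clip(cy + cut_h // 2, 0, height),
    )

lam = np.random.beta(1.0, 1.0)
x1, y1, x2, y2 = rand_bbox(640, 640, lam)
# Clipping means the pasted area can be smaller than (1 - lam) * W * H,
# so the effective mixing ratio is recomputed from the actual box:
lam_adjusted = 1 - ((x2 - x1) * (y2 - y1) / (640 * 640))
print(f"sampled lam={lam:.3f}, adjusted lam={lam_adjusted:.3f}")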
ultralytics/data/base.py
CHANGED
@@ -33,6 +33,7 @@ class BaseDataset(Dataset):
         single_cls (bool): Whether to treat all objects as a single class.
         prefix (str): Prefix to print in log messages.
         fraction (float): Fraction of dataset to utilize.
+        cv2_flag (int): OpenCV flag for reading images.
         im_files (List[str]): List of image file paths.
         labels (List[Dict]): List of label data dictionaries.
         ni (int): Number of images in the dataset.
@@ -79,6 +80,7 @@ class BaseDataset(Dataset):
         single_cls=False,
         classes=None,
         fraction=1.0,
+        channels=3,
     ):
         """
         Initialize BaseDataset with given configuration and options.
@@ -97,6 +99,7 @@ class BaseDataset(Dataset):
             single_cls (bool, optional): If True, single class training is used.
             classes (list, optional): List of included classes.
             fraction (float, optional): Fraction of dataset to utilize.
+            channels (int, optional): Number of channels in the images (1 for grayscale, 3 for RGB).
         """
         super().__init__()
         self.img_path = img_path
@@ -105,6 +108,8 @@ class BaseDataset(Dataset):
         self.single_cls = single_cls
         self.prefix = prefix
         self.fraction = fraction
+        self.channels = channels
+        self.cv2_flag = cv2.IMREAD_GRAYSCALE if channels == 1 else cv2.IMREAD_COLOR
         self.im_files = self.get_img_files(self.img_path)
         self.labels = self.get_labels()
         self.update_labels(include_class=classes)  # single_cls and include_class
@@ -224,9 +229,9 @@ class BaseDataset(Dataset):
             except Exception as e:
                 LOGGER.warning(f"{self.prefix}Removing corrupt *.npy image file {fn} due to: {e}")
                 Path(fn).unlink(missing_ok=True)
-                im = imread(f)  # BGR
+                im = imread(f, flags=self.cv2_flag)  # BGR
         else:  # read image
-            im = imread(f)  # BGR
+            im = imread(f, flags=self.cv2_flag)  # BGR
         if im is None:
             raise FileNotFoundError(f"Image Not Found {f}")
 
@@ -238,6 +243,8 @@ class BaseDataset(Dataset):
                 im = cv2.resize(im, (w, h), interpolation=cv2.INTER_LINEAR)
             elif not (h0 == w0 == self.imgsz):  # resize by stretching image to square imgsz
                 im = cv2.resize(im, (self.imgsz, self.imgsz), interpolation=cv2.INTER_LINEAR)
+            if im.ndim == 2:
+                im = im[..., None]
 
             # Add to buffer if training with augmentations
             if self.augment:
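The `im[..., None]` step exists because OpenCV returns a 2-D array for grayscale reads, while the rest of the pipeline assumes HWC layout. A quick illustration (the image path is hypothetical):

import cv2

im = cv2.imread("sample.jpg", flags=cv2.IMREAD_GRAYSCALE)
print(im.shape)  # (H, W): cv2 drops the channel axis for grayscale
if im.ndim == 2:
    im = im[..., None]
print(im.shape)  # (H, W, 1): downstream HWC code keeps working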
ultralytics/data/dataset.py
CHANGED
@@ -84,7 +84,7 @@ class YOLODataset(BaseDataset):
         self.use_obb = task == "obb"
         self.data = data
         assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
-        super().__init__(*args, **kwargs)
+        super().__init__(*args, channels=self.data["channels"], **kwargs)
 
     def cache_labels(self, path=Path("./labels.cache")):
         """
@@ -215,6 +215,7 @@ class YOLODataset(BaseDataset):
         if self.augment:
             hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
             hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
+            hyp.cutmix = hyp.cutmix if self.augment and not self.rect else 0.0
             transforms = v8_transforms(self, self.imgsz, hyp)
         else:
             transforms = Compose([LetterBox(new_shape=(self.imgsz, self.imgsz), scaleup=False)])
@@ -235,14 +236,15 @@ class YOLODataset(BaseDataset):
 
     def close_mosaic(self, hyp):
         """
-
+        Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.
 
         Args:
             hyp (dict): Hyperparameters for transforms.
         """
-        hyp.mosaic = 0.0
-        hyp.copy_paste = 0.0
-        hyp.mixup = 0.0
+        hyp.mosaic = 0.0
+        hyp.copy_paste = 0.0
+        hyp.mixup = 0.0
+        hyp.cutmix = 0.0
         self.transforms = self.build_transforms(hyp)
 
     def update_labels_info(self, label):
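With `channels` now forwarded from the dataset dict, a dataset YAML is presumably expected to declare it (names and paths below are illustrative; 3-channel color likely remains the default when the key is absent):

# hypothetical grayscale dataset YAML
path: datasets/xray
train: images/train
val: images/val
channels: 1 # 1 -> cv2.IMREAD_GRAYSCALE, otherwise cv2.IMREAD_COLOR
names:
  0: fracture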
ultralytics/engine/exporter.py
CHANGED
@@ -95,7 +95,6 @@ from ultralytics.utils import (
     yaml_save,
 )
 from ultralytics.utils.checks import (
-    IS_PYTHON_MINIMUM_3_12,
     check_imgsz,
     check_is_path_safe,
     check_requirements,
@@ -238,9 +237,6 @@ class Exporter:
             _callbacks (dict, optional): Dictionary of callback functions.
         """
         self.args = get_cfg(cfg, overrides)
-        if self.args.format.lower() in {"coreml", "mlmodel"}:  # fix attempt for protobuf<3.20.x errors
-            os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"  # must run before TensorBoard callback
-
         self.callbacks = _callbacks or callbacks.get_default_callbacks()
         callbacks.add_integration_callbacks(self)
 
@@ -552,7 +548,7 @@ class Exporter:
         """YOLO ONNX export."""
         requirements = ["onnx>=1.12.0"]
         if self.args.simplify:
-            requirements += ["onnxslim", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
+            requirements += ["onnxslim>=0.1.46", "onnxruntime" + ("-gpu" if torch.cuda.is_available() else "")]
         check_requirements(requirements)
         import onnx  # noqa
 
@@ -572,12 +568,6 @@ class Exporter:
             dynamic["output0"].pop(2)
         if self.args.nms and self.model.task == "obb":
             self.args.opset = opset_version  # for NMSModel
-            # OBB error https://github.com/pytorch/pytorch/issues/110859#issuecomment-1757841865
-            try:
-                torch.onnx.register_custom_op_symbolic("aten::lift_fresh", lambda g, x: x, opset_version)
-            except RuntimeError:  # it will fail if it's already registered
-                pass
-            check_requirements("onnxslim>=0.1.46")  # Older versions has bug with OBB
 
         with arange_patch(self.args):
             export_onnx(
@@ -653,7 +643,7 @@ class Exporter:
             """Quantization transform function."""
             data_item: torch.Tensor = data_item["img"] if isinstance(data_item, dict) else data_item
             assert data_item.dtype == torch.uint8, "Input image must be uint8 for the quantization preprocessing"
-            im = data_item.numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0
+            im = data_item.numpy().astype(np.float32) / 255.0  # uint8 to fp16/32 and 0-255 to 0.0-1.0
             return np.expand_dims(im, 0) if im.ndim == 3 else im
 
         # Generate calibration data for integer quantization
@@ -703,7 +693,7 @@ class Exporter:
 
     @try_export
     def export_mnn(self, prefix=colorstr("MNN:")):
-        """
+        """YOLO MNN export using MNN https://github.com/alibaba/MNN."""
         f_onnx, _ = self.export_onnx()  # get onnx model first
 
         check_requirements("MNN>=2.9.6")
@@ -917,14 +907,13 @@ class Exporter:
         import tensorflow as tf  # noqa
         check_requirements(
             (
-                "keras",  # required by 'onnx2tf' package
                 "tf_keras",  # required by 'onnx2tf' package
                 "sng4onnx>=1.0.1",  # required by 'onnx2tf' package
                 "onnx_graphsurgeon>=0.3.26",  # required by 'onnx2tf' package
                 "ai-edge-litert>=1.2.0",  # required by 'onnx2tf' package
                 "onnx>=1.12.0",
                 "onnx2tf>=1.26.3",
-                "onnxslim>=0.1.
+                "onnxslim>=0.1.46",
                 "onnxruntime-gpu" if cuda else "onnxruntime",
                 "protobuf>=5",
             ),
@@ -1030,8 +1019,6 @@ class Exporter:
     @try_export
     def export_edgetpu(self, tflite_model="", prefix=colorstr("Edge TPU:")):
         """YOLO Edge TPU export https://coral.ai/docs/edgetpu/models-intro/."""
-        LOGGER.warning(f"{prefix} Edge TPU known bug https://github.com/ultralytics/ultralytics/issues/1185")
-
         cmd = "edgetpu_compiler --version"
         help_url = "https://coral.ai/docs/edgetpu/compiler/"
         assert LINUX, f"export only supported on Linux. See {help_url}"
@@ -1129,7 +1116,8 @@ class Exporter:
         """YOLO IMX export."""
         gptq = False
         assert LINUX, (
-            "export only supported on Linux. See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
+            "export only supported on Linux. "
+            "See https://developer.aitrios.sony-semicon.com/en/raspberrypi-ai-camera/documentation/imx500-converter"
         )
         if getattr(self.model, "end2end", False):
             raise ValueError("IMX export is not supported for end2end models.")
@@ -1277,81 +1265,12 @@ class Exporter:
 
         return f, None
 
-    def _add_tflite_metadata(self, file
+    def _add_tflite_metadata(self, file):
         """Add metadata to *.tflite models per https://ai.google.dev/edge/litert/models/metadata."""
-
-        import zipfile
-
-        with zipfile.ZipFile(file, "a", zipfile.ZIP_DEFLATED) as zf:
-            zf.writestr("metadata.json", json.dumps(self.metadata, indent=2))
-        return
+        import zipfile
 
-
-
-        # Update old 'flatbuffers' included inside tensorflow package
-        check_requirements(("tflite_support", "flatbuffers>=23.5.26,<100; platform_machine == 'aarch64'"))
-        import flatbuffers
-
-        try:
-            # TFLite Support bug https://github.com/tensorflow/tflite-support/issues/954#issuecomment-2108570845
-            from tensorflow_lite_support.metadata import metadata_schema_py_generated as schema  # noqa
-            from tensorflow_lite_support.metadata.python import metadata  # noqa
-        except ImportError:  # ARM64 systems may not have the 'tensorflow_lite_support' package available
-            from tflite_support import metadata  # noqa
-            from tflite_support import metadata_schema_py_generated as schema  # noqa
-
-        # Create model info
-        model_meta = schema.ModelMetadataT()
-        model_meta.name = self.metadata["description"]
-        model_meta.version = self.metadata["version"]
-        model_meta.author = self.metadata["author"]
-        model_meta.license = self.metadata["license"]
-
-        # Label file
-        tmp_file = Path(file).parent / "temp_meta.txt"
-        with open(tmp_file, "w", encoding="utf-8") as f:
-            f.write(str(self.metadata))
-
-        label_file = schema.AssociatedFileT()
-        label_file.name = tmp_file.name
-        label_file.type = schema.AssociatedFileType.TENSOR_AXIS_LABELS
-
-        # Create input info
-        input_meta = schema.TensorMetadataT()
-        input_meta.name = "image"
-        input_meta.description = "Input image to be detected."
-        input_meta.content = schema.ContentT()
-        input_meta.content.contentProperties = schema.ImagePropertiesT()
-        input_meta.content.contentProperties.colorSpace = schema.ColorSpaceType.RGB
-        input_meta.content.contentPropertiesType = schema.ContentProperties.ImageProperties
-
-        # Create output info
-        output1 = schema.TensorMetadataT()
-        output1.name = "output"
-        output1.description = "Coordinates of detected objects, class labels, and confidence score"
-        output1.associatedFiles = [label_file]
-        if self.model.task == "segment":
-            output2 = schema.TensorMetadataT()
-            output2.name = "output"
-            output2.description = "Mask protos"
-            output2.associatedFiles = [label_file]
-
-        # Create subgraph info
-        subgraph = schema.SubGraphMetadataT()
-        subgraph.inputTensorMetadata = [input_meta]
-        subgraph.outputTensorMetadata = [output1, output2] if self.model.task == "segment" else [output1]
-        model_meta.subgraphMetadata = [subgraph]
-
-        b = flatbuffers.Builder(0)
-        b.Finish(model_meta.Pack(b), metadata.MetadataPopulator.METADATA_FILE_IDENTIFIER)
-        metadata_buf = b.Output()
-
-        populator = metadata.MetadataPopulator.with_model_file(str(file))
-        populator.load_metadata_buffer(metadata_buf)
-        populator.load_associated_files([str(tmp_file)])
-        populator.populate()
-        tmp_file.unlink()
+        with zipfile.ZipFile(file, "a", zipfile.ZIP_DEFLATED) as zf:
+            zf.writestr("metadata.json", json.dumps(self.metadata, indent=2))
 
     def _pipeline_coreml(self, model, weights_dir=None, prefix=colorstr("CoreML Pipeline:")):
         """YOLO CoreML pipeline."""
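The slimmed `_add_tflite_metadata` relies on the fact that appending a zip archive to a file leaves the archive readable, since zip readers locate the central directory from the end of the file; the .tflite stays a valid model and doubles as an archive. Reading the metadata back is then just (file name illustrative):

import json
import zipfile

with zipfile.ZipFile("yolo11n_float32.tflite") as zf:  # the appended archive is found at EOF
    metadata = json.loads(zf.read("metadata.json"))
print(metadata.get("description"), metadata.get("version"))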
ultralytics/engine/tuner.py
CHANGED
@@ -88,8 +88,9 @@ class Tuner:
             "flipud": (0.0, 1.0),  # image flip up-down (probability)
             "fliplr": (0.0, 1.0),  # image flip left-right (probability)
             "bgr": (0.0, 1.0),  # image channel bgr (probability)
-            "mosaic": (0.0, 1.0),  # image
+            "mosaic": (0.0, 1.0),  # image mosaic (probability)
             "mixup": (0.0, 1.0),  # image mixup (probability)
+            "cutmix": (0.0, 1.0),  # image cutmix (probability)
             "copy_paste": (0.0, 1.0),  # segment copy-paste (probability)
         }
         self.args = get_cfg(overrides=args)
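With "cutmix" added to the Tuner search space, hyperparameter evolution should now mutate it within (0.0, 1.0) alongside the other augmentation probabilities; a short sketch of kicking off tuning (dataset and budgets are illustrative):

from ultralytics import YOLO

model = YOLO("yolo11n.pt")
# Each iteration mutates hyperparameters (now including cutmix) and retrains
model.tune(data="coco8.yaml", epochs=5, iterations=10)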
|