ultralytics 8.3.142__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between these versions as they appear in that registry.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +12 -12
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -157
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +16 -8
- ultralytics/solutions/object_cropper.py +12 -5
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +215 -85
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
- ultralytics-8.3.144.dist-info/RECORD +272 -0
- ultralytics-8.3.142.dist-info/RECORD +0 -272
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/data/split_dota.py
CHANGED
@@ -4,6 +4,7 @@ import itertools
 from glob import glob
 from math import ceil
 from pathlib import Path
+from typing import Any, Dict, List, Tuple

 import cv2
 import numpy as np
@@ -14,17 +15,17 @@ from ultralytics.utils import TQDM
 from ultralytics.utils.checks import check_requirements


-def bbox_iof(polygon1, bbox2, eps=1e-6):
+def bbox_iof(polygon1: np.ndarray, bbox2: np.ndarray, eps: float = 1e-6) -> np.ndarray:
     """
     Calculate Intersection over Foreground (IoF) between polygons and bounding boxes.

     Args:
-        polygon1 (np.ndarray): Polygon coordinates with shape (
-        bbox2 (np.ndarray): Bounding boxes with shape (
+        polygon1 (np.ndarray): Polygon coordinates with shape (N, 8).
+        bbox2 (np.ndarray): Bounding boxes with shape (N, 4).
         eps (float, optional): Small value to prevent division by zero.

     Returns:
-        (np.ndarray): IoF scores with shape (
+        (np.ndarray): IoF scores with shape (N, 1) or (N, M) if bbox2 is (M, 4).

     Notes:
         Polygon format: [x1, y1, x2, y2, x3, y3, x4, y4].
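
The annotated `bbox_iof()` above scores (N, 8) quadrilateral polygons against (M, 4) axis-aligned boxes. A minimal sketch with illustrative coordinates (not taken from the package; shapely is required for the exact polygon intersection):

```python
import numpy as np

from ultralytics.data.split_dota import bbox_iof

# One 100x100 quadrilateral (x1, y1, ..., x4, y4) against two axis-aligned boxes (x1, y1, x2, y2).
polygon = np.array([[0, 0, 100, 0, 100, 100, 0, 100]], dtype=np.float32)
boxes = np.array([[0, 0, 50, 50], [200, 200, 300, 300]], dtype=np.float32)
iof = bbox_iof(polygon, boxes)  # intersection area divided by the polygon (foreground) area
print(iof)  # expected shape (1, 2), roughly [[0.25, 0.0]]
```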
@@ -61,16 +62,16 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
     return outputs


-def load_yolo_dota(data_root, split="train"):
+def load_yolo_dota(data_root: str, split: str = "train") -> List[Dict[str, Any]]:
     """
-    Load DOTA dataset.
+    Load DOTA dataset annotations and image information.

     Args:
         data_root (str): Data root directory.
-        split (str): The split data set, could be
+        split (str, optional): The split data set, could be 'train' or 'val'.

     Returns:
-        (List[Dict]): List of annotation dictionaries containing image information.
+        (List[Dict[str, Any]]): List of annotation dictionaries containing image information.

     Notes:
         The directory structure assumed for the DOTA dataset:
@@ -97,19 +98,25 @@ def load_yolo_dota(data_root, split="train"):
     return annos


-def get_windows(
+def get_windows(
+    im_size: Tuple[int, int],
+    crop_sizes: Tuple[int, ...] = (1024,),
+    gaps: Tuple[int, ...] = (200,),
+    im_rate_thr: float = 0.6,
+    eps: float = 0.01,
+) -> np.ndarray:
     """
-    Get the coordinates of windows.
+    Get the coordinates of sliding windows for image cropping.

     Args:
-        im_size (
-        crop_sizes (
-        gaps (
-        im_rate_thr (float): Threshold of windows areas divided by image areas.
-        eps (float): Epsilon value for math operations.
+        im_size (Tuple[int, int]): Original image size, (H, W).
+        crop_sizes (Tuple[int, ...], optional): Crop size of windows.
+        gaps (Tuple[int, ...], optional): Gap between crops.
+        im_rate_thr (float, optional): Threshold of windows areas divided by image areas.
+        eps (float, optional): Epsilon value for math operations.

     Returns:
-        (np.ndarray): Array of window coordinates with shape (
+        (np.ndarray): Array of window coordinates with shape (N, 4) where each row is [x_start, y_start, x_stop, y_stop].
     """
     h, w = im_size
     windows = []
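
The new `get_windows()` signature above maps an image size to an array of sliding-window coordinates. A minimal usage sketch; the 2048x2048 image size is illustrative, not from the diff:

```python
from ultralytics.data.split_dota import get_windows

# 1024x1024 sliding windows with a 200-pixel gap (overlap) over a 2048x2048 image.
windows = get_windows(im_size=(2048, 2048), crop_sizes=(1024,), gaps=(200,))
print(windows.shape)  # (N, 4); each row is [x_start, y_start, x_stop, y_stop]
```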
@@ -144,8 +151,8 @@ def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0
     return windows[im_rates > im_rate_thr]


-def get_window_obj(anno, windows, iof_thr=0.7):
-    """Get objects for each window."""
+def get_window_obj(anno: Dict[str, Any], windows: np.ndarray, iof_thr: float = 0.7) -> List[np.ndarray]:
+    """Get objects for each window based on IoF threshold."""
     h, w = anno["ori_size"]
     label = anno["label"]
     if len(label):
@@ -158,17 +165,24 @@ def get_window_obj(anno, windows, iof_thr=0.7):
     return [np.zeros((0, 9), dtype=np.float32) for _ in range(len(windows))]  # window_anns


-def crop_and_save(
+def crop_and_save(
+    anno: Dict[str, Any],
+    windows: np.ndarray,
+    window_objs: List[np.ndarray],
+    im_dir: str,
+    lb_dir: str,
+    allow_background_images: bool = True,
+) -> None:
     """
-    Crop images and save new labels.
+    Crop images and save new labels for each window.

     Args:
-        anno (
-        windows (np.ndarray): Array of windows coordinates with shape (
-        window_objs (
+        anno (Dict[str, Any]): Annotation dict, including 'filepath', 'label', 'ori_size' as its keys.
+        windows (np.ndarray): Array of windows coordinates with shape (N, 4).
+        window_objs (List[np.ndarray]): A list of labels inside each window.
         im_dir (str): The output directory path of images.
         lb_dir (str): The output directory path of labels.
-        allow_background_images (bool): Whether to include background images without labels.
+        allow_background_images (bool, optional): Whether to include background images without labels.

     Notes:
         The directory structure assumed for the DOTA dataset:
@@ -203,16 +217,22 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir, allow_background_i
                 f.write(f"{int(lb[0])} {' '.join(formatted_coords)}\n")


-def split_images_and_labels(
+def split_images_and_labels(
+    data_root: str,
+    save_dir: str,
+    split: str = "train",
+    crop_sizes: Tuple[int, ...] = (1024,),
+    gaps: Tuple[int, ...] = (200,),
+) -> None:
     """
-    Split both images and labels.
+    Split both images and labels for a given dataset split.

     Args:
         data_root (str): Root directory of the dataset.
         save_dir (str): Directory to save the split dataset.
-        split (str): The split data set, could be
-        crop_sizes (
-        gaps (
+        split (str, optional): The split data set, could be 'train' or 'val'.
+        crop_sizes (Tuple[int, ...], optional): Tuple of crop sizes.
+        gaps (Tuple[int, ...], optional): Tuple of gaps between crops.

     Notes:
         The directory structure assumed for the DOTA dataset:
@@ -240,16 +260,18 @@ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024
         crop_and_save(anno, windows, window_objs, str(im_dir), str(lb_dir))


-def split_trainval(
+def split_trainval(
+    data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: Tuple[float, ...] = (1.0,)
+) -> None:
     """
-    Split train and val
+    Split train and val sets of DOTA dataset with multiple scaling rates.

     Args:
         data_root (str): Root directory of the dataset.
         save_dir (str): Directory to save the split dataset.
-        crop_size (int): Base crop size.
-        gap (int): Base gap between crops.
-        rates (
+        crop_size (int, optional): Base crop size.
+        gap (int, optional): Base gap between crops.
+        rates (Tuple[float, ...], optional): Scaling rates for crop_size and gap.

     Notes:
         The directory structure assumed for the DOTA dataset:
@@ -277,16 +299,18 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
         split_images_and_labels(data_root, save_dir, split, crop_sizes, gaps)


-def split_test(
+def split_test(
+    data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: Tuple[float, ...] = (1.0,)
+) -> None:
     """
-    Split test set of DOTA, labels are not included within this set.
+    Split test set of DOTA dataset, labels are not included within this set.

     Args:
         data_root (str): Root directory of the dataset.
         save_dir (str): Directory to save the split dataset.
-        crop_size (int): Base crop size.
-        gap (int): Base gap between crops.
-        rates (
+        crop_size (int, optional): Base crop size.
+        gap (int, optional): Base gap between crops.
+        rates (Tuple[float, ...], optional): Scaling rates for crop_size and gap.

     Notes:
         The directory structure assumed for the DOTA dataset:
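
Taken together, the typed `split_trainval()` and `split_test()` entry points above are the module's top-level API. A brief sketch of how they might be called, assuming the DOTA directory layout described in the docstrings (the paths are placeholders):

```python
from ultralytics.data.split_dota import split_test, split_trainval

# Split train/val images and labels, then the unlabeled test images, at the default 1024 crop and 200 gap.
split_trainval(data_root="DOTAv1.0", save_dir="DOTAv1.0-split", rates=(1.0,))
split_test(data_root="DOTAv1.0", save_dir="DOTAv1.0-split", rates=(1.0,))
```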
ultralytics/data/utils.py
CHANGED
@@ -9,6 +9,7 @@ import zipfile
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from tarfile import is_tarfile
+from typing import Dict, List, Tuple, Union

 import cv2
 import numpy as np
@@ -40,13 +41,15 @@ PIN_MEMORY = str(os.getenv("PIN_MEMORY", not MACOS)).lower() == "true" # global
 FORMATS_HELP_MSG = f"Supported formats are:\nimages: {IMG_FORMATS}\nvideos: {VID_FORMATS}"


-def img2label_paths(img_paths):
-    """
+def img2label_paths(img_paths: List[str]) -> List[str]:
+    """Convert image paths to label paths by replacing 'images' with 'labels' and extension with '.txt'."""
     sa, sb = f"{os.sep}images{os.sep}", f"{os.sep}labels{os.sep}"  # /images/, /labels/ substrings
     return [sb.join(x.rsplit(sa, 1)).rsplit(".", 1)[0] + ".txt" for x in img_paths]


-def check_file_speeds(
+def check_file_speeds(
+    files: List[str], threshold_ms: float = 10, threshold_mb: float = 50, max_files: int = 5, prefix: str = ""
+):
     """
     Check dataset file access speed and provide performance feedback.

@@ -54,7 +57,7 @@ def check_file_speeds(files, threshold_ms=10, threshold_mb=50, max_files=5, pref
     It samples up to 5 files from the provided list and warns if access times exceed the threshold.

     Args:
-        files (
+        files (List[str]): List of file paths to check for access speed.
         threshold_ms (float, optional): Threshold in milliseconds for ping time warnings.
         threshold_mb (float, optional): Threshold in megabytes per second for read speed warnings.
         max_files (int, optional): The maximum number of files to check.
@@ -122,8 +125,8 @@ def check_file_speeds(files, threshold_ms=10, threshold_mb=50, max_files=5, pref
     )


-def get_hash(paths):
-    """
+def get_hash(paths: List[str]) -> str:
+    """Return a single hash value of a list of paths (files or dirs)."""
     size = 0
     for p in paths:
         try:
@@ -135,8 +138,8 @@ def get_hash(paths):
     return h.hexdigest()  # return hash


-def exif_size(img: Image.Image):
-    """
+def exif_size(img: Image.Image) -> Tuple[int, int]:
+    """Return exif-corrected PIL size."""
     s = img.size  # (width, height)
     if img.format == "JPEG":  # only support JPEG images
         try:
@@ -149,7 +152,7 @@ def exif_size(img: Image.Image):
     return s


-def verify_image(args):
+def verify_image(args: Tuple) -> Tuple:
     """Verify one image."""
     (im_file, cls), prefix = args
     # Number (found, corrupt), message
@@ -174,7 +177,7 @@ def verify_image(args):
     return (im_file, cls), nf, nc, msg


-def verify_image_label(args):
+def verify_image_label(args: Tuple) -> List:
     """Verify one image-label pair."""
     im_file, lb_file, prefix, keypoint, num_cls, nkpt, ndim, single_cls = args
     # Number (missing, found, empty, corrupt), message, segments, keypoints
@@ -247,9 +250,9 @@ def verify_image_label(args):
     return [None, None, None, None, None, nm, nf, ne, nc, msg]


-def visualize_image_annotations(image_path, txt_path, label_map):
+def visualize_image_annotations(image_path: str, txt_path: str, label_map: Dict[int, str]):
     """
-
+    Visualize YOLO annotations (bounding boxes and class labels) on an image.

     This function reads an image and its corresponding annotation file in YOLO format, then
     draws bounding boxes around detected objects and labels them with their respective class names.
@@ -259,7 +262,7 @@ def visualize_image_annotations(image_path, txt_path, label_map):
     Args:
         image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL.
         txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object.
-        label_map (
+        label_map (Dict[int, str]): A dictionary that maps class IDs (integers) to class labels (strings).

     Examples:
         >>> label_map = {0: "cat", 1: "dog", 2: "bird"}  # It should include all annotated classes details
@@ -291,13 +294,15 @@ def visualize_image_annotations(image_path, txt_path, label_map):
     plt.show()


-def polygon2mask(
+def polygon2mask(
+    imgsz: Tuple[int, int], polygons: List[np.ndarray], color: int = 1, downsample_ratio: int = 1
+) -> np.ndarray:
     """
     Convert a list of polygons to a binary mask of the specified image size.

     Args:
-        imgsz (
-        polygons (
+        imgsz (Tuple[int, int]): The size of the image as (height, width).
+        polygons (List[np.ndarray]): A list of polygons. Each polygon is an array with shape (N, M), where
             N is the number of polygons, and M is the number of points such that M % 2 = 0.
         color (int, optional): The color value to fill in the polygons on the mask.
         downsample_ratio (int, optional): Factor by which to downsample the mask.
@@ -314,13 +319,15 @@ def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
     return cv2.resize(mask, (nw, nh))


-def polygons2masks(
+def polygons2masks(
+    imgsz: Tuple[int, int], polygons: List[np.ndarray], color: int, downsample_ratio: int = 1
+) -> np.ndarray:
     """
     Convert a list of polygons to a set of binary masks of the specified image size.

     Args:
-        imgsz (
-        polygons (
+        imgsz (Tuple[int, int]): The size of the image as (height, width).
+        polygons (List[np.ndarray]): A list of polygons. Each polygon is an array with shape (N, M), where
             N is the number of polygons, and M is the number of points such that M % 2 = 0.
         color (int): The color value to fill in the polygons on the masks.
         downsample_ratio (int, optional): Factor by which to downsample each mask.
@@ -331,7 +338,9 @@ def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
     return np.array([polygon2mask(imgsz, [x.reshape(-1)], color, downsample_ratio) for x in polygons])


-def polygons2masks_overlap(
+def polygons2masks_overlap(
+    imgsz: Tuple[int, int], segments: List[np.ndarray], downsample_ratio: int = 1
+) -> Tuple[np.ndarray, np.ndarray]:
     """Return a (640, 640) overlap mask."""
     masks = np.zeros(
         (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio),
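
The typed `polygon2mask()` above rasterizes polygon coordinates into a binary mask, and `polygons2masks_overlap()` merges a list of segments into one overlap mask. A minimal sketch of the former with an illustrative triangle (coordinates are not from the package):

```python
import numpy as np

from ultralytics.data.utils import polygon2mask

# One flat polygon of pixel coordinates [x1, y1, x2, y2, x3, y3], filled with value 1.
triangle = np.array([10, 10, 100, 10, 55, 90], dtype=np.float32)
mask = polygon2mask(imgsz=(128, 128), polygons=[triangle], color=1, downsample_ratio=1)
print(mask.shape)  # expected (128, 128) binary mask
```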
@@ -374,7 +383,7 @@ def find_dataset_yaml(path: Path) -> Path:
     return files[0]


-def check_det_dataset(dataset, autodownload=True):
+def check_det_dataset(dataset: str, autodownload: bool = True) -> Dict:
     """
     Download, verify, and/or unzip a dataset if not found locally.

@@ -387,7 +396,7 @@ def check_det_dataset(dataset, autodownload=True):
         autodownload (bool, optional): Whether to automatically download the dataset if not found.

     Returns:
-        (
+        (Dict): Parsed dataset information and paths.
     """
     file = check_file(dataset)

@@ -469,9 +478,9 @@ def check_det_dataset(dataset, autodownload=True):
     return data  # dictionary


-def check_cls_dataset(dataset, split=""):
+def check_cls_dataset(dataset: Union[str, Path], split: str = "") -> Dict:
     """
-
+    Check a classification dataset such as Imagenet.

     This function accepts a `dataset` name and attempts to retrieve the corresponding dataset information.
     If the dataset is not found locally, it attempts to download the dataset from the internet and save it locally.
@@ -481,13 +490,13 @@ def check_cls_dataset(dataset, split=""):
         split (str, optional): The split of the dataset. Either 'val', 'test', or ''.

     Returns:
-        (
+        (Dict): A dictionary containing the following keys:

            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
-            - 'names' (
+            - 'names' (Dict): A dictionary of class names in the dataset.
     """
     # Download (optional if dataset=https://file.zip is passed directly)
     if str(dataset).startswith(("http:/", "https:/")):
@@ -566,9 +575,20 @@ class HUBDatasetStats:
     A class for generating HUB dataset JSON and `-hub` dataset directory.

     Args:
-        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip).
-        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'.
-        autodownload (bool): Attempt to download dataset if not found locally.
+        path (str): Path to data.yaml or data.zip (with data.yaml inside data.zip).
+        task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'classify'.
+        autodownload (bool): Attempt to download dataset if not found locally.
+
+    Attributes:
+        task (str): Dataset task type.
+        hub_dir (Path): Directory path for HUB dataset files.
+        im_dir (Path): Directory path for compressed images.
+        stats (Dict): Statistics dictionary containing dataset information.
+        data (Dict): Dataset configuration data.
+
+    Methods:
+        get_json: Return dataset JSON for Ultralytics HUB.
+        process_images: Compress images for Ultralytics HUB.

     Note:
         Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
@@ -585,7 +605,7 @@ class HUBDatasetStats:
         >>> stats.process_images()
     """

-    def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
+    def __init__(self, path: str = "coco8.yaml", task: str = "detect", autodownload: bool = False):
         """Initialize class."""
         path = Path(path).resolve()
         LOGGER.info(f"Starting HUB dataset checks for {path}....")
@@ -613,7 +633,7 @@ class HUBDatasetStats:
         self.data = data

     @staticmethod
-    def _unzip(path):
+    def _unzip(path: Path) -> Tuple[bool, str, Path]:
         """Unzip data.zip."""
         if not str(path).endswith(".zip"):  # path is data.yaml
             return False, None, path
@@ -623,11 +643,11 @@ class HUBDatasetStats:
         )
         return True, str(unzip_dir), find_dataset_yaml(unzip_dir)  # zipped, data_dir, yaml_path

-    def _hub_ops(self, f):
-        """
+    def _hub_ops(self, f: str):
+        """Save a compressed image for HUB previews."""
         compress_one_image(f, self.im_dir / Path(f).name)  # save to dataset-hub

-    def get_json(self, save=False, verbose=False):
+    def get_json(self, save: bool = False, verbose: bool = False) -> Dict:
         """Return dataset JSON for Ultralytics HUB."""

         def _round(labels):
@@ -701,7 +721,7 @@ class HUBDatasetStats:
         LOGGER.info(json.dumps(self.stats, indent=2, sort_keys=False))
         return self.stats

-    def process_images(self):
+    def process_images(self) -> Path:
         """Compress images for Ultralytics HUB."""
         from ultralytics.data import YOLODataset  # ClassificationDataset

@@ -717,9 +737,9 @@ class HUBDatasetStats:
         return self.im_dir


-def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
+def compress_one_image(f: str, f_new: str = None, max_dim: int = 1920, quality: int = 50):
     """
-
+    Compress a single image file to reduced size while preserving its aspect ratio and quality using either the Python
     Imaging Library (PIL) or OpenCV library. If the input image is smaller than the maximum dimension, it will not be
     resized.

@@ -754,7 +774,7 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
     cv2.imwrite(str(f_new or f), im)


-def load_dataset_cache_file(path):
+def load_dataset_cache_file(path: Path) -> Dict:
     """Load an Ultralytics *.cache dictionary from path."""
     import gc

@@ -764,7 +784,7 @@ def load_dataset_cache_file(path):
     return cache


-def save_dataset_cache_file(prefix, path, x, version):
+def save_dataset_cache_file(prefix: str, path: Path, x: Dict, version: str):
     """Save an Ultralytics dataset *.cache dictionary x to path."""
     x["version"] = version  # add cache version
     if is_dir_writeable(path.parent):
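
Finally, the expanded `HUBDatasetStats` docstring above now documents the class attributes and its two public methods. A short sketch mirroring the usage shown in that docstring (the zip path is a placeholder):

```python
from ultralytics.data.utils import HUBDatasetStats

# Build HUB JSON stats and compressed image previews for a detection dataset archive.
stats = HUBDatasetStats("path/to/coco8.zip", task="detect")
stats.get_json(save=True)  # returns the stats dictionary, optionally writing it to disk
stats.process_images()  # returns the directory of compressed images
```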