ultralytics 8.3.88__py3-none-any.whl → 8.3.90__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +2 -2
- tests/test_cli.py +13 -11
- tests/test_cuda.py +10 -1
- tests/test_integrations.py +1 -5
- tests/test_python.py +16 -16
- tests/test_solutions.py +9 -9
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +3 -1
- ultralytics/cfg/models/11/yolo11-cls.yaml +5 -5
- ultralytics/cfg/models/11/yolo11-obb.yaml +5 -5
- ultralytics/cfg/models/11/yolo11-pose.yaml +5 -5
- ultralytics/cfg/models/11/yolo11-seg.yaml +5 -5
- ultralytics/cfg/models/11/yolo11.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-world.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -5
- ultralytics/cfg/models/v8/yolov8.yaml +5 -5
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9e.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9m.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
- ultralytics/cfg/models/v9/yolov9t.yaml +1 -1
- ultralytics/data/annotator.py +9 -14
- ultralytics/data/base.py +125 -39
- ultralytics/data/build.py +63 -24
- ultralytics/data/converter.py +34 -33
- ultralytics/data/dataset.py +207 -53
- ultralytics/data/loaders.py +1 -0
- ultralytics/data/split_dota.py +39 -12
- ultralytics/data/utils.py +33 -47
- ultralytics/engine/exporter.py +19 -17
- ultralytics/engine/model.py +69 -90
- ultralytics/engine/predictor.py +106 -21
- ultralytics/engine/trainer.py +32 -23
- ultralytics/engine/tuner.py +31 -38
- ultralytics/engine/validator.py +75 -41
- ultralytics/hub/__init__.py +21 -26
- ultralytics/hub/auth.py +9 -12
- ultralytics/hub/session.py +76 -21
- ultralytics/hub/utils.py +19 -17
- ultralytics/models/fastsam/model.py +23 -17
- ultralytics/models/fastsam/predict.py +36 -16
- ultralytics/models/fastsam/utils.py +5 -5
- ultralytics/models/fastsam/val.py +6 -6
- ultralytics/models/nas/model.py +29 -24
- ultralytics/models/nas/predict.py +14 -11
- ultralytics/models/nas/val.py +11 -13
- ultralytics/models/rtdetr/model.py +20 -11
- ultralytics/models/rtdetr/predict.py +21 -21
- ultralytics/models/rtdetr/train.py +25 -24
- ultralytics/models/rtdetr/val.py +47 -14
- ultralytics/models/sam/__init__.py +1 -1
- ultralytics/models/sam/amg.py +50 -4
- ultralytics/models/sam/model.py +8 -14
- ultralytics/models/sam/modules/decoders.py +18 -21
- ultralytics/models/sam/modules/encoders.py +25 -46
- ultralytics/models/sam/modules/memory_attention.py +19 -15
- ultralytics/models/sam/modules/sam.py +18 -25
- ultralytics/models/sam/modules/tiny_encoder.py +19 -29
- ultralytics/models/sam/modules/transformer.py +35 -57
- ultralytics/models/sam/modules/utils.py +15 -15
- ultralytics/models/sam/predict.py +0 -3
- ultralytics/models/utils/loss.py +87 -36
- ultralytics/models/utils/ops.py +26 -31
- ultralytics/models/yolo/classify/predict.py +30 -12
- ultralytics/models/yolo/classify/train.py +83 -19
- ultralytics/models/yolo/classify/val.py +45 -23
- ultralytics/models/yolo/detect/predict.py +29 -19
- ultralytics/models/yolo/detect/train.py +90 -23
- ultralytics/models/yolo/detect/val.py +150 -29
- ultralytics/models/yolo/model.py +1 -2
- ultralytics/models/yolo/obb/predict.py +18 -13
- ultralytics/models/yolo/obb/train.py +12 -8
- ultralytics/models/yolo/obb/val.py +35 -22
- ultralytics/models/yolo/pose/predict.py +28 -15
- ultralytics/models/yolo/pose/train.py +21 -8
- ultralytics/models/yolo/pose/val.py +51 -31
- ultralytics/models/yolo/segment/predict.py +27 -16
- ultralytics/models/yolo/segment/train.py +11 -8
- ultralytics/models/yolo/segment/val.py +110 -29
- ultralytics/models/yolo/world/train.py +43 -16
- ultralytics/models/yolo/world/train_world.py +61 -36
- ultralytics/nn/autobackend.py +28 -14
- ultralytics/nn/modules/__init__.py +12 -12
- ultralytics/nn/modules/activation.py +12 -3
- ultralytics/nn/modules/block.py +587 -84
- ultralytics/nn/modules/conv.py +418 -54
- ultralytics/nn/modules/head.py +3 -4
- ultralytics/nn/modules/transformer.py +320 -34
- ultralytics/nn/modules/utils.py +17 -3
- ultralytics/nn/tasks.py +226 -79
- ultralytics/solutions/ai_gym.py +2 -2
- ultralytics/solutions/analytics.py +4 -4
- ultralytics/solutions/heatmap.py +4 -4
- ultralytics/solutions/instance_segmentation.py +10 -4
- ultralytics/solutions/object_blurrer.py +2 -2
- ultralytics/solutions/object_counter.py +2 -2
- ultralytics/solutions/object_cropper.py +2 -2
- ultralytics/solutions/parking_management.py +9 -9
- ultralytics/solutions/queue_management.py +1 -1
- ultralytics/solutions/region_counter.py +2 -2
- ultralytics/solutions/security_alarm.py +7 -7
- ultralytics/solutions/solutions.py +7 -4
- ultralytics/solutions/speed_estimation.py +2 -2
- ultralytics/solutions/streamlit_inference.py +6 -6
- ultralytics/solutions/trackzone.py +9 -2
- ultralytics/solutions/vision_eye.py +4 -4
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/bot_sort.py +23 -22
- ultralytics/trackers/byte_tracker.py +4 -4
- ultralytics/trackers/track.py +2 -1
- ultralytics/trackers/utils/gmc.py +26 -27
- ultralytics/trackers/utils/kalman_filter.py +31 -29
- ultralytics/trackers/utils/matching.py +7 -7
- ultralytics/utils/__init__.py +37 -35
- ultralytics/utils/autobatch.py +5 -5
- ultralytics/utils/benchmarks.py +111 -18
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +11 -11
- ultralytics/utils/callbacks/comet.py +35 -22
- ultralytics/utils/callbacks/dvc.py +11 -10
- ultralytics/utils/callbacks/hub.py +8 -8
- ultralytics/utils/callbacks/mlflow.py +1 -1
- ultralytics/utils/callbacks/neptune.py +12 -10
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +6 -6
- ultralytics/utils/callbacks/wb.py +16 -16
- ultralytics/utils/checks.py +139 -68
- ultralytics/utils/dist.py +15 -2
- ultralytics/utils/downloads.py +37 -56
- ultralytics/utils/files.py +12 -13
- ultralytics/utils/instance.py +117 -52
- ultralytics/utils/loss.py +28 -33
- ultralytics/utils/metrics.py +246 -181
- ultralytics/utils/ops.py +65 -61
- ultralytics/utils/patches.py +8 -6
- ultralytics/utils/plotting.py +72 -59
- ultralytics/utils/tal.py +88 -57
- ultralytics/utils/torch_utils.py +202 -64
- ultralytics/utils/triton.py +13 -3
- ultralytics/utils/tuner.py +13 -25
- {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/METADATA +2 -2
- ultralytics-8.3.90.dist-info/RECORD +250 -0
- ultralytics-8.3.88.dist-info/RECORD +0 -250
- {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/LICENSE +0 -0
- {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.88.dist-info → ultralytics-8.3.90.dist-info}/top_level.txt +0 -0
ultralytics/data/dataset.py
CHANGED
@@ -46,16 +46,38 @@ class YOLODataset(BaseDataset):
|
|
46
46
|
"""
|
47
47
|
Dataset class for loading object detection and/or segmentation labels in YOLO format.
|
48
48
|
|
49
|
-
|
50
|
-
|
51
|
-
task (str): An explicit arg to point current task, Defaults to 'detect'.
|
49
|
+
This class supports loading data for object detection, segmentation, pose estimation, and oriented bounding box
|
50
|
+
(OBB) tasks using the YOLO format.
|
52
51
|
|
53
|
-
|
54
|
-
(
|
52
|
+
Attributes:
|
53
|
+
use_segments (bool): Indicates if segmentation masks should be used.
|
54
|
+
use_keypoints (bool): Indicates if keypoints should be used for pose estimation.
|
55
|
+
use_obb (bool): Indicates if oriented bounding boxes should be used.
|
56
|
+
data (dict): Dataset configuration dictionary.
|
57
|
+
|
58
|
+
Methods:
|
59
|
+
cache_labels: Cache dataset labels, check images and read shapes.
|
60
|
+
get_labels: Returns dictionary of labels for YOLO training.
|
61
|
+
build_transforms: Builds and appends transforms to the list.
|
62
|
+
close_mosaic: Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
|
63
|
+
update_labels_info: Updates label format for different tasks.
|
64
|
+
collate_fn: Collates data samples into batches.
|
65
|
+
|
66
|
+
Examples:
|
67
|
+
>>> dataset = YOLODataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
|
68
|
+
>>> dataset.get_labels()
|
55
69
|
"""
|
56
70
|
|
57
71
|
def __init__(self, *args, data=None, task="detect", **kwargs):
|
58
|
-
"""
|
72
|
+
"""
|
73
|
+
Initialize the YOLODataset.
|
74
|
+
|
75
|
+
Args:
|
76
|
+
data (dict, optional): Dataset configuration dictionary.
|
77
|
+
task (str): Task type, one of 'detect', 'segment', 'pose', or 'obb'.
|
78
|
+
*args (Any): Additional positional arguments for the parent class.
|
79
|
+
**kwargs (Any): Additional keyword arguments for the parent class.
|
80
|
+
"""
|
59
81
|
self.use_segments = task == "segment"
|
60
82
|
self.use_keypoints = task == "pose"
|
61
83
|
self.use_obb = task == "obb"
|
@@ -68,10 +90,10 @@ class YOLODataset(BaseDataset):
|
|
68
90
|
Cache dataset labels, check images and read shapes.
|
69
91
|
|
70
92
|
Args:
|
71
|
-
path (Path): Path where to save the cache file.
|
93
|
+
path (Path): Path where to save the cache file.
|
72
94
|
|
73
95
|
Returns:
|
74
|
-
(dict): labels.
|
96
|
+
(dict): Dictionary containing cached labels and related information.
|
75
97
|
"""
|
76
98
|
x = {"labels": []}
|
77
99
|
nm, nf, ne, nc, msgs = 0, 0, 0, 0, [] # number missing, found, empty, corrupt, messages
|
@@ -131,7 +153,14 @@ class YOLODataset(BaseDataset):
|
|
131
153
|
return x
|
132
154
|
|
133
155
|
def get_labels(self):
|
134
|
-
"""
|
156
|
+
"""
|
157
|
+
Returns dictionary of labels for YOLO training.
|
158
|
+
|
159
|
+
This method loads labels from disk or cache, verifies their integrity, and prepares them for training.
|
160
|
+
|
161
|
+
Returns:
|
162
|
+
(List[dict]): List of label dictionaries, each containing information about an image and its annotations.
|
163
|
+
"""
|
135
164
|
self.label_files = img2label_paths(self.im_files)
|
136
165
|
cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
|
137
166
|
try:
|
@@ -172,7 +201,15 @@ class YOLODataset(BaseDataset):
|
|
172
201
|
return labels
|
173
202
|
|
174
203
|
def build_transforms(self, hyp=None):
|
175
|
-
"""
|
204
|
+
"""
|
205
|
+
Builds and appends transforms to the list.
|
206
|
+
|
207
|
+
Args:
|
208
|
+
hyp (dict, optional): Hyperparameters for transforms.
|
209
|
+
|
210
|
+
Returns:
|
211
|
+
(Compose): Composed transforms.
|
212
|
+
"""
|
176
213
|
if self.augment:
|
177
214
|
hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
|
178
215
|
hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
|
@@ -195,7 +232,12 @@ class YOLODataset(BaseDataset):
|
|
195
232
|
return transforms
|
196
233
|
|
197
234
|
def close_mosaic(self, hyp):
|
198
|
-
"""
|
235
|
+
"""
|
236
|
+
Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
|
237
|
+
|
238
|
+
Args:
|
239
|
+
hyp (dict): Hyperparameters for transforms.
|
240
|
+
"""
|
199
241
|
hyp.mosaic = 0.0 # set mosaic ratio=0.0
|
200
242
|
hyp.copy_paste = 0.0 # keep the same behavior as previous v8 close-mosaic
|
201
243
|
hyp.mixup = 0.0 # keep the same behavior as previous v8 close-mosaic
|
@@ -205,6 +247,12 @@ class YOLODataset(BaseDataset):
|
|
205
247
|
"""
|
206
248
|
Customize your label format here.
|
207
249
|
|
250
|
+
Args:
|
251
|
+
label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
|
252
|
+
|
253
|
+
Returns:
|
254
|
+
(dict): Updated label dictionary with instances.
|
255
|
+
|
208
256
|
Note:
|
209
257
|
cls is not with bboxes now, classification and semantic segmentation need an independent cls label
|
210
258
|
Can also support classification and semantic segmentation by adding or removing dict keys there.
|
@@ -230,7 +278,15 @@ class YOLODataset(BaseDataset):
|
|
230
278
|
|
231
279
|
@staticmethod
|
232
280
|
def collate_fn(batch):
|
233
|
-
"""
|
281
|
+
"""
|
282
|
+
Collates data samples into batches.
|
283
|
+
|
284
|
+
Args:
|
285
|
+
batch (List[dict]): List of dictionaries containing sample data.
|
286
|
+
|
287
|
+
Returns:
|
288
|
+
(dict): Collated batch with stacked tensors.
|
289
|
+
"""
|
234
290
|
new_batch = {}
|
235
291
|
keys = batch[0].keys()
|
236
292
|
values = list(zip(*[list(b.values()) for b in batch]))
|
@@ -250,29 +306,58 @@ class YOLODataset(BaseDataset):
|
|
250
306
|
|
251
307
|
class YOLOMultiModalDataset(YOLODataset):
|
252
308
|
"""
|
253
|
-
Dataset class for loading object detection and/or segmentation labels in YOLO format.
|
309
|
+
Dataset class for loading object detection and/or segmentation labels in YOLO format with multi-modal support.
|
254
310
|
|
255
|
-
|
256
|
-
|
257
|
-
task (str): An explicit arg to point current task, Defaults to 'detect'.
|
311
|
+
This class extends YOLODataset to add text information for multi-modal model training, enabling models to
|
312
|
+
process both image and text data.
|
258
313
|
|
259
|
-
|
260
|
-
|
314
|
+
Methods:
|
315
|
+
update_labels_info: Adds text information for multi-modal model training.
|
316
|
+
build_transforms: Enhances data transformations with text augmentation.
|
317
|
+
|
318
|
+
Examples:
|
319
|
+
>>> dataset = YOLOMultiModalDataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
|
320
|
+
>>> batch = next(iter(dataset))
|
321
|
+
>>> print(batch.keys()) # Should include 'texts'
|
261
322
|
"""
|
262
323
|
|
263
324
|
def __init__(self, *args, data=None, task="detect", **kwargs):
|
264
|
-
"""
|
325
|
+
"""
|
326
|
+
Initialize a YOLOMultiModalDataset.
|
327
|
+
|
328
|
+
Args:
|
329
|
+
data (dict, optional): Dataset configuration dictionary.
|
330
|
+
task (str): Task type, one of 'detect', 'segment', 'pose', or 'obb'.
|
331
|
+
*args (Any): Additional positional arguments for the parent class.
|
332
|
+
**kwargs (Any): Additional keyword arguments for the parent class.
|
333
|
+
"""
|
265
334
|
super().__init__(*args, data=data, task=task, **kwargs)
|
266
335
|
|
267
336
|
def update_labels_info(self, label):
|
268
|
-
"""
|
337
|
+
"""
|
338
|
+
Add texts information for multi-modal model training.
|
339
|
+
|
340
|
+
Args:
|
341
|
+
label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
|
342
|
+
|
343
|
+
Returns:
|
344
|
+
(dict): Updated label dictionary with instances and texts.
|
345
|
+
"""
|
269
346
|
labels = super().update_labels_info(label)
|
270
347
|
# NOTE: some categories are concatenated with its synonyms by `/`.
|
271
348
|
labels["texts"] = [v.split("/") for _, v in self.data["names"].items()]
|
272
349
|
return labels
|
273
350
|
|
274
351
|
def build_transforms(self, hyp=None):
|
275
|
-
"""
|
352
|
+
"""
|
353
|
+
Enhances data transformations with optional text augmentation for multi-modal training.
|
354
|
+
|
355
|
+
Args:
|
356
|
+
hyp (dict, optional): Hyperparameters for transforms.
|
357
|
+
|
358
|
+
Returns:
|
359
|
+
(Compose): Composed transforms including text augmentation if applicable.
|
360
|
+
"""
|
276
361
|
transforms = super().build_transforms(hyp)
|
277
362
|
if self.augment:
|
278
363
|
# NOTE: hard-coded the args for now.
|
@@ -281,20 +366,58 @@ class YOLOMultiModalDataset(YOLODataset):
|
|
281
366
|
|
282
367
|
|
283
368
|
class GroundingDataset(YOLODataset):
|
284
|
-
"""
|
369
|
+
"""
|
370
|
+
Handles object detection tasks by loading annotations from a specified JSON file, supporting YOLO format.
|
371
|
+
|
372
|
+
This dataset is designed for grounding tasks where annotations are provided in a JSON file rather than
|
373
|
+
the standard YOLO format text files.
|
374
|
+
|
375
|
+
Attributes:
|
376
|
+
json_file (str): Path to the JSON file containing annotations.
|
377
|
+
|
378
|
+
Methods:
|
379
|
+
get_img_files: Returns empty list as image files are read in get_labels.
|
380
|
+
get_labels: Loads annotations from a JSON file and prepares them for training.
|
381
|
+
build_transforms: Configures augmentations for training with optional text loading.
|
382
|
+
|
383
|
+
Examples:
|
384
|
+
>>> dataset = GroundingDataset(img_path="path/to/images", json_file="annotations.json", task="detect")
|
385
|
+
>>> len(dataset) # Number of valid images with annotations
|
386
|
+
"""
|
285
387
|
|
286
388
|
def __init__(self, *args, task="detect", json_file, **kwargs):
|
287
|
-
"""
|
389
|
+
"""
|
390
|
+
Initialize a GroundingDataset for object detection.
|
391
|
+
|
392
|
+
Args:
|
393
|
+
json_file (str): Path to the JSON file containing annotations.
|
394
|
+
task (str): Must be 'detect' for GroundingDataset.
|
395
|
+
*args (Any): Additional positional arguments for the parent class.
|
396
|
+
**kwargs (Any): Additional keyword arguments for the parent class.
|
397
|
+
"""
|
288
398
|
assert task == "detect", "`GroundingDataset` only support `detect` task for now!"
|
289
399
|
self.json_file = json_file
|
290
400
|
super().__init__(*args, task=task, data={}, **kwargs)
|
291
401
|
|
292
402
|
def get_img_files(self, img_path):
|
293
|
-
"""
|
403
|
+
"""
|
404
|
+
The image files are read in the `get_labels` function, so return an empty list here.
|
405
|
+
|
406
|
+
Args:
|
407
|
+
img_path (str): Path to the directory containing images.
|
408
|
+
|
409
|
+
Returns:
|
410
|
+
(List): Empty list as image files are read in get_labels.
|
411
|
+
"""
|
294
412
|
return []
|
295
413
|
|
296
414
|
def get_labels(self):
|
297
|
-
"""
|
415
|
+
"""
|
416
|
+
Loads annotations from a JSON file, filters, and normalizes bounding boxes for each image.
|
417
|
+
|
418
|
+
Returns:
|
419
|
+
(List[dict]): List of label dictionaries, each containing information about an image and its annotations.
|
420
|
+
"""
|
298
421
|
labels = []
|
299
422
|
LOGGER.info("Loading annotation file...")
|
300
423
|
with open(self.json_file) as f:
|
@@ -347,7 +470,15 @@ class GroundingDataset(YOLODataset):
|
|
347
470
|
return labels
|
348
471
|
|
349
472
|
def build_transforms(self, hyp=None):
|
350
|
-
"""
|
473
|
+
"""
|
474
|
+
Configures augmentations for training with optional text loading.
|
475
|
+
|
476
|
+
Args:
|
477
|
+
hyp (dict, optional): Hyperparameters for transforms.
|
478
|
+
|
479
|
+
Returns:
|
480
|
+
(Compose): Composed transforms including text augmentation if applicable.
|
481
|
+
"""
|
351
482
|
transforms = super().build_transforms(hyp)
|
352
483
|
if self.augment:
|
353
484
|
# NOTE: hard-coded the args for now.
|
@@ -359,27 +490,35 @@ class YOLOConcatDataset(ConcatDataset):
|
|
359
490
|
"""
|
360
491
|
Dataset as a concatenation of multiple datasets.
|
361
492
|
|
362
|
-
This class is useful to assemble different existing datasets
|
493
|
+
This class is useful to assemble different existing datasets for YOLO training, ensuring they use the same
|
494
|
+
collation function.
|
495
|
+
|
496
|
+
Methods:
|
497
|
+
collate_fn: Static method that collates data samples into batches using YOLODataset's collation function.
|
498
|
+
|
499
|
+
Examples:
|
500
|
+
>>> dataset1 = YOLODataset(...)
|
501
|
+
>>> dataset2 = YOLODataset(...)
|
502
|
+
>>> combined_dataset = YOLOConcatDataset([dataset1, dataset2])
|
363
503
|
"""
|
364
504
|
|
365
505
|
@staticmethod
|
366
506
|
def collate_fn(batch):
|
367
|
-
"""
|
507
|
+
"""
|
508
|
+
Collates data samples into batches.
|
509
|
+
|
510
|
+
Args:
|
511
|
+
batch (List[dict]): List of dictionaries containing sample data.
|
512
|
+
|
513
|
+
Returns:
|
514
|
+
(dict): Collated batch with stacked tensors.
|
515
|
+
"""
|
368
516
|
return YOLODataset.collate_fn(batch)
|
369
517
|
|
370
518
|
|
371
519
|
# TODO: support semantic segmentation
|
372
520
|
class SemanticDataset(BaseDataset):
|
373
|
-
"""
|
374
|
-
Semantic Segmentation Dataset.
|
375
|
-
|
376
|
-
This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
|
377
|
-
from the BaseDataset class.
|
378
|
-
|
379
|
-
Note:
|
380
|
-
This class is currently a placeholder and needs to be populated with methods and attributes for supporting
|
381
|
-
semantic segmentation tasks.
|
382
|
-
"""
|
521
|
+
"""Semantic Segmentation Dataset."""
|
383
522
|
|
384
523
|
def __init__(self):
|
385
524
|
"""Initialize a SemanticDataset object."""
|
@@ -388,20 +527,25 @@ class SemanticDataset(BaseDataset):
|
|
388
527
|
|
389
528
|
class ClassificationDataset:
|
390
529
|
"""
|
391
|
-
Extends torchvision ImageFolder to support YOLO classification tasks
|
392
|
-
augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
|
393
|
-
learning models, with optional image transformations and caching mechanisms to speed up training.
|
530
|
+
Extends torchvision ImageFolder to support YOLO classification tasks.
|
394
531
|
|
395
|
-
This class
|
396
|
-
|
397
|
-
to
|
532
|
+
This class offers functionalities like image augmentation, caching, and verification. It's designed to efficiently
|
533
|
+
handle large datasets for training deep learning models, with optional image transformations and caching mechanisms
|
534
|
+
to speed up training.
|
398
535
|
|
399
536
|
Attributes:
|
400
537
|
cache_ram (bool): Indicates if caching in RAM is enabled.
|
401
538
|
cache_disk (bool): Indicates if caching on disk is enabled.
|
402
|
-
samples (
|
539
|
+
samples (List): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
|
403
540
|
file (if caching on disk), and optionally the loaded image array (if caching in RAM).
|
404
541
|
torch_transforms (callable): PyTorch transforms to be applied to the images.
|
542
|
+
root (str): Root directory of the dataset.
|
543
|
+
prefix (str): Prefix for logging and cache filenames.
|
544
|
+
|
545
|
+
Methods:
|
546
|
+
__getitem__: Returns subset of data and targets corresponding to given indices.
|
547
|
+
__len__: Returns the total number of samples in the dataset.
|
548
|
+
verify_images: Verifies all images in dataset.
|
405
549
|
"""
|
406
550
|
|
407
551
|
def __init__(self, root, args, augment=False, prefix=""):
|
@@ -411,12 +555,9 @@ class ClassificationDataset:
|
|
411
555
|
Args:
|
412
556
|
root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
|
413
557
|
args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
|
414
|
-
parameters, and cache settings.
|
415
|
-
|
416
|
-
|
417
|
-
augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
|
418
|
-
prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
|
419
|
-
debugging. Default is an empty string.
|
558
|
+
parameters, and cache settings.
|
559
|
+
augment (bool, optional): Whether to apply augmentations to the dataset.
|
560
|
+
prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification.
|
420
561
|
"""
|
421
562
|
import torchvision # scope for faster 'import ultralytics'
|
422
563
|
|
@@ -460,7 +601,15 @@ class ClassificationDataset:
|
|
460
601
|
)
|
461
602
|
|
462
603
|
def __getitem__(self, i):
|
463
|
-
"""
|
604
|
+
"""
|
605
|
+
Returns subset of data and targets corresponding to given indices.
|
606
|
+
|
607
|
+
Args:
|
608
|
+
i (int): Index of the sample to retrieve.
|
609
|
+
|
610
|
+
Returns:
|
611
|
+
(dict): Dictionary containing the image and its class index.
|
612
|
+
"""
|
464
613
|
f, j, fn, im = self.samples[i] # filename, index, filename.with_suffix('.npy'), image
|
465
614
|
if self.cache_ram:
|
466
615
|
if im is None: # Warning: two separate if statements required here, do not combine this with previous line
|
@@ -481,7 +630,12 @@ class ClassificationDataset:
|
|
481
630
|
return len(self.samples)
|
482
631
|
|
483
632
|
def verify_images(self):
|
484
|
-
"""
|
633
|
+
"""
|
634
|
+
Verify all images in dataset.
|
635
|
+
|
636
|
+
Returns:
|
637
|
+
(List): List of valid samples after verification.
|
638
|
+
"""
|
485
639
|
desc = f"{self.prefix}Scanning {self.root}..."
|
486
640
|
path = Path(self.root).with_suffix(".cache") # *.cache file path
|
487
641
|
|
ultralytics/data/loaders.py
CHANGED
@@ -33,6 +33,7 @@ class SourceTypes:
|
|
33
33
|
stream (bool): Flag indicating if the input source is a video stream.
|
34
34
|
screenshot (bool): Flag indicating if the input source is a screenshot.
|
35
35
|
from_img (bool): Flag indicating if the input source is an image file.
|
36
|
+
tensor (bool): Flag indicating if the input source is a tensor.
|
36
37
|
|
37
38
|
Examples:
|
38
39
|
>>> source_types = SourceTypes(stream=True, screenshot=False, from_img=False)
|
ultralytics/data/split_dota.py
CHANGED
@@ -19,14 +19,14 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
|
|
19
19
|
Calculate Intersection over Foreground (IoF) between polygons and bounding boxes.
|
20
20
|
|
21
21
|
Args:
|
22
|
-
polygon1 (np.ndarray): Polygon coordinates
|
23
|
-
bbox2 (np.ndarray): Bounding boxes
|
24
|
-
eps (float, optional): Small value to prevent division by zero.
|
22
|
+
polygon1 (np.ndarray): Polygon coordinates with shape (n, 8).
|
23
|
+
bbox2 (np.ndarray): Bounding boxes with shape (n, 4).
|
24
|
+
eps (float, optional): Small value to prevent division by zero.
|
25
25
|
|
26
26
|
Returns:
|
27
|
-
(np.ndarray): IoF scores
|
27
|
+
(np.ndarray): IoF scores with shape (n, 1) or (n, m) if bbox2 is (m, 4).
|
28
28
|
|
29
|
-
|
29
|
+
Notes:
|
30
30
|
Polygon format: [x1, y1, x2, y2, x3, y3, x4, y4].
|
31
31
|
Bounding box format: [x_min, y_min, x_max, y_max].
|
32
32
|
"""
|
@@ -66,9 +66,12 @@ def load_yolo_dota(data_root, split="train"):
|
|
66
66
|
Load DOTA dataset.
|
67
67
|
|
68
68
|
Args:
|
69
|
-
data_root (str): Data root.
|
69
|
+
data_root (str): Data root directory.
|
70
70
|
split (str): The dataset split, either `train` or `val`.
|
71
71
|
|
72
|
+
Returns:
|
73
|
+
(List[Dict]): List of annotation dictionaries containing image information.
|
74
|
+
|
72
75
|
Notes:
|
73
76
|
The directory structure assumed for the DOTA dataset:
|
74
77
|
- data_root
|
@@ -100,10 +103,13 @@ def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0
|
|
100
103
|
|
101
104
|
Args:
|
102
105
|
im_size (tuple): Original image size, (h, w).
|
103
|
-
crop_sizes (List
|
104
|
-
gaps (List
|
105
|
-
im_rate_thr (float): Threshold of windows areas divided by image
|
106
|
+
crop_sizes (List[int]): Crop size of windows.
|
107
|
+
gaps (List[int]): Gap between crops.
|
108
|
+
im_rate_thr (float): Threshold of windows areas divided by image areas.
|
106
109
|
eps (float): Epsilon value for math operations.
|
110
|
+
|
111
|
+
Returns:
|
112
|
+
(np.ndarray): Array of window coordinates with shape (n, 4) where each row is [x_start, y_start, x_stop, y_stop].
|
107
113
|
"""
|
108
114
|
h, w = im_size
|
109
115
|
windows = []
|
@@ -157,9 +163,9 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir, allow_background_i
|
|
157
163
|
Crop images and save new labels.
|
158
164
|
|
159
165
|
Args:
|
160
|
-
anno (
|
161
|
-
windows (
|
162
|
-
window_objs (
|
166
|
+
anno (Dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
|
167
|
+
windows (np.ndarray): Array of windows coordinates with shape (n, 4).
|
168
|
+
window_objs (List): A list of labels inside each window.
|
163
169
|
im_dir (str): The output directory path of images.
|
164
170
|
lb_dir (str): The output directory path of labels.
|
165
171
|
allow_background_images (bool): Whether to include background images without labels.
|
@@ -201,6 +207,13 @@ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024
|
|
201
207
|
"""
|
202
208
|
Split both images and labels.
|
203
209
|
|
210
|
+
Args:
|
211
|
+
data_root (str): Root directory of the dataset.
|
212
|
+
save_dir (str): Directory to save the split dataset.
|
213
|
+
split (str): The dataset split, either `train` or `val`.
|
214
|
+
crop_sizes (tuple): Tuple of crop sizes.
|
215
|
+
gaps (tuple): Tuple of gaps between crops.
|
216
|
+
|
204
217
|
Notes:
|
205
218
|
The directory structure assumed for the DOTA dataset:
|
206
219
|
- data_root
|
@@ -231,6 +244,13 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
|
|
231
244
|
"""
|
232
245
|
Split train and val set of DOTA.
|
233
246
|
|
247
|
+
Args:
|
248
|
+
data_root (str): Root directory of the dataset.
|
249
|
+
save_dir (str): Directory to save the split dataset.
|
250
|
+
crop_size (int): Base crop size.
|
251
|
+
gap (int): Base gap between crops.
|
252
|
+
rates (tuple): Scaling rates for crop_size and gap.
|
253
|
+
|
234
254
|
Notes:
|
235
255
|
The directory structure assumed for the DOTA dataset:
|
236
256
|
- data_root
|
@@ -261,6 +281,13 @@ def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
|
|
261
281
|
"""
|
262
282
|
Split test set of DOTA, labels are not included within this set.
|
263
283
|
|
284
|
+
Args:
|
285
|
+
data_root (str): Root directory of the dataset.
|
286
|
+
save_dir (str): Directory to save the split dataset.
|
287
|
+
crop_size (int): Base crop size.
|
288
|
+
gap (int): Base gap between crops.
|
289
|
+
rates (tuple): Scaling rates for crop_size and gap.
|
290
|
+
|
264
291
|
Notes:
|
265
292
|
The directory structure assumed for the DOTA dataset:
|
266
293
|
- data_root
|
ultralytics/data/utils.py
CHANGED
@@ -175,13 +175,8 @@ def visualize_image_annotations(image_path, txt_path, label_map):
|
|
175
175
|
adjusted for readability, depending on the background color's luminance.
|
176
176
|
|
177
177
|
Args:
|
178
|
-
image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL
|
179
|
-
txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object
|
180
|
-
- class_id (int): The class index.
|
181
|
-
- x_center (float): The X center of the bounding box (relative to image width).
|
182
|
-
- y_center (float): The Y center of the bounding box (relative to image height).
|
183
|
-
- width (float): The width of the bounding box (relative to image width).
|
184
|
-
- height (float): The height of the bounding box (relative to image height).
|
178
|
+
image_path (str): The path to the image file to annotate; it can be in any format supported by PIL.
|
179
|
+
txt_path (str): The path to the annotation file in YOLO format, which should contain one line per object.
|
185
180
|
label_map (dict): A dictionary that maps class IDs (integers) to class labels (strings).
|
186
181
|
|
187
182
|
Examples:
|
@@ -222,8 +217,8 @@ def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
|
|
222
217
|
imgsz (tuple): The size of the image as (height, width).
|
223
218
|
polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
|
224
219
|
N is the number of polygons, and M is the number of points such that M % 2 = 0.
|
225
|
-
color (int, optional): The color value to fill in the polygons on the mask.
|
226
|
-
downsample_ratio (int, optional): Factor by which to downsample the mask.
|
220
|
+
color (int, optional): The color value to fill in the polygons on the mask.
|
221
|
+
downsample_ratio (int, optional): Factor by which to downsample the mask.
|
227
222
|
|
228
223
|
Returns:
|
229
224
|
(np.ndarray): A binary mask of the specified image size with the polygons filled in.
|
@@ -246,7 +241,7 @@ def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
|
|
246
241
|
polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
|
247
242
|
N is the number of polygons, and M is the number of points such that M % 2 = 0.
|
248
243
|
color (int): The color value to fill in the polygons on the masks.
|
249
|
-
downsample_ratio (int, optional): Factor by which to downsample each mask.
|
244
|
+
downsample_ratio (int, optional): Factor by which to downsample each mask.
|
250
245
|
|
251
246
|
Returns:
|
252
247
|
(np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
|
@@ -281,8 +276,7 @@ def find_dataset_yaml(path: Path) -> Path:
|
|
281
276
|
Find and return the YAML file associated with a Detect, Segment or Pose dataset.
|
282
277
|
|
283
278
|
This function searches for a YAML file at the root level of the provided directory first, and if not found, it
|
284
|
-
performs a recursive search. It prefers YAML files that have the same stem as the provided path.
|
285
|
-
is raised if no YAML file is found or if multiple YAML files are found.
|
279
|
+
performs a recursive search. It prefers YAML files that have the same stem as the provided path.
|
286
280
|
|
287
281
|
Args:
|
288
282
|
path (Path): The directory path to search for the YAML file.
|
@@ -308,7 +302,7 @@ def check_det_dataset(dataset, autodownload=True):
|
|
308
302
|
|
309
303
|
Args:
|
310
304
|
dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
|
311
|
-
autodownload (bool, optional): Whether to automatically download the dataset if not found.
|
305
|
+
autodownload (bool, optional): Whether to automatically download the dataset if not found.
|
312
306
|
|
313
307
|
Returns:
|
314
308
|
(dict): Parsed dataset information and paths.
|
@@ -400,7 +394,7 @@ def check_cls_dataset(dataset, split=""):
|
|
400
394
|
|
401
395
|
Args:
|
402
396
|
dataset (str | Path): The name of the dataset.
|
403
|
-
split (str, optional): The split of the dataset. Either 'val', 'test', or ''.
|
397
|
+
split (str, optional): The split of the dataset. Either 'val', 'test', or ''.
|
404
398
|
|
405
399
|
Returns:
|
406
400
|
(dict): A dictionary containing the following keys:
|
@@ -478,21 +472,19 @@ class HUBDatasetStats:
|
|
478
472
|
task (str): Dataset task. Options are 'detect', 'segment', 'pose', 'obb', 'classify'. Default is 'detect'.
|
479
473
|
autodownload (bool): Attempt to download dataset if not found locally. Default is False.
|
480
474
|
|
481
|
-
|
475
|
+
Note:
|
482
476
|
Download *.zip files from https://github.com/ultralytics/hub/tree/main/example_datasets
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
stats = HUBDatasetStats("path/to/coco8.zip", task="detect") # detect dataset
|
488
|
-
stats = HUBDatasetStats("path/to/coco8-seg.zip", task="segment") # segment dataset
|
489
|
-
stats = HUBDatasetStats("path/to/coco8-pose.zip", task="pose") # pose dataset
|
490
|
-
stats = HUBDatasetStats("path/to/dota8.zip", task="obb") # OBB dataset
|
491
|
-
stats = HUBDatasetStats("path/to/imagenet10.zip", task="classify") # classification dataset
|
492
|
-
|
493
|
-
stats.
|
494
|
-
stats.process_images()
|
495
|
-
```
|
477
|
+
e.g. https://github.com/ultralytics/hub/raw/main/example_datasets/coco8.zip for coco8.zip.
|
478
|
+
|
479
|
+
Examples:
|
480
|
+
>>> from ultralytics.data.utils import HUBDatasetStats
|
481
|
+
>>> stats = HUBDatasetStats("path/to/coco8.zip", task="detect") # detect dataset
|
482
|
+
>>> stats = HUBDatasetStats("path/to/coco8-seg.zip", task="segment") # segment dataset
|
483
|
+
>>> stats = HUBDatasetStats("path/to/coco8-pose.zip", task="pose") # pose dataset
|
484
|
+
>>> stats = HUBDatasetStats("path/to/dota8.zip", task="obb") # OBB dataset
|
485
|
+
>>> stats = HUBDatasetStats("path/to/imagenet10.zip", task="classify") # classification dataset
|
486
|
+
>>> stats.get_json(save=True)
|
487
|
+
>>> stats.process_images()
|
496
488
|
"""
|
497
489
|
|
498
490
|
def __init__(self, path="coco8.yaml", task="detect", autodownload=False):
|
@@ -636,17 +628,14 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
|
|
636
628
|
Args:
|
637
629
|
f (str): The path to the input image file.
|
638
630
|
f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
|
639
|
-
max_dim (int, optional): The maximum dimension (width or height) of the output image.
|
640
|
-
quality (int, optional): The image compression quality as a percentage.
|
641
|
-
|
642
|
-
Example:
|
643
|
-
```python
|
644
|
-
from pathlib import Path
|
645
|
-
from ultralytics.data.utils import compress_one_image
|
631
|
+
max_dim (int, optional): The maximum dimension (width or height) of the output image.
|
632
|
+
quality (int, optional): The image compression quality as a percentage.
|
646
633
|
|
647
|
-
|
648
|
-
|
649
|
-
|
634
|
+
Examples:
|
635
|
+
>>> from pathlib import Path
|
636
|
+
>>> from ultralytics.data.utils import compress_one_image
|
637
|
+
>>> for f in Path("path/to/dataset").rglob("*.jpg"):
|
638
|
+
...     compress_one_image(f)
|
650
639
|
"""
|
651
640
|
try: # use PIL
|
652
641
|
im = Image.open(f)
|
@@ -669,16 +658,13 @@ def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annot
|
|
669
658
|
Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
|
670
659
|
|
671
660
|
Args:
|
672
|
-
path (Path, optional): Path to images directory.
|
673
|
-
weights (list | tuple, optional): Train, validation, and test split fractions.
|
674
|
-
annotated_only (bool, optional): If True, only images with an associated txt file are used.
|
675
|
-
|
676
|
-
Example:
|
677
|
-
```python
|
678
|
-
from ultralytics.data.utils import autosplit
|
661
|
+
path (Path, optional): Path to images directory.
|
662
|
+
weights (list | tuple, optional): Train, validation, and test split fractions.
|
663
|
+
annotated_only (bool, optional): If True, only images with an associated txt file are used.
|
679
664
|
|
680
|
-
|
681
|
-
|
665
|
+
Examples:
|
666
|
+
>>> from ultralytics.data.utils import autosplit
|
667
|
+
>>> autosplit()
|
682
668
|
"""
|
683
669
|
path = Path(path) # images dir
|
684
670
|
files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS) # image files only
|