ultralytics 8.3.189__py3-none-any.whl → 8.3.191__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +31 -25
- ultralytics/engine/exporter.py +7 -4
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +9 -7
- ultralytics/engine/results.py +59 -57
- ultralytics/engine/trainer.py +7 -0
- ultralytics/engine/tuner.py +4 -3
- ultralytics/engine/validator.py +3 -1
- ultralytics/hub/__init__.py +6 -2
- ultralytics/hub/auth.py +2 -2
- ultralytics/hub/google/__init__.py +9 -8
- ultralytics/hub/session.py +11 -11
- ultralytics/hub/utils.py +8 -9
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +7 -5
- ultralytics/models/yolo/classify/val.py +10 -8
- ultralytics/models/yolo/detect/predict.py +3 -3
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +23 -21
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +13 -10
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +11 -9
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +12 -10
- ultralytics/models/yolo/world/train.py +9 -7
- ultralytics/models/yolo/yoloe/train.py +7 -6
- ultralytics/models/yolo/yoloe/val.py +10 -8
- ultralytics/nn/autobackend.py +40 -52
- ultralytics/nn/modules/__init__.py +3 -3
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +46 -38
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +27 -77
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/callbacks/platform.py +2 -1
- ultralytics/utils/checks.py +20 -29
- ultralytics/utils/downloads.py +2 -2
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/logger.py +7 -6
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/nms.py +346 -0
- ultralytics/utils/ops.py +83 -251
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +18 -16
- ultralytics/utils/tal.py +1 -1
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +47 -33
- ultralytics/utils/triton.py +3 -2
- {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/METADATA +1 -1
- {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/RECORD +111 -109
- {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/data/base.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import glob
|
4
6
|
import math
|
5
7
|
import os
|
@@ -7,7 +9,7 @@ import random
|
|
7
9
|
from copy import deepcopy
|
8
10
|
from multiprocessing.pool import ThreadPool
|
9
11
|
from pathlib import Path
|
10
|
-
from typing import Any
|
12
|
+
from typing import Any
|
11
13
|
|
12
14
|
import cv2
|
13
15
|
import numpy as np
|
@@ -69,18 +71,18 @@ class BaseDataset(Dataset):
|
|
69
71
|
|
70
72
|
def __init__(
|
71
73
|
self,
|
72
|
-
img_path:
|
74
|
+
img_path: str | list[str],
|
73
75
|
imgsz: int = 640,
|
74
|
-
cache:
|
76
|
+
cache: bool | str = False,
|
75
77
|
augment: bool = True,
|
76
|
-
hyp:
|
78
|
+
hyp: dict[str, Any] = DEFAULT_CFG,
|
77
79
|
prefix: str = "",
|
78
80
|
rect: bool = False,
|
79
81
|
batch_size: int = 16,
|
80
82
|
stride: int = 32,
|
81
83
|
pad: float = 0.5,
|
82
84
|
single_cls: bool = False,
|
83
|
-
classes:
|
85
|
+
classes: list[int] | None = None,
|
84
86
|
fraction: float = 1.0,
|
85
87
|
channels: int = 3,
|
86
88
|
):
|
@@ -145,7 +147,7 @@ class BaseDataset(Dataset):
|
|
145
147
|
# Transforms
|
146
148
|
self.transforms = self.build_transforms(hyp=hyp)
|
147
149
|
|
148
|
-
def get_img_files(self, img_path:
|
150
|
+
def get_img_files(self, img_path: str | list[str]) -> list[str]:
|
149
151
|
"""
|
150
152
|
Read image files from the specified path.
|
151
153
|
|
@@ -183,7 +185,7 @@ class BaseDataset(Dataset):
|
|
183
185
|
check_file_speeds(im_files, prefix=self.prefix) # check image read speeds
|
184
186
|
return im_files
|
185
187
|
|
186
|
-
def update_labels(self, include_class:
|
188
|
+
def update_labels(self, include_class: list[int] | None) -> None:
|
187
189
|
"""
|
188
190
|
Update labels to include only specified classes.
|
189
191
|
|
@@ -207,7 +209,7 @@ class BaseDataset(Dataset):
|
|
207
209
|
if self.single_cls:
|
208
210
|
self.labels[i]["cls"][:, 0] = 0
|
209
211
|
|
210
|
-
def load_image(self, i: int, rect_mode: bool = True) ->
|
212
|
+
def load_image(self, i: int, rect_mode: bool = True) -> tuple[np.ndarray, tuple[int, int], tuple[int, int]]:
|
211
213
|
"""
|
212
214
|
Load an image from dataset index 'i'.
|
213
215
|
|
@@ -374,11 +376,11 @@ class BaseDataset(Dataset):
|
|
374
376
|
self.batch_shapes = np.ceil(np.array(shapes) * self.imgsz / self.stride + self.pad).astype(int) * self.stride
|
375
377
|
self.batch = bi # batch index of image
|
376
378
|
|
377
|
-
def __getitem__(self, index: int) ->
|
379
|
+
def __getitem__(self, index: int) -> dict[str, Any]:
|
378
380
|
"""Return transformed label information for given index."""
|
379
381
|
return self.transforms(self.get_image_and_label(index))
|
380
382
|
|
381
|
-
def get_image_and_label(self, index: int) ->
|
383
|
+
def get_image_and_label(self, index: int) -> dict[str, Any]:
|
382
384
|
"""
|
383
385
|
Get and return label information from the dataset.
|
384
386
|
|
@@ -403,11 +405,11 @@ class BaseDataset(Dataset):
|
|
403
405
|
"""Return the length of the labels list for the dataset."""
|
404
406
|
return len(self.labels)
|
405
407
|
|
406
|
-
def update_labels_info(self, label:
|
408
|
+
def update_labels_info(self, label: dict[str, Any]) -> dict[str, Any]:
|
407
409
|
"""Custom your label format here."""
|
408
410
|
return label
|
409
411
|
|
410
|
-
def build_transforms(self, hyp:
|
412
|
+
def build_transforms(self, hyp: dict[str, Any] | None = None):
|
411
413
|
"""
|
412
414
|
Users can customize augmentations here.
|
413
415
|
|
@@ -421,7 +423,7 @@ class BaseDataset(Dataset):
|
|
421
423
|
"""
|
422
424
|
raise NotImplementedError
|
423
425
|
|
424
|
-
def get_labels(self) ->
|
426
|
+
def get_labels(self) -> list[dict[str, Any]]:
|
425
427
|
"""
|
426
428
|
Users can customize their own format here.
|
427
429
|
|
ultralytics/data/build.py
CHANGED
@@ -1,9 +1,12 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import os
|
4
6
|
import random
|
7
|
+
from collections.abc import Iterator
|
5
8
|
from pathlib import Path
|
6
|
-
from typing import Any
|
9
|
+
from typing import Any
|
7
10
|
|
8
11
|
import numpy as np
|
9
12
|
import torch
|
@@ -116,7 +119,7 @@ def build_yolo_dataset(
|
|
116
119
|
cfg: IterableSimpleNamespace,
|
117
120
|
img_path: str,
|
118
121
|
batch: int,
|
119
|
-
data:
|
122
|
+
data: dict[str, Any],
|
120
123
|
mode: str = "train",
|
121
124
|
rect: bool = False,
|
122
125
|
stride: int = 32,
|
@@ -133,7 +136,7 @@ def build_yolo_dataset(
|
|
133
136
|
rect=cfg.rect or rect, # rectangular batches
|
134
137
|
cache=cfg.cache or None,
|
135
138
|
single_cls=cfg.single_cls or False,
|
136
|
-
stride=
|
139
|
+
stride=stride,
|
137
140
|
pad=0.0 if mode == "train" else 0.5,
|
138
141
|
prefix=colorstr(f"{mode}: "),
|
139
142
|
task=cfg.task,
|
@@ -165,7 +168,7 @@ def build_grounding(
|
|
165
168
|
rect=cfg.rect or rect, # rectangular batches
|
166
169
|
cache=cfg.cache or None,
|
167
170
|
single_cls=cfg.single_cls or False,
|
168
|
-
stride=
|
171
|
+
stride=stride,
|
169
172
|
pad=0.0 if mode == "train" else 0.5,
|
170
173
|
prefix=colorstr(f"{mode}: "),
|
171
174
|
task=cfg.task,
|
ultralytics/data/converter.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import asyncio
|
4
6
|
import json
|
5
7
|
import random
|
@@ -7,7 +9,6 @@ import shutil
|
|
7
9
|
from collections import defaultdict
|
8
10
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
9
11
|
from pathlib import Path
|
10
|
-
from typing import List, Optional, Union
|
11
12
|
|
12
13
|
import cv2
|
13
14
|
import numpy as np
|
@@ -19,7 +20,7 @@ from ultralytics.utils.downloads import download, zip_directory
|
|
19
20
|
from ultralytics.utils.files import increment_path
|
20
21
|
|
21
22
|
|
22
|
-
def coco91_to_coco80_class() ->
|
23
|
+
def coco91_to_coco80_class() -> list[int]:
|
23
24
|
"""
|
24
25
|
Convert 91-index COCO class IDs to 80-index COCO class IDs.
|
25
26
|
|
@@ -122,7 +123,7 @@ def coco91_to_coco80_class() -> List[int]:
|
|
122
123
|
]
|
123
124
|
|
124
125
|
|
125
|
-
def coco80_to_coco91_class() ->
|
126
|
+
def coco80_to_coco91_class() -> list[int]:
|
126
127
|
r"""
|
127
128
|
Convert 80-index (val2014) to 91-index (paper).
|
128
129
|
|
@@ -531,7 +532,7 @@ def min_index(arr1: np.ndarray, arr2: np.ndarray):
|
|
531
532
|
return np.unravel_index(np.argmin(dis, axis=None), dis.shape)
|
532
533
|
|
533
534
|
|
534
|
-
def merge_multi_segment(segments:
|
535
|
+
def merge_multi_segment(segments: list[list]):
|
535
536
|
"""
|
536
537
|
Merge multiple segments into one list by connecting the coordinates with the minimum distance between each segment.
|
537
538
|
|
@@ -582,9 +583,7 @@ def merge_multi_segment(segments: List[List]):
|
|
582
583
|
return s
|
583
584
|
|
584
585
|
|
585
|
-
def yolo_bbox2segment(
|
586
|
-
im_dir: Union[str, Path], save_dir: Optional[Union[str, Path]] = None, sam_model: str = "sam_b.pt", device=None
|
587
|
-
):
|
586
|
+
def yolo_bbox2segment(im_dir: str | Path, save_dir: str | Path | None = None, sam_model: str = "sam_b.pt", device=None):
|
588
587
|
"""
|
589
588
|
Convert existing object detection dataset (bounding boxes) to segmentation dataset or oriented bounding box (OBB) in
|
590
589
|
YOLO format. Generate segmentation data using SAM auto-annotator as needed.
|
@@ -706,7 +705,7 @@ def create_synthetic_coco_dataset():
|
|
706
705
|
LOGGER.info("Synthetic COCO dataset created successfully.")
|
707
706
|
|
708
707
|
|
709
|
-
def convert_to_multispectral(path:
|
708
|
+
def convert_to_multispectral(path: str | Path, n_channels: int = 10, replace: bool = False, zip: bool = False):
|
710
709
|
"""
|
711
710
|
Convert RGB images to multispectral images by interpolating across wavelength bands.
|
712
711
|
|
@@ -733,7 +732,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
|
|
733
732
|
path = Path(path)
|
734
733
|
if path.is_dir():
|
735
734
|
# Process directory
|
736
|
-
im_files = sum(
|
735
|
+
im_files = sum((list(path.rglob(f"*.{ext}")) for ext in (IMG_FORMATS - {"tif", "tiff"})), [])
|
737
736
|
for im_path in im_files:
|
738
737
|
try:
|
739
738
|
convert_to_multispectral(im_path, n_channels)
|
@@ -758,7 +757,7 @@ def convert_to_multispectral(path: Union[str, Path], n_channels: int = 10, repla
|
|
758
757
|
LOGGER.info(f"Converted {output_path}")
|
759
758
|
|
760
759
|
|
761
|
-
async def convert_ndjson_to_yolo(ndjson_path:
|
760
|
+
async def convert_ndjson_to_yolo(ndjson_path: str | Path, output_path: str | Path | None = None) -> Path:
|
762
761
|
"""
|
763
762
|
Convert NDJSON dataset format to Ultralytics YOLO11 dataset structure.
|
764
763
|
|
ultralytics/data/dataset.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import json
|
4
6
|
from collections import defaultdict
|
5
7
|
from itertools import repeat
|
6
8
|
from multiprocessing.pool import ThreadPool
|
7
9
|
from pathlib import Path
|
8
|
-
from typing import Any
|
10
|
+
from typing import Any
|
9
11
|
|
10
12
|
import cv2
|
11
13
|
import numpy as np
|
@@ -70,7 +72,7 @@ class YOLODataset(BaseDataset):
|
|
70
72
|
>>> dataset.get_labels()
|
71
73
|
"""
|
72
74
|
|
73
|
-
def __init__(self, *args, data:
|
75
|
+
def __init__(self, *args, data: dict | None = None, task: str = "detect", **kwargs):
|
74
76
|
"""
|
75
77
|
Initialize the YOLODataset.
|
76
78
|
|
@@ -87,7 +89,7 @@ class YOLODataset(BaseDataset):
|
|
87
89
|
assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
|
88
90
|
super().__init__(*args, channels=self.data.get("channels", 3), **kwargs)
|
89
91
|
|
90
|
-
def cache_labels(self, path: Path = Path("./labels.cache")) ->
|
92
|
+
def cache_labels(self, path: Path = Path("./labels.cache")) -> dict:
|
91
93
|
"""
|
92
94
|
Cache dataset labels, check images and read shapes.
|
93
95
|
|
@@ -155,7 +157,7 @@ class YOLODataset(BaseDataset):
|
|
155
157
|
save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
|
156
158
|
return x
|
157
159
|
|
158
|
-
def get_labels(self) ->
|
160
|
+
def get_labels(self) -> list[dict]:
|
159
161
|
"""
|
160
162
|
Return dictionary of labels for YOLO training.
|
161
163
|
|
@@ -205,7 +207,7 @@ class YOLODataset(BaseDataset):
|
|
205
207
|
LOGGER.warning(f"Labels are missing or empty in {cache_path}, training may not work correctly. {HELP_URL}")
|
206
208
|
return labels
|
207
209
|
|
208
|
-
def build_transforms(self, hyp:
|
210
|
+
def build_transforms(self, hyp: dict | None = None) -> Compose:
|
209
211
|
"""
|
210
212
|
Build and append transforms to the list.
|
211
213
|
|
@@ -237,7 +239,7 @@ class YOLODataset(BaseDataset):
|
|
237
239
|
)
|
238
240
|
return transforms
|
239
241
|
|
240
|
-
def close_mosaic(self, hyp:
|
242
|
+
def close_mosaic(self, hyp: dict) -> None:
|
241
243
|
"""
|
242
244
|
Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.
|
243
245
|
|
@@ -250,7 +252,7 @@ class YOLODataset(BaseDataset):
|
|
250
252
|
hyp.cutmix = 0.0
|
251
253
|
self.transforms = self.build_transforms(hyp)
|
252
254
|
|
253
|
-
def update_labels_info(self, label:
|
255
|
+
def update_labels_info(self, label: dict) -> dict:
|
254
256
|
"""
|
255
257
|
Update label format for different tasks.
|
256
258
|
|
@@ -284,7 +286,7 @@ class YOLODataset(BaseDataset):
|
|
284
286
|
return label
|
285
287
|
|
286
288
|
@staticmethod
|
287
|
-
def collate_fn(batch:
|
289
|
+
def collate_fn(batch: list[dict]) -> dict:
|
288
290
|
"""
|
289
291
|
Collate data samples into batches.
|
290
292
|
|
@@ -331,7 +333,7 @@ class YOLOMultiModalDataset(YOLODataset):
|
|
331
333
|
>>> print(batch.keys()) # Should include 'texts'
|
332
334
|
"""
|
333
335
|
|
334
|
-
def __init__(self, *args, data:
|
336
|
+
def __init__(self, *args, data: dict | None = None, task: str = "detect", **kwargs):
|
335
337
|
"""
|
336
338
|
Initialize a YOLOMultiModalDataset.
|
337
339
|
|
@@ -343,7 +345,7 @@ class YOLOMultiModalDataset(YOLODataset):
|
|
343
345
|
"""
|
344
346
|
super().__init__(*args, data=data, task=task, **kwargs)
|
345
347
|
|
346
|
-
def update_labels_info(self, label:
|
348
|
+
def update_labels_info(self, label: dict) -> dict:
|
347
349
|
"""
|
348
350
|
Add text information for multi-modal model training.
|
349
351
|
|
@@ -360,7 +362,7 @@ class YOLOMultiModalDataset(YOLODataset):
|
|
360
362
|
|
361
363
|
return labels
|
362
364
|
|
363
|
-
def build_transforms(self, hyp:
|
365
|
+
def build_transforms(self, hyp: dict | None = None) -> Compose:
|
364
366
|
"""
|
365
367
|
Enhance data transformations with optional text augmentation for multi-modal training.
|
366
368
|
|
@@ -409,7 +411,7 @@ class YOLOMultiModalDataset(YOLODataset):
|
|
409
411
|
return category_freq
|
410
412
|
|
411
413
|
@staticmethod
|
412
|
-
def _get_neg_texts(category_freq:
|
414
|
+
def _get_neg_texts(category_freq: dict, threshold: int = 100) -> list[str]:
|
413
415
|
"""Get negative text samples based on frequency threshold."""
|
414
416
|
threshold = min(max(category_freq.values()), 100)
|
415
417
|
return [k for k, v in category_freq.items() if v >= threshold]
|
@@ -451,7 +453,7 @@ class GroundingDataset(YOLODataset):
|
|
451
453
|
self.max_samples = max_samples
|
452
454
|
super().__init__(*args, task=task, data={"channels": 3}, **kwargs)
|
453
455
|
|
454
|
-
def get_img_files(self, img_path: str) ->
|
456
|
+
def get_img_files(self, img_path: str) -> list:
|
455
457
|
"""
|
456
458
|
The image files would be read in `get_labels` function, return empty list here.
|
457
459
|
|
@@ -463,7 +465,7 @@ class GroundingDataset(YOLODataset):
|
|
463
465
|
"""
|
464
466
|
return []
|
465
467
|
|
466
|
-
def verify_labels(self, labels:
|
468
|
+
def verify_labels(self, labels: list[dict[str, Any]]) -> None:
|
467
469
|
"""
|
468
470
|
Verify the number of instances in the dataset matches expected counts.
|
469
471
|
|
@@ -498,7 +500,7 @@ class GroundingDataset(YOLODataset):
|
|
498
500
|
return
|
499
501
|
LOGGER.warning(f"Skipping instance count verification for unrecognized dataset '{self.json_file}'")
|
500
502
|
|
501
|
-
def cache_labels(self, path: Path = Path("./labels.cache")) ->
|
503
|
+
def cache_labels(self, path: Path = Path("./labels.cache")) -> dict[str, Any]:
|
502
504
|
"""
|
503
505
|
Load annotations from a JSON file, filter, and normalize bounding boxes for each image.
|
504
506
|
|
@@ -589,7 +591,7 @@ class GroundingDataset(YOLODataset):
|
|
589
591
|
save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
|
590
592
|
return x
|
591
593
|
|
592
|
-
def get_labels(self) ->
|
594
|
+
def get_labels(self) -> list[dict]:
|
593
595
|
"""
|
594
596
|
Load labels from cache or generate them from JSON file.
|
595
597
|
|
@@ -611,7 +613,7 @@ class GroundingDataset(YOLODataset):
|
|
611
613
|
LOGGER.info(f"Load {self.json_file} from cache file {cache_path}")
|
612
614
|
return labels
|
613
615
|
|
614
|
-
def build_transforms(self, hyp:
|
616
|
+
def build_transforms(self, hyp: dict | None = None) -> Compose:
|
615
617
|
"""
|
616
618
|
Configure augmentations for training with optional text loading.
|
617
619
|
|
@@ -652,7 +654,7 @@ class GroundingDataset(YOLODataset):
|
|
652
654
|
return category_freq
|
653
655
|
|
654
656
|
@staticmethod
|
655
|
-
def _get_neg_texts(category_freq:
|
657
|
+
def _get_neg_texts(category_freq: dict, threshold: int = 100) -> list[str]:
|
656
658
|
"""Get negative text samples based on frequency threshold."""
|
657
659
|
threshold = min(max(category_freq.values()), 100)
|
658
660
|
return [k for k, v in category_freq.items() if v >= threshold]
|
@@ -675,7 +677,7 @@ class YOLOConcatDataset(ConcatDataset):
|
|
675
677
|
"""
|
676
678
|
|
677
679
|
@staticmethod
|
678
|
-
def collate_fn(batch:
|
680
|
+
def collate_fn(batch: list[dict]) -> dict:
|
679
681
|
"""
|
680
682
|
Collate data samples into batches.
|
681
683
|
|
@@ -687,7 +689,7 @@ class YOLOConcatDataset(ConcatDataset):
|
|
687
689
|
"""
|
688
690
|
return YOLODataset.collate_fn(batch)
|
689
691
|
|
690
|
-
def close_mosaic(self, hyp:
|
692
|
+
def close_mosaic(self, hyp: dict) -> None:
|
691
693
|
"""
|
692
694
|
Set mosaic, copy_paste and mixup options to 0.0 and build transformations.
|
693
695
|
|
@@ -784,7 +786,7 @@ class ClassificationDataset:
|
|
784
786
|
else classify_transforms(size=args.imgsz)
|
785
787
|
)
|
786
788
|
|
787
|
-
def __getitem__(self, i: int) ->
|
789
|
+
def __getitem__(self, i: int) -> dict:
|
788
790
|
"""
|
789
791
|
Return subset of data and targets corresponding to given indices.
|
790
792
|
|
@@ -813,7 +815,7 @@ class ClassificationDataset:
|
|
813
815
|
"""Return the total number of samples in the dataset."""
|
814
816
|
return len(self.samples)
|
815
817
|
|
816
|
-
def verify_images(self) ->
|
818
|
+
def verify_images(self) -> list[tuple]:
|
817
819
|
"""
|
818
820
|
Verify all images in dataset.
|
819
821
|
|
ultralytics/data/loaders.py
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import glob
|
4
6
|
import math
|
5
7
|
import os
|
@@ -8,7 +10,7 @@ import urllib
|
|
8
10
|
from dataclasses import dataclass
|
9
11
|
from pathlib import Path
|
10
12
|
from threading import Thread
|
11
|
-
from typing import Any
|
13
|
+
from typing import Any
|
12
14
|
|
13
15
|
import cv2
|
14
16
|
import numpy as np
|
@@ -192,7 +194,7 @@ class LoadStreams:
|
|
192
194
|
self.count = -1
|
193
195
|
return self
|
194
196
|
|
195
|
-
def __next__(self) ->
|
197
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
196
198
|
"""Return the next batch of frames from multiple video streams for processing."""
|
197
199
|
self.count += 1
|
198
200
|
|
@@ -294,7 +296,7 @@ class LoadScreenshots:
|
|
294
296
|
"""Yield the next screenshot image from the specified screen or region for processing."""
|
295
297
|
return self
|
296
298
|
|
297
|
-
def __next__(self) ->
|
299
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
298
300
|
"""Capture and return the next screenshot as a numpy array using the mss library."""
|
299
301
|
im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
|
300
302
|
im0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)[..., None] if self.cv2_flag == cv2.IMREAD_GRAYSCALE else im0
|
@@ -344,7 +346,7 @@ class LoadImagesAndVideos:
|
|
344
346
|
- Can read from a text file containing paths to images and videos.
|
345
347
|
"""
|
346
348
|
|
347
|
-
def __init__(self, path:
|
349
|
+
def __init__(self, path: str | Path | list, batch: int = 1, vid_stride: int = 1, channels: int = 3):
|
348
350
|
"""
|
349
351
|
Initialize dataloader for images and videos, supporting various input formats.
|
350
352
|
|
@@ -403,7 +405,7 @@ class LoadImagesAndVideos:
|
|
403
405
|
self.count = 0
|
404
406
|
return self
|
405
407
|
|
406
|
-
def __next__(self) ->
|
408
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
407
409
|
"""Return the next batch of images or video frames with their paths and metadata."""
|
408
410
|
paths, imgs, info = [], [], []
|
409
411
|
while len(imgs) < self.bs:
|
@@ -514,7 +516,7 @@ class LoadPilAndNumpy:
|
|
514
516
|
Loaded 2 images
|
515
517
|
"""
|
516
518
|
|
517
|
-
def __init__(self, im0:
|
519
|
+
def __init__(self, im0: Image.Image | np.ndarray | list, channels: int = 3):
|
518
520
|
"""
|
519
521
|
Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.
|
520
522
|
|
@@ -532,7 +534,7 @@ class LoadPilAndNumpy:
|
|
532
534
|
self.bs = len(self.im0)
|
533
535
|
|
534
536
|
@staticmethod
|
535
|
-
def _single_check(im:
|
537
|
+
def _single_check(im: Image.Image | np.ndarray, flag: str = "RGB") -> np.ndarray:
|
536
538
|
"""Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
|
537
539
|
assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
|
538
540
|
if isinstance(im, Image.Image):
|
@@ -548,7 +550,7 @@ class LoadPilAndNumpy:
|
|
548
550
|
"""Return the length of the 'im0' attribute, representing the number of loaded images."""
|
549
551
|
return len(self.im0)
|
550
552
|
|
551
|
-
def __next__(self) ->
|
553
|
+
def __next__(self) -> tuple[list[str], list[np.ndarray], list[str]]:
|
552
554
|
"""Return the next batch of images, paths, and metadata for processing."""
|
553
555
|
if self.count == 1: # loop only once as it's batch inference
|
554
556
|
raise StopIteration
|
@@ -624,7 +626,7 @@ class LoadTensor:
|
|
624
626
|
self.count = 0
|
625
627
|
return self
|
626
628
|
|
627
|
-
def __next__(self) ->
|
629
|
+
def __next__(self) -> tuple[list[str], torch.Tensor, list[str]]:
|
628
630
|
"""Yield the next batch of tensor images and metadata for processing."""
|
629
631
|
if self.count == 1:
|
630
632
|
raise StopIteration
|
@@ -636,7 +638,7 @@ class LoadTensor:
|
|
636
638
|
return self.bs
|
637
639
|
|
638
640
|
|
639
|
-
def autocast_list(source:
|
641
|
+
def autocast_list(source: list[Any]) -> list[Image.Image | np.ndarray]:
|
640
642
|
"""Merge a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
|
641
643
|
files = []
|
642
644
|
for im in source:
|
@@ -653,7 +655,7 @@ def autocast_list(source: List[Any]) -> List[Union[Image.Image, np.ndarray]]:
|
|
653
655
|
return files
|
654
656
|
|
655
657
|
|
656
|
-
def get_best_youtube_url(url: str, method: str = "pytube") ->
|
658
|
+
def get_best_youtube_url(url: str, method: str = "pytube") -> str | None:
|
657
659
|
"""
|
658
660
|
Retrieve the URL of the best quality MP4 video stream from a given YouTube video.
|
659
661
|
|
ultralytics/data/split.py
CHANGED
@@ -1,15 +1,16 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import random
|
4
6
|
import shutil
|
5
7
|
from pathlib import Path
|
6
|
-
from typing import Tuple, Union
|
7
8
|
|
8
9
|
from ultralytics.data.utils import IMG_FORMATS, img2label_paths
|
9
10
|
from ultralytics.utils import DATASETS_DIR, LOGGER, TQDM
|
10
11
|
|
11
12
|
|
12
|
-
def split_classify_dataset(source_dir:
|
13
|
+
def split_classify_dataset(source_dir: str | Path, train_ratio: float = 0.8) -> Path:
|
13
14
|
"""
|
14
15
|
Split classification dataset into train and val directories in a new directory.
|
15
16
|
|
@@ -97,7 +98,7 @@ def split_classify_dataset(source_dir: Union[str, Path], train_ratio: float = 0.
|
|
97
98
|
|
98
99
|
def autosplit(
|
99
100
|
path: Path = DATASETS_DIR / "coco8/images",
|
100
|
-
weights:
|
101
|
+
weights: tuple[float, float, float] = (0.9, 0.1, 0.0),
|
101
102
|
annotated_only: bool = False,
|
102
103
|
) -> None:
|
103
104
|
"""
|
ultralytics/data/split_dota.py
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import itertools
|
4
6
|
from glob import glob
|
5
7
|
from math import ceil
|
6
8
|
from pathlib import Path
|
7
|
-
from typing import Any
|
9
|
+
from typing import Any
|
8
10
|
|
9
11
|
import cv2
|
10
12
|
import numpy as np
|
@@ -62,7 +64,7 @@ def bbox_iof(polygon1: np.ndarray, bbox2: np.ndarray, eps: float = 1e-6) -> np.n
|
|
62
64
|
return outputs
|
63
65
|
|
64
66
|
|
65
|
-
def load_yolo_dota(data_root: str, split: str = "train") ->
|
67
|
+
def load_yolo_dota(data_root: str, split: str = "train") -> list[dict[str, Any]]:
|
66
68
|
"""
|
67
69
|
Load DOTA dataset annotations and image information.
|
68
70
|
|
@@ -99,9 +101,9 @@ def load_yolo_dota(data_root: str, split: str = "train") -> List[Dict[str, Any]]
|
|
99
101
|
|
100
102
|
|
101
103
|
def get_windows(
|
102
|
-
im_size:
|
103
|
-
crop_sizes:
|
104
|
-
gaps:
|
104
|
+
im_size: tuple[int, int],
|
105
|
+
crop_sizes: tuple[int, ...] = (1024,),
|
106
|
+
gaps: tuple[int, ...] = (200,),
|
105
107
|
im_rate_thr: float = 0.6,
|
106
108
|
eps: float = 0.01,
|
107
109
|
) -> np.ndarray:
|
@@ -151,7 +153,7 @@ def get_windows(
|
|
151
153
|
return windows[im_rates > im_rate_thr]
|
152
154
|
|
153
155
|
|
154
|
-
def get_window_obj(anno:
|
156
|
+
def get_window_obj(anno: dict[str, Any], windows: np.ndarray, iof_thr: float = 0.7) -> list[np.ndarray]:
|
155
157
|
"""Get objects for each window based on IoF threshold."""
|
156
158
|
h, w = anno["ori_size"]
|
157
159
|
label = anno["label"]
|
@@ -166,9 +168,9 @@ def get_window_obj(anno: Dict[str, Any], windows: np.ndarray, iof_thr: float = 0
|
|
166
168
|
|
167
169
|
|
168
170
|
def crop_and_save(
|
169
|
-
anno:
|
171
|
+
anno: dict[str, Any],
|
170
172
|
windows: np.ndarray,
|
171
|
-
window_objs:
|
173
|
+
window_objs: list[np.ndarray],
|
172
174
|
im_dir: str,
|
173
175
|
lb_dir: str,
|
174
176
|
allow_background_images: bool = True,
|
@@ -221,8 +223,8 @@ def split_images_and_labels(
|
|
221
223
|
data_root: str,
|
222
224
|
save_dir: str,
|
223
225
|
split: str = "train",
|
224
|
-
crop_sizes:
|
225
|
-
gaps:
|
226
|
+
crop_sizes: tuple[int, ...] = (1024,),
|
227
|
+
gaps: tuple[int, ...] = (200,),
|
226
228
|
) -> None:
|
227
229
|
"""
|
228
230
|
Split both images and labels for a given dataset split.
|
@@ -261,7 +263,7 @@ def split_images_and_labels(
|
|
261
263
|
|
262
264
|
|
263
265
|
def split_trainval(
|
264
|
-
data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates:
|
266
|
+
data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: tuple[float, ...] = (1.0,)
|
265
267
|
) -> None:
|
266
268
|
"""
|
267
269
|
Split train and val sets of DOTA dataset with multiple scaling rates.
|
@@ -300,7 +302,7 @@ def split_trainval(
|
|
300
302
|
|
301
303
|
|
302
304
|
def split_test(
|
303
|
-
data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates:
|
305
|
+
data_root: str, save_dir: str, crop_size: int = 1024, gap: int = 200, rates: tuple[float, ...] = (1.0,)
|
304
306
|
) -> None:
|
305
307
|
"""
|
306
308
|
Split test set of DOTA dataset, labels are not included within this set.
|