ultralytics 8.3.190__py3-none-any.whl → 8.3.192__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/test_cuda.py +6 -5
- tests/test_exports.py +1 -6
- tests/test_python.py +1 -4
- tests/test_solutions.py +1 -1
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -14
- ultralytics/cfg/datasets/SKU-110K.yaml +1 -1
- ultralytics/cfg/datasets/VisDrone.yaml +4 -4
- ultralytics/data/annotator.py +6 -6
- ultralytics/data/augment.py +53 -51
- ultralytics/data/base.py +15 -13
- ultralytics/data/build.py +7 -4
- ultralytics/data/converter.py +9 -10
- ultralytics/data/dataset.py +24 -22
- ultralytics/data/loaders.py +13 -11
- ultralytics/data/split.py +4 -3
- ultralytics/data/split_dota.py +14 -12
- ultralytics/data/utils.py +29 -23
- ultralytics/engine/exporter.py +2 -2
- ultralytics/engine/model.py +16 -14
- ultralytics/engine/predictor.py +8 -6
- ultralytics/engine/results.py +54 -52
- ultralytics/engine/trainer.py +8 -3
- ultralytics/engine/tuner.py +230 -42
- ultralytics/hub/google/__init__.py +7 -6
- ultralytics/hub/session.py +8 -6
- ultralytics/hub/utils.py +3 -4
- ultralytics/models/fastsam/model.py +8 -6
- ultralytics/models/nas/model.py +5 -3
- ultralytics/models/rtdetr/train.py +4 -3
- ultralytics/models/rtdetr/val.py +6 -4
- ultralytics/models/sam/amg.py +13 -10
- ultralytics/models/sam/model.py +3 -2
- ultralytics/models/sam/modules/blocks.py +21 -21
- ultralytics/models/sam/modules/decoders.py +11 -11
- ultralytics/models/sam/modules/encoders.py +25 -25
- ultralytics/models/sam/modules/memory_attention.py +9 -8
- ultralytics/models/sam/modules/sam.py +8 -10
- ultralytics/models/sam/modules/tiny_encoder.py +21 -20
- ultralytics/models/sam/modules/transformer.py +6 -5
- ultralytics/models/sam/modules/utils.py +7 -5
- ultralytics/models/sam/predict.py +32 -31
- ultralytics/models/utils/loss.py +29 -27
- ultralytics/models/utils/ops.py +10 -8
- ultralytics/models/yolo/classify/train.py +9 -7
- ultralytics/models/yolo/classify/val.py +11 -9
- ultralytics/models/yolo/detect/predict.py +1 -1
- ultralytics/models/yolo/detect/train.py +8 -6
- ultralytics/models/yolo/detect/val.py +22 -20
- ultralytics/models/yolo/model.py +14 -14
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +11 -9
- ultralytics/models/yolo/pose/train.py +7 -5
- ultralytics/models/yolo/pose/val.py +12 -10
- ultralytics/models/yolo/segment/train.py +4 -5
- ultralytics/models/yolo/segment/val.py +13 -11
- ultralytics/models/yolo/world/train.py +10 -8
- ultralytics/models/yolo/yoloe/train.py +10 -10
- ultralytics/models/yolo/yoloe/val.py +11 -9
- ultralytics/nn/autobackend.py +17 -19
- ultralytics/nn/modules/block.py +12 -12
- ultralytics/nn/modules/conv.py +4 -3
- ultralytics/nn/modules/head.py +41 -37
- ultralytics/nn/modules/transformer.py +22 -21
- ultralytics/nn/tasks.py +2 -2
- ultralytics/nn/text_model.py +6 -5
- ultralytics/solutions/analytics.py +7 -5
- ultralytics/solutions/config.py +12 -10
- ultralytics/solutions/distance_calculation.py +3 -3
- ultralytics/solutions/heatmap.py +4 -2
- ultralytics/solutions/object_counter.py +5 -3
- ultralytics/solutions/parking_management.py +4 -2
- ultralytics/solutions/region_counter.py +7 -5
- ultralytics/solutions/similarity_search.py +5 -3
- ultralytics/solutions/solutions.py +38 -36
- ultralytics/solutions/streamlit_inference.py +8 -7
- ultralytics/trackers/bot_sort.py +11 -9
- ultralytics/trackers/byte_tracker.py +17 -15
- ultralytics/trackers/utils/gmc.py +4 -3
- ultralytics/utils/__init__.py +16 -88
- ultralytics/utils/autobatch.py +3 -2
- ultralytics/utils/autodevice.py +10 -10
- ultralytics/utils/benchmarks.py +11 -10
- ultralytics/utils/callbacks/comet.py +9 -9
- ultralytics/utils/checks.py +17 -26
- ultralytics/utils/export.py +12 -11
- ultralytics/utils/files.py +8 -7
- ultralytics/utils/git.py +139 -0
- ultralytics/utils/instance.py +8 -7
- ultralytics/utils/loss.py +15 -13
- ultralytics/utils/metrics.py +62 -62
- ultralytics/utils/ops.py +3 -2
- ultralytics/utils/patches.py +6 -4
- ultralytics/utils/plotting.py +20 -18
- ultralytics/utils/torch_utils.py +4 -2
- ultralytics/utils/tqdm.py +18 -14
- ultralytics/utils/triton.py +3 -2
- {ultralytics-8.3.190.dist-info → ultralytics-8.3.192.dist-info}/METADATA +1 -1
- {ultralytics-8.3.190.dist-info → ultralytics-8.3.192.dist-info}/RECORD +103 -102
- {ultralytics-8.3.190.dist-info → ultralytics-8.3.192.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.190.dist-info → ultralytics-8.3.192.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.190.dist-info → ultralytics-8.3.192.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.190.dist-info → ultralytics-8.3.192.dist-info}/top_level.txt +0 -0
tests/test_cuda.py
CHANGED
@@ -23,10 +23,11 @@ if CUDA_IS_AVAILABLE:
|
|
23
23
|
gpu_info = GPUInfo()
|
24
24
|
gpu_info.print_status()
|
25
25
|
autodevice_fraction = __import__("os").environ.get("YOLO_AUTODEVICE_FRACTION_FREE", 0.3)
|
26
|
-
idle_gpus
|
27
|
-
count=2,
|
28
|
-
|
29
|
-
|
26
|
+
if idle_gpus := gpu_info.select_idle_gpu(
|
27
|
+
count=2,
|
28
|
+
min_memory_fraction=autodevice_fraction,
|
29
|
+
min_util_fraction=autodevice_fraction,
|
30
|
+
):
|
30
31
|
DEVICES = idle_gpus
|
31
32
|
|
32
33
|
|
@@ -112,9 +113,9 @@ def test_train():
|
|
112
113
|
import os
|
113
114
|
|
114
115
|
device = tuple(DEVICES) if len(DEVICES) > 1 else DEVICES[0]
|
115
|
-
results = YOLO(MODEL).train(data="coco8.yaml", imgsz=64, epochs=1, device=device) # requires imgsz>=64
|
116
116
|
# NVIDIA Jetson only has one GPU and therefore skipping checks
|
117
117
|
if not IS_JETSON:
|
118
|
+
results = YOLO(MODEL).train(data="coco8.yaml", imgsz=64, epochs=1, device=device) # requires imgsz>=64
|
118
119
|
visible = eval(os.environ["CUDA_VISIBLE_DEVICES"])
|
119
120
|
assert visible == device, f"Passed GPUs '{device}', but used GPUs '{visible}'"
|
120
121
|
assert (
|
tests/test_exports.py
CHANGED
@@ -241,12 +241,7 @@ def test_export_ncnn():
|
|
241
241
|
|
242
242
|
|
243
243
|
@pytest.mark.slow
|
244
|
-
@pytest.mark.parametrize(
|
245
|
-
"task, half, batch",
|
246
|
-
[ # generate all combinations except for exclusion cases
|
247
|
-
(task, half, batch) for task, half, batch in product(TASKS, [True, False], [1])
|
248
|
-
],
|
249
|
-
)
|
244
|
+
@pytest.mark.parametrize("task, half, batch", list(product(TASKS, [True, False], [1])))
|
250
245
|
def test_export_ncnn_matrix(task, half, batch):
|
251
246
|
"""Test YOLO export to NCNN format considering various export configurations."""
|
252
247
|
file = YOLO(TASK2MODEL[task]).export(format="ncnn", imgsz=32, half=half, batch=batch)
|
tests/test_python.py
CHANGED
@@ -397,18 +397,15 @@ def test_cfg_init():
|
|
397
397
|
|
398
398
|
def test_utils_init():
|
399
399
|
"""Test initialization utilities in the Ultralytics library."""
|
400
|
-
from ultralytics.utils import get_git_branch, get_git_origin_url, get_ubuntu_version, is_github_action_running
|
400
|
+
from ultralytics.utils import get_ubuntu_version, is_github_action_running
|
401
401
|
|
402
402
|
get_ubuntu_version()
|
403
403
|
is_github_action_running()
|
404
|
-
get_git_origin_url()
|
405
|
-
get_git_branch()
|
406
404
|
|
407
405
|
|
408
406
|
def test_utils_checks():
|
409
407
|
"""Test various utility checks for filenames, git status, requirements, image sizes, and versions."""
|
410
408
|
checks.check_yolov5u_filename("yolov5n.pt")
|
411
|
-
checks.git_describe(ROOT)
|
412
409
|
checks.check_requirements() # check requirements.txt
|
413
410
|
checks.check_imgsz([600, 600], max_dim=1)
|
414
411
|
checks.check_imshow(warn=True)
|
tests/test_solutions.py
CHANGED
@@ -227,7 +227,7 @@ def test_right_click_reset():
|
|
227
227
|
dc = solutions.DistanceCalculation()
|
228
228
|
dc.selected_boxes, dc.left_mouse_count = {1: [10, 10, 50, 50]}, 1
|
229
229
|
dc.mouse_event_for_distance(cv2.EVENT_RBUTTONDOWN, 0, 0, None, None)
|
230
|
-
assert dc.selected_boxes
|
230
|
+
assert not dc.selected_boxes
|
231
231
|
assert dc.left_mouse_count == 0
|
232
232
|
|
233
233
|
|
ultralytics/__init__.py
CHANGED
ultralytics/cfg/__init__.py
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import shutil
|
4
6
|
import subprocess
|
5
7
|
import sys
|
6
8
|
from pathlib import Path
|
7
9
|
from types import SimpleNamespace
|
8
|
-
from typing import Any, Dict, List, Union
|
10
|
+
from typing import Any
|
9
11
|
|
10
12
|
from ultralytics import __version__
|
11
13
|
from ultralytics.utils import (
|
@@ -78,9 +80,9 @@ SOLUTIONS_HELP_MSG = f"""
|
|
78
80
|
yolo solutions SOLUTION ARGS
|
79
81
|
|
80
82
|
Where SOLUTION (optional) is one of {list(SOLUTION_MAP.keys())[:-1]}
|
81
|
-
ARGS (optional) are any number of custom 'arg=value' pairs like 'show_in=True' that override defaults
|
83
|
+
ARGS (optional) are any number of custom 'arg=value' pairs like 'show_in=True' that override defaults
|
82
84
|
at https://docs.ultralytics.com/usage/cfg
|
83
|
-
|
85
|
+
|
84
86
|
1. Call object counting solution
|
85
87
|
yolo solutions count source="path/to/video.mp4" region="[(20, 400), (1080, 400), (1080, 360), (20, 360)]"
|
86
88
|
|
@@ -95,10 +97,10 @@ SOLUTIONS_HELP_MSG = f"""
|
|
95
97
|
|
96
98
|
5. Generate analytical graphs
|
97
99
|
yolo solutions analytics analytics_type="pie"
|
98
|
-
|
100
|
+
|
99
101
|
6. Track objects within specific zones
|
100
102
|
yolo solutions trackzone source="path/to/video.mp4" region="[(150, 150), (1130, 150), (1130, 570), (150, 570)]"
|
101
|
-
|
103
|
+
|
102
104
|
7. Streamlit real-time webcam inference GUI
|
103
105
|
yolo streamlit-predict
|
104
106
|
"""
|
@@ -237,7 +239,7 @@ CFG_BOOL_KEYS = frozenset(
|
|
237
239
|
)
|
238
240
|
|
239
241
|
|
240
|
-
def cfg2dict(cfg: Union[str, Path, Dict, SimpleNamespace]) -> Dict:
|
242
|
+
def cfg2dict(cfg: str | Path | dict | SimpleNamespace) -> dict:
|
241
243
|
"""
|
242
244
|
Convert a configuration object to a dictionary.
|
243
245
|
|
@@ -272,7 +274,7 @@ def cfg2dict(cfg: Union[str, Path, Dict, SimpleNamespace]) -> Dict:
|
|
272
274
|
return cfg
|
273
275
|
|
274
276
|
|
275
|
-
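def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, overrides: Dict = None) -> SimpleNamespace: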
|
277
|
+
def get_cfg(cfg: str | Path | dict | SimpleNamespace = DEFAULT_CFG_DICT, overrides: dict = None) -> SimpleNamespace:
|
276
278
|
"""
|
277
279
|
Load and merge configuration data from a file or dictionary, with optional overrides.
|
278
280
|
|
@@ -320,7 +322,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
|
|
320
322
|
return IterableSimpleNamespace(**cfg)
|
321
323
|
|
322
324
|
|
323
|
-
def check_cfg(cfg: Dict, hard: bool = True) -> None:
|
325
|
+
def check_cfg(cfg: dict, hard: bool = True) -> None:
|
324
326
|
"""
|
325
327
|
Check configuration argument types and values for the Ultralytics library.
|
326
328
|
|
@@ -414,7 +416,7 @@ def get_save_dir(args: SimpleNamespace, name: str = None) -> Path:
|
|
414
416
|
return Path(save_dir)
|
415
417
|
|
416
418
|
|
417
|
-
def _handle_deprecation(custom: Dict) -> Dict:
|
419
|
+
def _handle_deprecation(custom: dict) -> dict:
|
418
420
|
"""
|
419
421
|
Handle deprecated configuration keys by mapping them to current equivalents with deprecation warnings.
|
420
422
|
|
@@ -458,7 +460,7 @@ def _handle_deprecation(custom: Dict) -> Dict:
|
|
458
460
|
return custom
|
459
461
|
|
460
462
|
|
461
|
-
def check_dict_alignment(base: Dict, custom: Dict, e: Exception = None) -> None:
|
463
|
+
def check_dict_alignment(base: dict, custom: dict, e: Exception = None) -> None:
|
462
464
|
"""
|
463
465
|
Check alignment between custom and base configuration dictionaries, handling deprecated keys and providing error
|
464
466
|
messages for mismatched keys.
|
@@ -498,7 +500,7 @@ def check_dict_alignment(base: Dict, custom: Dict, e: Exception = None) -> None:
|
|
498
500
|
raise SyntaxError(string + CLI_HELP_MSG) from e
|
499
501
|
|
500
502
|
|
501
|
-
def merge_equals_args(args: List[str]) -> List[str]:
|
503
|
+
def merge_equals_args(args: list[str]) -> list[str]:
|
502
504
|
"""
|
503
505
|
Merge arguments around isolated '=' in a list of strings and join fragments with brackets.
|
504
506
|
|
@@ -557,7 +559,7 @@ def merge_equals_args(args: List[str]) -> List[str]:
|
|
557
559
|
return new_args
|
558
560
|
|
559
561
|
|
560
|
-
def handle_yolo_hub(args: List[str]) -> None:
|
562
|
+
def handle_yolo_hub(args: list[str]) -> None:
|
561
563
|
"""
|
562
564
|
Handle Ultralytics HUB command-line interface (CLI) commands for authentication.
|
563
565
|
|
@@ -587,7 +589,7 @@ def handle_yolo_hub(args: List[str]) -> None:
|
|
587
589
|
hub.logout()
|
588
590
|
|
589
591
|
|
590
|
-
def handle_yolo_settings(args: List[str]) -> None:
|
592
|
+
def handle_yolo_settings(args: list[str]) -> None:
|
591
593
|
"""
|
592
594
|
Handle YOLO settings command-line interface (CLI) commands.
|
593
595
|
|
@@ -630,7 +632,7 @@ def handle_yolo_settings(args: List[str]) -> None:
|
|
630
632
|
LOGGER.warning(f"settings error: '{e}'. Please see {url} for help.")
|
631
633
|
|
632
634
|
|
633
|
-
def handle_yolo_solutions(args: List[str]) -> None:
|
635
|
+
def handle_yolo_solutions(args: list[str]) -> None:
|
634
636
|
"""
|
635
637
|
Process YOLO solutions arguments and run the specified computer vision solutions pipeline.
|
636
638
|
|
ultralytics/cfg/datasets/SKU-110K.yaml
CHANGED
@@ -45,7 +45,7 @@ download: |
|
|
45
45
|
# Convert labels
|
46
46
|
names = "image", "x1", "y1", "x2", "y2", "class", "image_width", "image_height" # column names
|
47
47
|
for d in "annotations_train.csv", "annotations_val.csv", "annotations_test.csv":
|
48
|
-
x = pl.read_csv(dir / "annotations" / d, names=names).to_numpy() # annotations
|
48
|
+
x = pl.read_csv(dir / "annotations" / d, names=names, infer_schema_length=None).to_numpy() # annotations
|
49
49
|
images, unique_images = x[:, 0], np.unique(x[:, 0])
|
50
50
|
with open((dir / d).with_suffix(".txt").__str__().replace("annotations_", ""), "w", encoding="utf-8") as f:
|
51
51
|
f.writelines(f"./images/{s}\n" for s in unique_images)
|
ultralytics/cfg/datasets/VisDrone.yaml
CHANGED
@@ -45,18 +45,18 @@ download: |
|
|
45
45
|
images_dir = dir / "images" / split
|
46
46
|
labels_dir = dir / "labels" / split
|
47
47
|
labels_dir.mkdir(parents=True, exist_ok=True)
|
48
|
-
|
48
|
+
|
49
49
|
# Move images to new structure
|
50
50
|
if (source_images_dir := source_dir / "images").exists():
|
51
51
|
images_dir.mkdir(parents=True, exist_ok=True)
|
52
52
|
for img in source_images_dir.glob("*.jpg"):
|
53
53
|
img.rename(images_dir / img.name)
|
54
|
-
|
54
|
+
|
55
55
|
for f in TQDM((source_dir / "annotations").glob("*.txt"), desc=f"Converting {split}"):
|
56
56
|
img_size = Image.open(images_dir / f.with_suffix(".jpg").name).size
|
57
57
|
dw, dh = 1.0 / img_size[0], 1.0 / img_size[1]
|
58
58
|
lines = []
|
59
|
-
|
59
|
+
|
60
60
|
with open(f, encoding="utf-8") as file:
|
61
61
|
for row in [x.split(",") for x in file.read().strip().splitlines()]:
|
62
62
|
if row[4] != "0": # Skip ignored regions
|
@@ -66,7 +66,7 @@ download: |
|
|
66
66
|
x_center, y_center = (x + w / 2) * dw, (y + h / 2) * dh
|
67
67
|
w_norm, h_norm = w * dw, h * dh
|
68
68
|
lines.append(f"{cls} {x_center:.6f} {y_center:.6f} {w_norm:.6f} {h_norm:.6f}\n")
|
69
|
-
|
69
|
+
|
70
70
|
(labels_dir / f.name).write_text("".join(lines), encoding="utf-8")
|
71
71
|
|
72
72
|
|
ultralytics/data/annotator.py
CHANGED
@@ -1,13 +1,14 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
from pathlib import Path
|
4
|
-
from typing import List, Optional, Union
|
5
6
|
|
6
7
|
from ultralytics import SAM, YOLO
|
7
8
|
|
8
9
|
|
9
10
|
def auto_annotate(
|
10
|
-
data: Union[str, Path],
|
11
|
+
data: str | Path,
|
11
12
|
det_model: str = "yolo11x.pt",
|
12
13
|
sam_model: str = "sam_b.pt",
|
13
14
|
device: str = "",
|
@@ -15,8 +16,8 @@ def auto_annotate(
|
|
15
16
|
iou: float = 0.45,
|
16
17
|
imgsz: int = 640,
|
17
18
|
max_det: int = 300,
|
18
|
-
classes: Optional[List[int]] = None,
|
19
|
-
output_dir: Optional[Union[str, Path]] = None,
|
19
|
+
classes: list[int] | None = None,
|
20
|
+
output_dir: str | Path | None = None,
|
20
21
|
) -> None:
|
21
22
|
"""
|
22
23
|
Automatically annotate images using a YOLO object detection model and a SAM segmentation model.
|
@@ -54,8 +55,7 @@ def auto_annotate(
|
|
54
55
|
)
|
55
56
|
|
56
57
|
for result in det_results:
|
57
|
-
class_ids = result.boxes.cls.int().tolist() # Extract class IDs from detection results
|
58
|
-
if class_ids:
|
58
|
+
if class_ids := result.boxes.cls.int().tolist(): # Extract class IDs from detection results
|
59
59
|
boxes = result.boxes.xyxy # Boxes object for bbox outputs
|
60
60
|
sam_results = sam_model(result.orig_img, bboxes=boxes, verbose=False, save=False, device=device)
|
61
61
|
segments = sam_results[0].masks.xyn
|
ultralytics/data/augment.py
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
|
2
2
|
|
3
|
+
from __future__ import annotations
|
4
|
+
|
3
5
|
import math
|
4
6
|
import random
|
5
7
|
from copy import deepcopy
|
6
|
-
from typing import Any
|
8
|
+
from typing import Any
|
7
9
|
|
8
10
|
import cv2
|
9
11
|
import numpy as np
|
@@ -231,7 +233,7 @@ class Compose:
|
|
231
233
|
"""
|
232
234
|
self.transforms.insert(index, transform)
|
233
235
|
|
234
|
-
def __getitem__(self, index:
|
236
|
+
def __getitem__(self, index: list | int) -> Compose:
|
235
237
|
"""
|
236
238
|
Retrieve a specific transform or a set of transforms using indexing.
|
237
239
|
|
@@ -253,7 +255,7 @@ class Compose:
|
|
253
255
|
assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
|
254
256
|
return Compose([self.transforms[i] for i in index]) if isinstance(index, list) else self.transforms[index]
|
255
257
|
|
256
|
-
def __setitem__(self, index:
|
258
|
+
def __setitem__(self, index: list | int, value: list | int) -> None:
|
257
259
|
"""
|
258
260
|
Set one or more transforms in the composition using indexing.
|
259
261
|
|
@@ -366,7 +368,7 @@ class BaseMixTransform:
|
|
366
368
|
self.pre_transform = pre_transform
|
367
369
|
self.p = p
|
368
370
|
|
369
|
-
def __call__(self, labels:
|
371
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
370
372
|
"""
|
371
373
|
Apply pre-processing transforms and cutmix/mixup/mosaic transforms to labels data.
|
372
374
|
|
@@ -406,7 +408,7 @@ class BaseMixTransform:
|
|
406
408
|
labels.pop("mix_labels", None)
|
407
409
|
return labels
|
408
410
|
|
409
|
-
def _mix_transform(self, labels:
|
411
|
+
def _mix_transform(self, labels: dict[str, Any]):
|
410
412
|
"""
|
411
413
|
Apply CutMix, MixUp or Mosaic augmentation to the label dictionary.
|
412
414
|
|
@@ -442,7 +444,7 @@ class BaseMixTransform:
|
|
442
444
|
return random.randint(0, len(self.dataset) - 1)
|
443
445
|
|
444
446
|
@staticmethod
|
445
|
-
def _update_label_text(labels:
|
447
|
+
def _update_label_text(labels: dict[str, Any]) -> dict[str, Any]:
|
446
448
|
"""
|
447
449
|
Update label text and class IDs for mixed labels in image augmentation.
|
448
450
|
|
@@ -564,7 +566,7 @@ class Mosaic(BaseMixTransform):
|
|
564
566
|
else: # select any images
|
565
567
|
return [random.randint(0, len(self.dataset) - 1) for _ in range(self.n - 1)]
|
566
568
|
|
567
|
-
def _mix_transform(self, labels:
|
569
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
568
570
|
"""
|
569
571
|
Apply mosaic augmentation to the input image and labels.
|
570
572
|
|
@@ -587,13 +589,13 @@ class Mosaic(BaseMixTransform):
|
|
587
589
|
>>> mosaic = Mosaic(dataset, imgsz=640, p=1.0, n=4)
|
588
590
|
>>> augmented_data = mosaic._mix_transform(labels)
|
589
591
|
"""
|
590
|
-
assert labels.get("rect_shape", None) is None, "rect and mosaic are mutually exclusive."
|
592
|
+
assert labels.get("rect_shape") is None, "rect and mosaic are mutually exclusive."
|
591
593
|
assert len(labels.get("mix_labels", [])), "There are no other images for mosaic augment."
|
592
594
|
return (
|
593
595
|
self._mosaic3(labels) if self.n == 3 else self._mosaic4(labels) if self.n == 4 else self._mosaic9(labels)
|
594
596
|
) # This code is modified for mosaic3 method.
|
595
597
|
|
596
|
-
def _mosaic3(self, labels:
|
598
|
+
def _mosaic3(self, labels: dict[str, Any]) -> dict[str, Any]:
|
597
599
|
"""
|
598
600
|
Create a 1x3 image mosaic by combining three images.
|
599
601
|
|
@@ -652,7 +654,7 @@ class Mosaic(BaseMixTransform):
|
|
652
654
|
final_labels["img"] = img3[-self.border[0] : self.border[0], -self.border[1] : self.border[1]]
|
653
655
|
return final_labels
|
654
656
|
|
655
|
-
def _mosaic4(self, labels:
|
657
|
+
def _mosaic4(self, labels: dict[str, Any]) -> dict[str, Any]:
|
656
658
|
"""
|
657
659
|
Create a 2x2 image mosaic from four input images.
|
658
660
|
|
@@ -710,7 +712,7 @@ class Mosaic(BaseMixTransform):
|
|
710
712
|
final_labels["img"] = img4
|
711
713
|
return final_labels
|
712
714
|
|
713
|
-
def _mosaic9(self, labels:
|
715
|
+
def _mosaic9(self, labels: dict[str, Any]) -> dict[str, Any]:
|
714
716
|
"""
|
715
717
|
Create a 3x3 image mosaic from the input image and eight additional images.
|
716
718
|
|
@@ -783,7 +785,7 @@ class Mosaic(BaseMixTransform):
|
|
783
785
|
return final_labels
|
784
786
|
|
785
787
|
@staticmethod
|
786
|
-
def _update_labels(labels, padw: int, padh: int) ->
|
788
|
+
def _update_labels(labels, padw: int, padh: int) -> dict[str, Any]:
|
787
789
|
"""
|
788
790
|
Update label coordinates with padding values.
|
789
791
|
|
@@ -809,7 +811,7 @@ class Mosaic(BaseMixTransform):
|
|
809
811
|
labels["instances"].add_padding(padw, padh)
|
810
812
|
return labels
|
811
813
|
|
812
|
-
def _cat_labels(self, mosaic_labels:
|
814
|
+
def _cat_labels(self, mosaic_labels: list[dict[str, Any]]) -> dict[str, Any]:
|
813
815
|
"""
|
814
816
|
Concatenate and process labels for mosaic augmentation.
|
815
817
|
|
@@ -836,7 +838,7 @@ class Mosaic(BaseMixTransform):
|
|
836
838
|
>>> print(result.keys())
|
837
839
|
dict_keys(['im_file', 'ori_shape', 'resized_shape', 'cls', 'instances', 'mosaic_border'])
|
838
840
|
"""
|
839
|
-
if
|
841
|
+
if not mosaic_labels:
|
840
842
|
return {}
|
841
843
|
cls = []
|
842
844
|
instances = []
|
@@ -902,7 +904,7 @@ class MixUp(BaseMixTransform):
|
|
902
904
|
"""
|
903
905
|
super().__init__(dataset=dataset, pre_transform=pre_transform, p=p)
|
904
906
|
|
905
|
-
def _mix_transform(self, labels:
|
907
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
906
908
|
"""
|
907
909
|
Apply MixUp augmentation to the input labels.
|
908
910
|
|
@@ -967,7 +969,7 @@ class CutMix(BaseMixTransform):
|
|
967
969
|
self.beta = beta
|
968
970
|
self.num_areas = num_areas
|
969
971
|
|
970
|
-
def _rand_bbox(self, width: int, height: int) ->
|
972
|
+
def _rand_bbox(self, width: int, height: int) -> tuple[int, int, int, int]:
|
971
973
|
"""
|
972
974
|
Generate random bounding box coordinates for the cut region.
|
973
975
|
|
@@ -997,7 +999,7 @@ class CutMix(BaseMixTransform):
|
|
997
999
|
|
998
1000
|
return x1, y1, x2, y2
|
999
1001
|
|
1000
|
-
def _mix_transform(self, labels:
|
1002
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1001
1003
|
"""
|
1002
1004
|
Apply CutMix augmentation to the input labels.
|
1003
1005
|
|
@@ -1086,7 +1088,7 @@ class RandomPerspective:
|
|
1086
1088
|
scale: float = 0.5,
|
1087
1089
|
shear: float = 0.0,
|
1088
1090
|
perspective: float = 0.0,
|
1089
|
-
border:
|
1091
|
+
border: tuple[int, int] = (0, 0),
|
1090
1092
|
pre_transform=None,
|
1091
1093
|
):
|
1092
1094
|
"""
|
@@ -1117,7 +1119,7 @@ class RandomPerspective:
|
|
1117
1119
|
self.border = border # mosaic border
|
1118
1120
|
self.pre_transform = pre_transform
|
1119
1121
|
|
1120
|
-
def affine_transform(self, img: np.ndarray, border:
|
1122
|
+
def affine_transform(self, img: np.ndarray, border: tuple[int, int]) -> tuple[np.ndarray, np.ndarray, float]:
|
1121
1123
|
"""
|
1122
1124
|
Apply a sequence of affine transformations centered around the image center.
|
1123
1125
|
|
@@ -1215,7 +1217,7 @@ class RandomPerspective:
|
|
1215
1217
|
y = xy[:, [1, 3, 5, 7]]
|
1216
1218
|
return np.concatenate((x.min(1), y.min(1), x.max(1), y.max(1)), dtype=bboxes.dtype).reshape(4, n).T
|
1217
1219
|
|
1218
|
-
def apply_segments(self, segments: np.ndarray, M: np.ndarray) ->
|
1220
|
+
def apply_segments(self, segments: np.ndarray, M: np.ndarray) -> tuple[np.ndarray, np.ndarray]:
|
1219
1221
|
"""
|
1220
1222
|
Apply affine transformations to segments and generate new bounding boxes.
|
1221
1223
|
|
@@ -1285,7 +1287,7 @@ class RandomPerspective:
|
|
1285
1287
|
visible[out_mask] = 0
|
1286
1288
|
return np.concatenate([xy, visible], axis=-1).reshape(n, nkpt, 3)
|
1287
1289
|
|
1288
|
-
def __call__(self, labels:
|
1290
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1289
1291
|
"""
|
1290
1292
|
Apply random perspective and affine transformations to an image and its associated labels.
|
1291
1293
|
|
@@ -1453,7 +1455,7 @@ class RandomHSV:
|
|
1453
1455
|
self.sgain = sgain
|
1454
1456
|
self.vgain = vgain
|
1455
1457
|
|
1456
|
-
def __call__(self, labels:
|
1458
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1457
1459
|
"""
|
1458
1460
|
Apply random HSV augmentation to an image within predefined limits.
|
1459
1461
|
|
@@ -1515,7 +1517,7 @@ class RandomFlip:
|
|
1515
1517
|
>>> flipped_instances = result["instances"]
|
1516
1518
|
"""
|
1517
1519
|
|
1518
|
-
def __init__(self, p: float = 0.5, direction: str = "horizontal", flip_idx:
|
1520
|
+
def __init__(self, p: float = 0.5, direction: str = "horizontal", flip_idx: list[int] = None) -> None:
|
1519
1521
|
"""
|
1520
1522
|
Initialize the RandomFlip class with probability and direction.
|
1521
1523
|
|
@@ -1541,7 +1543,7 @@ class RandomFlip:
|
|
1541
1543
|
self.direction = direction
|
1542
1544
|
self.flip_idx = flip_idx
|
1543
1545
|
|
1544
|
-
def __call__(self, labels:
|
1546
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1545
1547
|
"""
|
1546
1548
|
Apply random flip to an image and update any instances like bounding boxes or keypoints accordingly.
|
1547
1549
|
|
@@ -1615,7 +1617,7 @@ class LetterBox:
|
|
1615
1617
|
|
1616
1618
|
def __init__(
|
1617
1619
|
self,
|
1618
|
-
new_shape:
|
1620
|
+
new_shape: tuple[int, int] = (640, 640),
|
1619
1621
|
auto: bool = False,
|
1620
1622
|
scale_fill: bool = False,
|
1621
1623
|
scaleup: bool = True,
|
@@ -1662,7 +1664,7 @@ class LetterBox:
|
|
1662
1664
|
self.padding_value = padding_value
|
1663
1665
|
self.interpolation = interpolation
|
1664
1666
|
|
1665
|
-
def __call__(self, labels:
|
1667
|
+
def __call__(self, labels: dict[str, Any] = None, image: np.ndarray = None) -> dict[str, Any] | np.ndarray:
|
1666
1668
|
"""
|
1667
1669
|
Resize and pad an image for object detection, instance segmentation, or pose estimation tasks.
|
1668
1670
|
|
@@ -1741,7 +1743,7 @@ class LetterBox:
|
|
1741
1743
|
return img
|
1742
1744
|
|
1743
1745
|
@staticmethod
|
1744
|
-
def _update_labels(labels:
|
1746
|
+
def _update_labels(labels: dict[str, Any], ratio: tuple[float, float], padw: float, padh: float) -> dict[str, Any]:
|
1745
1747
|
"""
|
1746
1748
|
Update labels after applying letterboxing to an image.
|
1747
1749
|
|
@@ -1801,12 +1803,12 @@ class CopyPaste(BaseMixTransform):
|
|
1801
1803
|
assert mode in {"flip", "mixup"}, f"Expected `mode` to be `flip` or `mixup`, but got {mode}."
|
1802
1804
|
self.mode = mode
|
1803
1805
|
|
1804
|
-
def _mix_transform(self, labels:
|
1806
|
+
def _mix_transform(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1805
1807
|
"""Apply Copy-Paste augmentation to combine objects from another image into the current image."""
|
1806
1808
|
labels2 = labels["mix_labels"][0]
|
1807
1809
|
return self._transform(labels, labels2)
|
1808
1810
|
|
1809
|
-
def __call__(self, labels:
|
1811
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
1810
1812
|
"""Apply Copy-Paste augmentation to an image and its labels."""
|
1811
1813
|
if len(labels["instances"].segments) == 0 or self.p == 0:
|
1812
1814
|
return labels
|
@@ -1833,7 +1835,7 @@ class CopyPaste(BaseMixTransform):
|
|
1833
1835
|
labels.pop("mix_labels", None)
|
1834
1836
|
return labels
|
1835
1837
|
|
1836
|
-
def _transform(self, labels1:
|
1838
|
+
def _transform(self, labels1: dict[str, Any], labels2: dict[str, Any] = {}) -> dict[str, Any]:
|
1837
1839
|
"""Apply Copy-Paste augmentation to combine objects from another image into the current image."""
|
1838
1840
|
im = labels1["img"]
|
1839
1841
|
if "mosaic_border" not in labels1:
|
@@ -2011,7 +2013,7 @@ class Albumentations:
|
|
2011
2013
|
except Exception as e:
|
2012
2014
|
LOGGER.info(f"{prefix}{e}")
|
2013
2015
|
|
2014
|
-
def __call__(self, labels:
|
2016
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2015
2017
|
"""
|
2016
2018
|
Apply Albumentations transformations to input labels.
|
2017
2019
|
|
@@ -2153,7 +2155,7 @@ class Format:
|
|
2153
2155
|
self.batch_idx = batch_idx # keep the batch indexes
|
2154
2156
|
self.bgr = bgr
|
2155
2157
|
|
2156
|
-
def __call__(self, labels:
|
2158
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2157
2159
|
"""
|
2158
2160
|
Format image annotations for object detection, instance segmentation, and pose estimation tasks.
|
2159
2161
|
|
@@ -2255,7 +2257,7 @@ class Format:
|
|
2255
2257
|
|
2256
2258
|
def _format_segments(
|
2257
2259
|
self, instances: Instances, cls: np.ndarray, w: int, h: int
|
2258
|
-
) ->
|
2260
|
+
) -> tuple[np.ndarray, Instances, np.ndarray]:
|
2259
2261
|
"""
|
2260
2262
|
Convert polygon segments to bitmap masks.
|
2261
2263
|
|
@@ -2317,7 +2319,7 @@ class LoadVisualPrompt:
|
|
2317
2319
|
|
2318
2320
|
return (r >= x1) * (r < x2) * (c >= y1) * (c < y2)
|
2319
2321
|
|
2320
|
-
def __call__(self, labels:
|
2322
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2321
2323
|
"""
|
2322
2324
|
Process labels to create visual prompts.
|
2323
2325
|
|
@@ -2340,10 +2342,10 @@ class LoadVisualPrompt:
|
|
2340
2342
|
|
2341
2343
|
def get_visuals(
|
2342
2344
|
self,
|
2343
|
-
category:
|
2344
|
-
shape:
|
2345
|
-
bboxes:
|
2346
|
-
masks:
|
2345
|
+
category: int | np.ndarray | torch.Tensor,
|
2346
|
+
shape: tuple[int, int],
|
2347
|
+
bboxes: np.ndarray | torch.Tensor = None,
|
2348
|
+
masks: np.ndarray | torch.Tensor = None,
|
2347
2349
|
) -> torch.Tensor:
|
2348
2350
|
"""
|
2349
2351
|
Generate visual masks based on bounding boxes or masks.
|
@@ -2415,10 +2417,10 @@ class RandomLoadText:
|
|
2415
2417
|
def __init__(
|
2416
2418
|
self,
|
2417
2419
|
prompt_format: str = "{}",
|
2418
|
-
neg_samples:
|
2420
|
+
neg_samples: tuple[int, int] = (80, 80),
|
2419
2421
|
max_samples: int = 80,
|
2420
2422
|
padding: bool = False,
|
2421
|
-
padding_value:
|
2423
|
+
padding_value: list[str] = [""],
|
2422
2424
|
) -> None:
|
2423
2425
|
"""
|
2424
2426
|
Initialize the RandomLoadText class for randomly sampling positive and negative texts.
|
@@ -2459,7 +2461,7 @@ class RandomLoadText:
|
|
2459
2461
|
self.padding = padding
|
2460
2462
|
self.padding_value = padding_value
|
2461
2463
|
|
2462
|
-
def __call__(self, labels:
|
2464
|
+
def __call__(self, labels: dict[str, Any]) -> dict[str, Any]:
|
2463
2465
|
"""
|
2464
2466
|
Randomly sample positive and negative texts and update class indices accordingly.
|
2465
2467
|
|
@@ -2595,9 +2597,9 @@ def v8_transforms(dataset, imgsz: int, hyp: IterableSimpleNamespace, stretch: bo
|
|
2595
2597
|
|
2596
2598
|
# Classification augmentations -----------------------------------------------------------------------------------------
|
2597
2599
|
def classify_transforms(
|
2598
|
-
size:
|
2599
|
-
mean:
|
2600
|
-
std:
|
2600
|
+
size: tuple[int, int] | int = 224,
|
2601
|
+
mean: tuple[float, float, float] = DEFAULT_MEAN,
|
2602
|
+
std: tuple[float, float, float] = DEFAULT_STD,
|
2601
2603
|
interpolation: str = "BILINEAR",
|
2602
2604
|
crop_fraction: float = None,
|
2603
2605
|
):
|
@@ -2647,10 +2649,10 @@ def classify_transforms(
|
|
2647
2649
|
# Classification training augmentations --------------------------------------------------------------------------------
|
2648
2650
|
def classify_augmentations(
|
2649
2651
|
size: int = 224,
|
2650
|
-
mean:
|
2651
|
-
std:
|
2652
|
-
scale:
|
2653
|
-
ratio:
|
2652
|
+
mean: tuple[float, float, float] = DEFAULT_MEAN,
|
2653
|
+
std: tuple[float, float, float] = DEFAULT_STD,
|
2654
|
+
scale: tuple[float, float] = None,
|
2655
|
+
ratio: tuple[float, float] = None,
|
2654
2656
|
hflip: float = 0.5,
|
2655
2657
|
vflip: float = 0.0,
|
2656
2658
|
auto_augment: str = None,
|
@@ -2773,7 +2775,7 @@ class ClassifyLetterBox:
|
|
2773
2775
|
(640, 640, 3)
|
2774
2776
|
"""
|
2775
2777
|
|
2776
|
-
def __init__(self, size:
|
2778
|
+
def __init__(self, size: int | tuple[int, int] = (640, 640), auto: bool = False, stride: int = 32):
|
2777
2779
|
"""
|
2778
2780
|
Initialize the ClassifyLetterBox object for image preprocessing.
|
2779
2781
|
|
@@ -2862,7 +2864,7 @@ class CenterCrop:
|
|
2862
2864
|
(640, 640, 3)
|
2863
2865
|
"""
|
2864
2866
|
|
2865
|
-
def __init__(self, size:
|
2867
|
+
def __init__(self, size: int | tuple[int, int] = (640, 640)):
|
2866
2868
|
"""
|
2867
2869
|
Initialize the CenterCrop object for image preprocessing.
|
2868
2870
|
|
@@ -2886,7 +2888,7 @@ class CenterCrop:
|
|
2886
2888
|
super().__init__()
|
2887
2889
|
self.h, self.w = (size, size) if isinstance(size, int) else size
|
2888
2890
|
|
2889
|
-
def __call__(self, im:
|
2891
|
+
def __call__(self, im: Image.Image | np.ndarray) -> np.ndarray:
|
2890
2892
|
"""
|
2891
2893
|
Apply center cropping to an input image.
|
2892
2894
|
|