PyPI - ultralytics - Versions diffs - 8.1.29__py3-none-any.whl → 8.3.62__py3-none-any.whl - Mend

ultralytics 8.1.29py3-none-any.whl → 8.3.62py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (247) hide show

tests/__init__.py +22 -0
tests/conftest.py +83 -0
tests/test_cli.py +122 -0
tests/test_cuda.py +155 -0
tests/test_engine.py +131 -0
tests/test_exports.py +216 -0
tests/test_integrations.py +150 -0
tests/test_python.py +615 -0
tests/test_solutions.py +94 -0
ultralytics/__init__.py +11 -8
ultralytics/cfg/__init__.py +569 -131
ultralytics/cfg/datasets/Argoverse.yaml +2 -1
ultralytics/cfg/datasets/DOTAv1.5.yaml +3 -2
ultralytics/cfg/datasets/DOTAv1.yaml +3 -2
ultralytics/cfg/datasets/GlobalWheat2020.yaml +3 -2
ultralytics/cfg/datasets/ImageNet.yaml +2 -1
ultralytics/cfg/datasets/Objects365.yaml +5 -4
ultralytics/cfg/datasets/SKU-110K.yaml +2 -1
ultralytics/cfg/datasets/VOC.yaml +3 -2
ultralytics/cfg/datasets/VisDrone.yaml +6 -5
ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
ultralytics/cfg/datasets/carparts-seg.yaml +3 -2
ultralytics/cfg/datasets/coco-pose.yaml +7 -6
ultralytics/cfg/datasets/coco.yaml +3 -2
ultralytics/cfg/datasets/coco128-seg.yaml +4 -3
ultralytics/cfg/datasets/coco128.yaml +4 -3
ultralytics/cfg/datasets/coco8-pose.yaml +3 -2
ultralytics/cfg/datasets/coco8-seg.yaml +3 -2
ultralytics/cfg/datasets/coco8.yaml +3 -2
ultralytics/cfg/datasets/crack-seg.yaml +3 -2
ultralytics/cfg/datasets/dog-pose.yaml +24 -0
ultralytics/cfg/datasets/dota8.yaml +3 -2
ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
ultralytics/cfg/datasets/lvis.yaml +1236 -0
ultralytics/cfg/datasets/medical-pills.yaml +22 -0
ultralytics/cfg/datasets/open-images-v7.yaml +2 -1
ultralytics/cfg/datasets/package-seg.yaml +5 -4
ultralytics/cfg/datasets/signature.yaml +21 -0
ultralytics/cfg/datasets/tiger-pose.yaml +3 -2
ultralytics/cfg/datasets/xView.yaml +2 -1
ultralytics/cfg/default.yaml +14 -11
ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +24 -0
ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
ultralytics/cfg/models/11/yolo11.yaml +50 -0
ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +5 -2
ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +5 -2
ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +5 -2
ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +5 -2
ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
ultralytics/cfg/models/v3/yolov3-spp.yaml +5 -2
ultralytics/cfg/models/v3/yolov3-tiny.yaml +5 -2
ultralytics/cfg/models/v3/yolov3.yaml +5 -2
ultralytics/cfg/models/v5/yolov5-p6.yaml +5 -2
ultralytics/cfg/models/v5/yolov5.yaml +5 -2
ultralytics/cfg/models/v6/yolov6.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-cls.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +6 -2
ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +6 -2
ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-p2.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-p6.yaml +10 -7
ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-pose.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-seg.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-world.yaml +5 -2
ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -2
ultralytics/cfg/models/v8/yolov8.yaml +5 -2
ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
ultralytics/cfg/models/v9/yolov9c.yaml +30 -25
ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
ultralytics/cfg/models/v9/yolov9e.yaml +46 -42
ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
ultralytics/cfg/solutions/default.yaml +24 -0
ultralytics/cfg/trackers/botsort.yaml +8 -5
ultralytics/cfg/trackers/bytetrack.yaml +8 -5
ultralytics/data/__init__.py +14 -3
ultralytics/data/annotator.py +37 -15
ultralytics/data/augment.py +1783 -289
ultralytics/data/base.py +62 -27
ultralytics/data/build.py +36 -8
ultralytics/data/converter.py +196 -36
ultralytics/data/dataset.py +233 -94
ultralytics/data/loaders.py +199 -96
ultralytics/data/split_dota.py +39 -29
ultralytics/data/utils.py +110 -40
ultralytics/engine/__init__.py +1 -1
ultralytics/engine/exporter.py +569 -242
ultralytics/engine/model.py +604 -252
ultralytics/engine/predictor.py +22 -11
ultralytics/engine/results.py +1228 -218
ultralytics/engine/trainer.py +190 -129
ultralytics/engine/tuner.py +18 -18
ultralytics/engine/validator.py +18 -15
ultralytics/hub/__init__.py +31 -13
ultralytics/hub/auth.py +11 -7
ultralytics/hub/google/__init__.py +159 -0
ultralytics/hub/session.py +128 -94
ultralytics/hub/utils.py +20 -21
ultralytics/models/__init__.py +4 -2
ultralytics/models/fastsam/__init__.py +2 -3
ultralytics/models/fastsam/model.py +26 -4
ultralytics/models/fastsam/predict.py +127 -63
ultralytics/models/fastsam/utils.py +1 -44
ultralytics/models/fastsam/val.py +1 -1
ultralytics/models/nas/__init__.py +1 -1
ultralytics/models/nas/model.py +21 -10
ultralytics/models/nas/predict.py +3 -6
ultralytics/models/nas/val.py +4 -4
ultralytics/models/rtdetr/__init__.py +1 -1
ultralytics/models/rtdetr/model.py +1 -1
ultralytics/models/rtdetr/predict.py +6 -8
ultralytics/models/rtdetr/train.py +6 -2
ultralytics/models/rtdetr/val.py +3 -3
ultralytics/models/sam/__init__.py +3 -3
ultralytics/models/sam/amg.py +29 -23
ultralytics/models/sam/build.py +211 -13
ultralytics/models/sam/model.py +91 -30
ultralytics/models/sam/modules/__init__.py +1 -1
ultralytics/models/sam/modules/blocks.py +1129 -0
ultralytics/models/sam/modules/decoders.py +381 -53
ultralytics/models/sam/modules/encoders.py +515 -324
ultralytics/models/sam/modules/memory_attention.py +237 -0
ultralytics/models/sam/modules/sam.py +969 -21
ultralytics/models/sam/modules/tiny_encoder.py +425 -154
ultralytics/models/sam/modules/transformer.py +159 -60
ultralytics/models/sam/modules/utils.py +293 -0
ultralytics/models/sam/predict.py +1263 -132
ultralytics/models/utils/__init__.py +1 -1
ultralytics/models/utils/loss.py +36 -24
ultralytics/models/utils/ops.py +3 -7
ultralytics/models/yolo/__init__.py +3 -3
ultralytics/models/yolo/classify/__init__.py +1 -1
ultralytics/models/yolo/classify/predict.py +7 -8
ultralytics/models/yolo/classify/train.py +17 -22
ultralytics/models/yolo/classify/val.py +8 -4
ultralytics/models/yolo/detect/__init__.py +1 -1
ultralytics/models/yolo/detect/predict.py +3 -5
ultralytics/models/yolo/detect/train.py +11 -4
ultralytics/models/yolo/detect/val.py +90 -52
ultralytics/models/yolo/model.py +14 -9
ultralytics/models/yolo/obb/__init__.py +1 -1
ultralytics/models/yolo/obb/predict.py +2 -2
ultralytics/models/yolo/obb/train.py +5 -3
ultralytics/models/yolo/obb/val.py +41 -23
ultralytics/models/yolo/pose/__init__.py +1 -1
ultralytics/models/yolo/pose/predict.py +3 -5
ultralytics/models/yolo/pose/train.py +2 -2
ultralytics/models/yolo/pose/val.py +51 -17
ultralytics/models/yolo/segment/__init__.py +1 -1
ultralytics/models/yolo/segment/predict.py +3 -5
ultralytics/models/yolo/segment/train.py +2 -2
ultralytics/models/yolo/segment/val.py +60 -19
ultralytics/models/yolo/world/__init__.py +5 -0
ultralytics/models/yolo/world/train.py +92 -0
ultralytics/models/yolo/world/train_world.py +109 -0
ultralytics/nn/__init__.py +1 -1
ultralytics/nn/autobackend.py +228 -93
ultralytics/nn/modules/__init__.py +39 -14
ultralytics/nn/modules/activation.py +21 -0
ultralytics/nn/modules/block.py +526 -66
ultralytics/nn/modules/conv.py +24 -7
ultralytics/nn/modules/head.py +177 -34
ultralytics/nn/modules/transformer.py +6 -5
ultralytics/nn/modules/utils.py +1 -2
ultralytics/nn/tasks.py +225 -77
ultralytics/solutions/__init__.py +30 -1
ultralytics/solutions/ai_gym.py +96 -143
ultralytics/solutions/analytics.py +247 -0
ultralytics/solutions/distance_calculation.py +78 -135
ultralytics/solutions/heatmap.py +93 -247
ultralytics/solutions/object_counter.py +184 -259
ultralytics/solutions/parking_management.py +246 -0
ultralytics/solutions/queue_management.py +112 -0
ultralytics/solutions/region_counter.py +116 -0
ultralytics/solutions/security_alarm.py +144 -0
ultralytics/solutions/solutions.py +178 -0
ultralytics/solutions/speed_estimation.py +86 -174
ultralytics/solutions/streamlit_inference.py +190 -0
ultralytics/solutions/trackzone.py +68 -0
ultralytics/trackers/__init__.py +1 -1
ultralytics/trackers/basetrack.py +32 -13
ultralytics/trackers/bot_sort.py +61 -28
ultralytics/trackers/byte_tracker.py +83 -51
ultralytics/trackers/track.py +21 -6
ultralytics/trackers/utils/__init__.py +1 -1
ultralytics/trackers/utils/gmc.py +62 -48
ultralytics/trackers/utils/kalman_filter.py +166 -35
ultralytics/trackers/utils/matching.py +40 -21
ultralytics/utils/__init__.py +511 -239
ultralytics/utils/autobatch.py +40 -22
ultralytics/utils/benchmarks.py +266 -85
ultralytics/utils/callbacks/__init__.py +1 -1
ultralytics/utils/callbacks/base.py +1 -3
ultralytics/utils/callbacks/clearml.py +7 -6
ultralytics/utils/callbacks/comet.py +39 -17
ultralytics/utils/callbacks/dvc.py +1 -1
ultralytics/utils/callbacks/hub.py +16 -16
ultralytics/utils/callbacks/mlflow.py +28 -24
ultralytics/utils/callbacks/neptune.py +6 -2
ultralytics/utils/callbacks/raytune.py +3 -4
ultralytics/utils/callbacks/tensorboard.py +18 -18
ultralytics/utils/callbacks/wb.py +27 -20
ultralytics/utils/checks.py +160 -100
ultralytics/utils/dist.py +2 -1
ultralytics/utils/downloads.py +40 -34
ultralytics/utils/errors.py +1 -1
ultralytics/utils/files.py +72 -38
ultralytics/utils/instance.py +41 -19
ultralytics/utils/loss.py +83 -55
ultralytics/utils/metrics.py +61 -56
ultralytics/utils/ops.py +94 -89
ultralytics/utils/patches.py +30 -14
ultralytics/utils/plotting.py +600 -269
ultralytics/utils/tal.py +67 -26
ultralytics/utils/torch_utils.py +302 -102
ultralytics/utils/triton.py +2 -1
ultralytics/utils/tuner.py +21 -12
ultralytics-8.3.62.dist-info/METADATA +370 -0
ultralytics-8.3.62.dist-info/RECORD +241 -0
{ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/WHEEL +1 -1
ultralytics/data/explorer/__init__.py +0 -5
ultralytics/data/explorer/explorer.py +0 -472
ultralytics/data/explorer/gui/__init__.py +0 -1
ultralytics/data/explorer/gui/dash.py +0 -268
ultralytics/data/explorer/utils.py +0 -166
ultralytics/models/fastsam/prompt.py +0 -357
ultralytics-8.1.29.dist-info/METADATA +0 -373
ultralytics-8.1.29.dist-info/RECORD +0 -197
{ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/LICENSE +0 -0
{ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/entry_points.txt +0 -0
{ultralytics-8.1.29.dist-info → ultralytics-8.3.62.dist-info}/top_level.txt +0 -0

ultralytics/utils/torch_utils.py CHANGED Viewed

@@ -1,11 +1,13 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
+import gc
 import math
 import os
 import random
 import time
 from contextlib import contextmanager
 from copy import deepcopy
+from datetime import datetime
 from pathlib import Path
 from typing import Union
@@ -14,33 +16,51 @@ import torch
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
-from ultralytics.utils import DEFAULT_CFG_DICT, DEFAULT_CFG_KEYS, LOGGER, __version__
-from ultralytics.utils.checks import PYTHON_VERSION, check_version
+from ultralytics.utils import (
+    DEFAULT_CFG_DICT,
+    DEFAULT_CFG_KEYS,
+    LOGGER,
+    NUM_THREADS,
+    PYTHON_VERSION,
+    TORCHVISION_VERSION,
+    WINDOWS,
+    __version__,
+    colorstr,
+)
+from ultralytics.utils.checks import check_version
 try:
     import thop
 except ImportError:
     thop = None
+# Version checks (all default to version>=min_version)
 TORCH_1_9 = check_version(torch.__version__, "1.9.0")
 TORCH_1_13 = check_version(torch.__version__, "1.13.0")
 TORCH_2_0 = check_version(torch.__version__, "2.0.0")
-TORCHVISION_0_10 = check_version(torchvision.__version__, "0.10.0")
-TORCHVISION_0_11 = check_version(torchvision.__version__, "0.11.0")
-TORCHVISION_0_13 = check_version(torchvision.__version__, "0.13.0")
+TORCH_2_4 = check_version(torch.__version__, "2.4.0")
+TORCHVISION_0_10 = check_version(TORCHVISION_VERSION, "0.10.0")
+TORCHVISION_0_11 = check_version(TORCHVISION_VERSION, "0.11.0")
+TORCHVISION_0_13 = check_version(TORCHVISION_VERSION, "0.13.0")
+TORCHVISION_0_18 = check_version(TORCHVISION_VERSION, "0.18.0")
+if WINDOWS and check_version(torch.__version__, "==2.4.0"):  # reject version 2.4.0 on Windows
+    LOGGER.warning(
+        "WARNING ⚠️ Known issue with torch==2.4.0 on Windows with CPU, recommend upgrading to torch>=2.4.1 to resolve "
+        "https://github.com/ultralytics/ultralytics/issues/15049"
+    )
 @contextmanager
 def torch_distributed_zero_first(local_rank: int):
-    """Decorator to make all processes in distributed training wait for each local_master to do something."""
-    initialized = torch.distributed.is_available() and torch.distributed.is_initialized()
-    if initialized and local_rank not in (-1, 0):
+    """Ensures all processes in distributed training wait for the local master (rank 0) to complete a task first."""
+    initialized = dist.is_available() and dist.is_initialized()
+    if initialized and local_rank not in {-1, 0}:
         dist.barrier(device_ids=[local_rank])
     yield
     if initialized and local_rank == 0:
-        dist.barrier(device_ids=[0])
+        dist.barrier(device_ids=[local_rank])
 def smart_inference_mode():
@@ -56,14 +76,58 @@ def smart_inference_mode():
     return decorate
+def autocast(enabled: bool, device: str = "cuda"):
+    """
+    Get the appropriate autocast context manager based on PyTorch version and AMP setting.
+    This function returns a context manager for automatic mixed precision (AMP) training that is compatible with both
+    older and newer versions of PyTorch. It handles the differences in the autocast API between PyTorch versions.
+    Args:
+        enabled (bool): Whether to enable automatic mixed precision.
+        device (str, optional): The device to use for autocast. Defaults to 'cuda'.
+    Returns:
+        (torch.amp.autocast): The appropriate autocast context manager.
+    Note:
+        - For PyTorch versions 1.13 and newer, it uses `torch.amp.autocast`.
+        - For older versions, it uses `torch.cuda.autocast`.
+    Example:
+        ```python
+        with autocast(amp=True):
+            # Your mixed precision operations here
+            pass
+        ```
+    """
+    if TORCH_1_13:
+        return torch.amp.autocast(device, enabled=enabled)
+    else:
+        return torch.cuda.amp.autocast(enabled)
 def get_cpu_info():
     """Return a string with system CPU information, i.e. 'Apple M2'."""
-    import cpuinfo  # pip install py-cpuinfo
+    from ultralytics.utils import PERSISTENT_CACHE  # avoid circular import error
-    k = "brand_raw", "hardware_raw", "arch_string_raw"  # info keys sorted by preference (not all keys always available)
-    info = cpuinfo.get_cpu_info()  # info dict
-    string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown")
-    return string.replace("(R)", "").replace("CPU ", "").replace("@ ", "")
+    if "cpu_info" not in PERSISTENT_CACHE:
+        try:
+            import cpuinfo  # pip install py-cpuinfo
+            k = "brand_raw", "hardware_raw", "arch_string_raw"  # keys sorted by preference
+            info = cpuinfo.get_cpu_info()  # info dict
+            string = info.get(k[0] if k[0] in info else k[1] if k[1] in info else k[2], "unknown")
+            PERSISTENT_CACHE["cpu_info"] = string.replace("(R)", "").replace("CPU ", "").replace("@ ", "")
+        except Exception:
+            pass
+    return PERSISTENT_CACHE.get("cpu_info", "unknown")
+def get_gpu_info(index):
+    """Return a string with system GPU information, i.e. 'Tesla T4, 15102MiB'."""
+    properties = torch.cuda.get_device_properties(index)
+    return f"{properties.name}, {properties.total_memory / (1 << 20):.0f}MiB"
 def select_device(device="", batch=0, newline=False, verbose=True):
@@ -90,30 +154,31 @@ def select_device(device="", batch=0, newline=False, verbose=True):
             devices when using multiple GPUs.
     Examples:
-        >>> select_device('cuda:0')
+        >>> select_device("cuda:0")
         device(type='cuda', index=0)
-        >>> select_device('cpu')
+        >>> select_device("cpu")
         device(type='cpu')
     Note:
         Sets the 'CUDA_VISIBLE_DEVICES' environment variable for specifying which GPUs to use.
     """
-    if isinstance(device, torch.device):
+    if isinstance(device, torch.device) or str(device).startswith("tpu"):
         return device
-    s = f"Ultralytics YOLOv{__version__} 🚀 Python-{PYTHON_VERSION} torch-{torch.__version__} "
+    s = f"Ultralytics {__version__} 🚀 Python-{PYTHON_VERSION} torch-{torch.__version__} "
     device = str(device).lower()
     for remove in "cuda:", "none", "(", ")", "[", "]", "'", " ":
         device = device.replace(remove, "")  # to string, 'cuda:0' -> '0' and '(0, 1)' -> '0,1'
     cpu = device == "cpu"
-    mps = device in ("mps", "mps:0")  # Apple Metal Performance Shaders (MPS)
+    mps = device in {"mps", "mps:0"}  # Apple Metal Performance Shaders (MPS)
     if cpu or mps:
         os.environ["CUDA_VISIBLE_DEVICES"] = "-1"  # force torch.cuda.is_available() = False
     elif device:  # non-cpu device requested
         if device == "cuda":
             device = "0"
+        if "," in device:
+            device = ",".join([x for x in device.split(",") if x])  # remove sequential commas, i.e. "0,,1" -> "0,1"
         visible = os.environ.get("CUDA_VISIBLE_DEVICES", None)
         os.environ["CUDA_VISIBLE_DEVICES"] = device  # set environment variable - must be before assert is_available()
         if not (torch.cuda.is_available() and torch.cuda.device_count() >= len(device.split(","))):
@@ -135,17 +200,22 @@ def select_device(device="", batch=0, newline=False, verbose=True):
             )
     if not cpu and not mps and torch.cuda.is_available():  # prefer GPU if available
-        devices = device.split(",") if device else "0"  # range(torch.cuda.device_count())  # i.e. 0,1,6,7
+        devices = device.split(",") if device else "0"  # i.e. "0,1" -> ["0", "1"]
         n = len(devices)  # device count
-        if n > 1 and batch > 0 and batch % n != 0:  # check batch_size is divisible by device_count
-            raise ValueError(
-                f"'batch={batch}' must be a multiple of GPU count {n}. Try 'batch={batch // n * n}' or "
-                f"'batch={batch // n * n + n}', the nearest batch sizes evenly divisible by {n}."
-            )
+        if n > 1:  # multi-GPU
+            if batch < 1:
+                raise ValueError(
+                    "AutoBatch with batch<1 not supported for Multi-GPU training, "
+                    "please specify a valid batch size, i.e. batch=16."
+                )
+            if batch >= 0 and batch % n != 0:  # check batch_size is divisible by device_count
+                raise ValueError(
+                    f"'batch={batch}' must be a multiple of GPU count {n}. Try 'batch={batch // n * n}' or "
+                    f"'batch={batch // n * n + n}', the nearest batch sizes evenly divisible by {n}."
+                )
         space = " " * (len(s) + 1)
         for i, d in enumerate(devices):
-            p = torch.cuda.get_device_properties(i)
-            s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / (1 << 20):.0f}MiB)\n"  # bytes to MB
+            s += f"{'' if i == 0 else space}CUDA:{d} ({get_gpu_info(i)})\n"  # bytes to MB
         arg = "cuda:0"
     elif mps and TORCH_2_0 and torch.backends.mps.is_available():
         # Prefer MPS if available
@@ -155,6 +225,8 @@ def select_device(device="", batch=0, newline=False, verbose=True):
         s += f"CPU ({get_cpu_info()})\n"
         arg = "cpu"
+    if arg in {"cpu", "mps"}:
+        torch.set_num_threads(NUM_THREADS)  # reset OMP_NUM_THREADS for cpu training
     if verbose:
         LOGGER.info(s if newline else s.rstrip())
     return torch.device(arg)
@@ -185,7 +257,7 @@ def fuse_conv_and_bn(conv, bn):
     )
     # Prepare filters
-    w_conv = conv.weight.clone().view(conv.out_channels, -1)
+    w_conv = conv.weight.view(conv.out_channels, -1)
     w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
     fusedconv.weight.copy_(torch.mm(w_bn, w_conv).view(fusedconv.weight.shape))
@@ -216,7 +288,7 @@ def fuse_deconv_and_bn(deconv, bn):
     )
     # Prepare filters
-    w_deconv = deconv.weight.clone().view(deconv.out_channels, -1)
+    w_deconv = deconv.weight.view(deconv.out_channels, -1)
     w_bn = torch.diag(bn.weight.div(torch.sqrt(bn.eps + bn.running_var)))
     fuseddconv.weight.copy_(torch.mm(w_bn, w_deconv).view(fuseddconv.weight.shape))
@@ -229,33 +301,27 @@ def fuse_deconv_and_bn(deconv, bn):
 def model_info(model, detailed=False, verbose=True, imgsz=640):
-    """
-    Model information.
-    imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320].
-    """
+    """Print and return detailed model information layer by layer."""
     if not verbose:
         return
     n_p = get_num_params(model)  # number of parameters
     n_g = get_num_gradients(model)  # number of gradients
     n_l = len(list(model.modules()))  # number of layers
     if detailed:
-        LOGGER.info(
-            f"{'layer':>5} {'name':>40} {'gradient':>9} {'parameters':>12} {'shape':>20} {'mu':>10} {'sigma':>10}"
-        )
+        LOGGER.info(f"{'layer':>5}{'name':>40}{'gradient':>10}{'parameters':>12}{'shape':>20}{'mu':>10}{'sigma':>10}")
         for i, (name, p) in enumerate(model.named_parameters()):
             name = name.replace("module_list.", "")
             LOGGER.info(
-                "%5g %40s %9s %12g %20s %10.3g %10.3g %10s"
-                % (i, name, p.requires_grad, p.numel(), list(p.shape), p.mean(), p.std(), p.dtype)
+                f"{i:>5g}{name:>40s}{p.requires_grad!r:>10}{p.numel():>12g}{str(list(p.shape)):>20s}"
+                f"{p.mean():>10.3g}{p.std():>10.3g}{str(p.dtype):>15s}"
             )
-    flops = get_flops(model, imgsz)
+    flops = get_flops(model, imgsz)  # imgsz may be int or list, i.e. imgsz=640 or imgsz=[640, 320]
     fused = " (fused)" if getattr(model, "is_fused", lambda: False)() else ""
     fs = f", {flops:.1f} GFLOPs" if flops else ""
     yaml_file = getattr(model, "yaml_file", "") or getattr(model, "yaml", {}).get("yaml_file", "")
     model_name = Path(yaml_file).stem.replace("yolo", "YOLO") or "Model"
-    LOGGER.info(f"{model_name} summary{fused}: {n_l} layers, {n_p} parameters, {n_g} gradients{fs}")
+    LOGGER.info(f"{model_name} summary{fused}: {n_l:,} layers, {n_p:,} parameters, {n_g:,} gradients{fs}")
     return n_l, n_p, n_g, flops
@@ -276,11 +342,13 @@ def model_info_for_loggers(trainer):
     Example:
         YOLOv8n info for loggers
         ```python
-        results = {'model/parameters': 3151904,
-                   'model/GFLOPs': 8.746,
-                   'model/speed_ONNX(ms)': 41.244,
-                   'model/speed_TensorRT(ms)': 3.211,
-                   'model/speed_PyTorch(ms)': 18.755}
+        results = {
+            "model/parameters": 3151904,
+            "model/GFLOPs": 8.746,
+            "model/speed_ONNX(ms)": 41.244,
+            "model/speed_TensorRT(ms)": 3.211,
+            "model/speed_PyTorch(ms)": 18.755,
+        }
         ```
     """
     if trainer.args.profile:  # profile ONNX and TensorRT times
@@ -322,19 +390,28 @@ def get_flops(model, imgsz=640):
 def get_flops_with_torch_profiler(model, imgsz=640):
-    """Compute model FLOPs (thop alternative)."""
-    if TORCH_2_0:
-        model = de_parallel(model)
-        p = next(model.parameters())
+    """Compute model FLOPs (thop package alternative, but 2-10x slower unfortunately)."""
+    if not TORCH_2_0:  # torch profiler implemented in torch>=2.0
+        return 0.0
+    model = de_parallel(model)
+    p = next(model.parameters())
+    if not isinstance(imgsz, list):
+        imgsz = [imgsz, imgsz]  # expand if int/float
+    try:
+        # Use stride size for input tensor
         stride = (max(int(model.stride.max()), 32) if hasattr(model, "stride") else 32) * 2  # max stride
-        im = torch.zeros((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
+        im = torch.empty((1, p.shape[1], stride, stride), device=p.device)  # input image in BCHW format
         with torch.profiler.profile(with_flops=True) as prof:
             model(im)
         flops = sum(x.flops for x in prof.key_averages()) / 1e9
-        imgsz = imgsz if isinstance(imgsz, list) else [imgsz, imgsz]  # expand if int/float
         flops = flops * imgsz[0] / stride * imgsz[1] / stride  # 640x640 GFLOPs
-        return flops
-    return 0
+    except Exception:
+        # Use actual image size for input tensor (i.e. required for RTDETR models)
+        im = torch.empty((1, p.shape[1], *imgsz), device=p.device)  # input image in BCHW format
+        with torch.profiler.profile(with_flops=True) as prof:
+            model(im)
+        flops = sum(x.flops for x in prof.key_averages()) / 1e9
+    return flops
 def initialize_weights(model):
@@ -346,14 +423,12 @@ def initialize_weights(model):
         elif t is nn.BatchNorm2d:
             m.eps = 1e-3
             m.momentum = 0.03
-        elif t in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
+        elif t in {nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU}:
             m.inplace = True
 def scale_img(img, ratio=1.0, same_shape=False, gs=32):
-    """Scales and pads an image tensor of shape img(bs,3,y,x) based on given ratio and grid size gs, optionally
-    retaining the original shape.
-    """
+    """Scales and pads an image tensor, optionally maintaining aspect ratio and padding to gs multiple."""
     if ratio == 1.0:
         return img
     h, w = img.shape[2:]
@@ -364,13 +439,6 @@ def scale_img(img, ratio=1.0, same_shape=False, gs=32):
     return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447)  # value = imagenet mean
-def make_divisible(x, divisor):
-    """Returns nearest x divisible by divisor."""
-    if isinstance(divisor, torch.Tensor):
-        divisor = int(divisor.max())  # to int
-    return math.ceil(x / divisor) * divisor
 def copy_attr(a, b, include=(), exclude=()):
     """Copies attributes from object 'b' to object 'a', with options to include/exclude certain attributes."""
     for k, v in b.__dict__.items():
@@ -381,8 +449,13 @@ def copy_attr(a, b, include=(), exclude=()):
 def get_latest_opset():
-    """Return second-most (for maturity) recently supported ONNX opset by this version of torch."""
-    return max(int(k[14:]) for k in vars(torch.onnx) if "symbolic_opset" in k) - 1  # opset
+    """Return the second-most recent ONNX opset version supported by this version of PyTorch, adjusted for maturity."""
+    if TORCH_1_13:
+        # If the PyTorch>=1.13, dynamically compute the latest opset minus one using 'symbolic_opset'
+        return max(int(k[14:]) for k in vars(torch.onnx) if "symbolic_opset" in k) - 1
+    # Otherwise for PyTorch<=1.12 return the corresponding predefined opset
+    version = torch.onnx.producer_version.rsplit(".", 1)[0]  # i.e. '2.3'
+    return {"1.12": 15, "1.11": 14, "1.10": 13, "1.9": 12, "1.8": 12}.get(version, 12)
 def intersect_dicts(da, db, exclude=()):
@@ -427,14 +500,17 @@ def init_seeds(seed=0, deterministic=False):
 class ModelEMA:
-    """Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models
-    Keeps a moving average of everything in the model state_dict (parameters and buffers)
+    """
+    Updated Exponential Moving Average (EMA) from https://github.com/rwightman/pytorch-image-models. Keeps a moving
+    average of everything in the model state_dict (parameters and buffers).
     For EMA details see https://www.tensorflow.org/api_docs/python/tf/train/ExponentialMovingAverage
     To disable EMA set the `enabled` attribute to `False`.
     """
     def __init__(self, model, decay=0.9999, tau=2000, updates=0):
-        """Create EMA."""
+        """Initialize EMA for 'model' with given arguments."""
         self.ema = deepcopy(de_parallel(model)).eval()  # FP32 EMA
         self.updates = updates  # number of EMA updates
         self.decay = lambda x: decay * (1 - math.exp(-x / tau))  # decay exponential ramp (to help early epochs)
@@ -461,50 +537,113 @@ class ModelEMA:
             copy_attr(self.ema, model, include, exclude)
-def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "") -> None:
+def strip_optimizer(f: Union[str, Path] = "best.pt", s: str = "", updates: dict = None) -> dict:
     """
     Strip optimizer from 'f' to finalize training, optionally save as 's'.
     Args:
         f (str): file path to model to strip the optimizer from. Default is 'best.pt'.
         s (str): file path to save the model with stripped optimizer to. If not provided, 'f' will be overwritten.
+        updates (dict): a dictionary of updates to overlay onto the checkpoint before saving.
     Returns:
-        None
+        (dict): The combined checkpoint dictionary.
     Example:
         ```python
         from pathlib import Path
         from ultralytics.utils.torch_utils import strip_optimizer
-        for f in Path('path/to/weights').rglob('*.pt'):
+        for f in Path("path/to/model/checkpoints").rglob("*.pt"):
             strip_optimizer(f)
         ```
-    """
-    x = torch.load(f, map_location=torch.device("cpu"))
-    if "model" not in x:
-        LOGGER.info(f"Skipping {f}, not a valid Ultralytics model.")
-        return
+    Note:
+        Use `ultralytics.nn.torch_safe_load` for missing modules with `x = torch_safe_load(f)[0]`
+    """
+    try:
+        x = torch.load(f, map_location=torch.device("cpu"))
+        assert isinstance(x, dict), "checkpoint is not a Python dictionary"
+        assert "model" in x, "'model' missing from checkpoint"
+    except Exception as e:
+        LOGGER.warning(f"WARNING ⚠️ Skipping {f}, not a valid Ultralytics model: {e}")
+        return {}
+    metadata = {
+        "date": datetime.now().isoformat(),
+        "version": __version__,
+        "license": "AGPL-3.0 License (https://ultralytics.com/license)",
+        "docs": "https://docs.ultralytics.com",
+    }
+    # Update model
+    if x.get("ema"):
+        x["model"] = x["ema"]  # replace model with EMA
     if hasattr(x["model"], "args"):
         x["model"].args = dict(x["model"].args)  # convert from IterableSimpleNamespace to dict
-    args = {**DEFAULT_CFG_DICT, **x["train_args"]} if "train_args" in x else None  # combine args
-    if x.get("ema"):
-        x["model"] = x["ema"]  # replace model with ema
-    for k in "optimizer", "best_fitness", "ema", "updates":  # keys
-        x[k] = None
-    x["epoch"] = -1
+    if hasattr(x["model"], "criterion"):
+        x["model"].criterion = None  # strip loss criterion
     x["model"].half()  # to FP16
     for p in x["model"].parameters():
         p.requires_grad = False
+    # Update other keys
+    args = {**DEFAULT_CFG_DICT, **x.get("train_args", {})}  # combine args
+    for k in "optimizer", "best_fitness", "ema", "updates":  # keys
+        x[k] = None
+    x["epoch"] = -1
     x["train_args"] = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS}  # strip non-default keys
     # x['model'].args = x['train_args']
-    torch.save(x, s or f)
+    # Save
+    combined = {**metadata, **x, **(updates or {})}
+    torch.save(combined, s or f)  # combine dicts (prefer to the right)
     mb = os.path.getsize(s or f) / 1e6  # file size
     LOGGER.info(f"Optimizer stripped from {f},{f' saved as {s},' if s else ''} {mb:.1f}MB")
+    return combined
+def convert_optimizer_state_dict_to_fp16(state_dict):
+    """
+    Converts the state_dict of a given optimizer to FP16, focusing on the 'state' key for tensor conversions.
+    This method aims to reduce storage size without altering 'param_groups' as they contain non-tensor data.
+    """
+    for state in state_dict["state"].values():
+        for k, v in state.items():
+            if k != "step" and isinstance(v, torch.Tensor) and v.dtype is torch.float32:
+                state[k] = v.half()
+    return state_dict
+@contextmanager
+def cuda_memory_usage(device=None):
+    """
+    Monitor and manage CUDA memory usage.
+    This function checks if CUDA is available and, if so, empties the CUDA cache to free up unused memory.
+    It then yields a dictionary containing memory usage information, which can be updated by the caller.
+    Finally, it updates the dictionary with the amount of memory reserved by CUDA on the specified device.
+    Args:
+        device (torch.device, optional): The CUDA device to query memory usage for. Defaults to None.
+    Yields:
+        (dict): A dictionary with a key 'memory' initialized to 0, which will be updated with the reserved memory.
+    """
+    cuda_info = dict(memory=0)
+    if torch.cuda.is_available():
+        torch.cuda.empty_cache()
+        try:
+            yield cuda_info
+        finally:
+            cuda_info["memory"] = torch.cuda.memory_reserved(device)
+    else:
+        yield cuda_info
-def profile(input, ops, n=10, device=None):
+def profile(input, ops, n=10, device=None, max_num_obj=0):
     """
     Ultralytics speed, memory and FLOPs profiler.
@@ -525,7 +664,8 @@ def profile(input, ops, n=10, device=None):
         f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
         f"{'input':>24s}{'output':>24s}"
     )
+    gc.collect()  # attempt to free unused memory
+    torch.cuda.empty_cache()
     for x in input if isinstance(input, list) else [input]:
         x = x.to(device)
         x.requires_grad = True
@@ -539,19 +679,31 @@ def profile(input, ops, n=10, device=None):
                 flops = 0
             try:
+                mem = 0
                 for _ in range(n):
-                    t[0] = time_sync()
-                    y = m(x)
-                    t[1] = time_sync()
-                    try:
-                        (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
-                        t[2] = time_sync()
-                    except Exception:  # no backward method
-                        # print(e)  # for debug
-                        t[2] = float("nan")
+                    with cuda_memory_usage(device) as cuda_info:
+                        t[0] = time_sync()
+                        y = m(x)
+                        t[1] = time_sync()
+                        try:
+                            (sum(yi.sum() for yi in y) if isinstance(y, list) else y).sum().backward()
+                            t[2] = time_sync()
+                        except Exception:  # no backward method
+                            # print(e)  # for debug
+                            t[2] = float("nan")
+                    mem += cuda_info["memory"] / 1e9  # (GB)
                     tf += (t[1] - t[0]) * 1000 / n  # ms per op forward
                     tb += (t[2] - t[1]) * 1000 / n  # ms per op backward
-                mem = torch.cuda.memory_reserved() / 1e9 if torch.cuda.is_available() else 0  # (GB)
+                    if max_num_obj:  # simulate training with predictions per image grid (for AutoBatch)
+                        with cuda_memory_usage(device) as cuda_info:
+                            torch.randn(
+                                x.shape[0],
+                                max_num_obj,
+                                int(sum((x.shape[-1] / s) * (x.shape[-2] / s) for s in m.stride.tolist())),
+                                device=device,
+                                dtype=torch.float32,
+                            )
+                        mem += cuda_info["memory"] / 1e9  # (GB)
                 s_in, s_out = (tuple(x.shape) if isinstance(x, torch.Tensor) else "list" for x in (x, y))  # shapes
                 p = sum(x.numel() for x in m.parameters()) if isinstance(m, nn.Module) else 0  # parameters
                 LOGGER.info(f"{p:12}{flops:12.4g}{mem:>14.3f}{tf:14.4g}{tb:14.4g}{str(s_in):>24s}{str(s_out):>24s}")
@@ -559,7 +711,9 @@ def profile(input, ops, n=10, device=None):
             except Exception as e:
                 LOGGER.info(e)
                 results.append(None)
-            torch.cuda.empty_cache()
+            finally:
+                gc.collect()  # attempt to free unused memory
+                torch.cuda.empty_cache()
     return results
@@ -599,10 +753,56 @@ class EarlyStopping:
         self.possible_stop = delta >= (self.patience - 1)  # possible stop may occur next epoch
         stop = delta >= self.patience  # stop training if patience exceeded
         if stop:
+            prefix = colorstr("EarlyStopping: ")
             LOGGER.info(
-                f"Stopping training early as no improvement observed in last {self.patience} epochs. "
+                f"{prefix}Training stopped early as no improvement observed in last {self.patience} epochs. "
                 f"Best results observed at epoch {self.best_epoch}, best model saved as best.pt.\n"
                 f"To update EarlyStopping(patience={self.patience}) pass a new patience value, "
                 f"i.e. `patience=300` or use `patience=0` to disable EarlyStopping."
             )
         return stop
+class FXModel(nn.Module):
+    """
+    A custom model class for torch.fx compatibility.
+    This class extends `torch.nn.Module` and is designed to ensure compatibility with torch.fx for tracing and graph manipulation.
+    It copies attributes from an existing model and explicitly sets the model attribute to ensure proper copying.
+    Args:
+        model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
+    """
+    def __init__(self, model):
+        """
+        Initialize the FXModel.
+        Args:
+            model (torch.nn.Module): The original model to wrap for torch.fx compatibility.
+        """
+        super().__init__()
+        copy_attr(self, model)
+        # Explicitly set `model` since `copy_attr` somehow does not copy it.
+        self.model = model.model
+    def forward(self, x):
+        """
+        Forward pass through the model.
+        This method performs the forward pass through the model, handling the dependencies between layers and saving intermediate outputs.
+        Args:
+            x (torch.Tensor): The input tensor to the model.
+        Returns:
+            (torch.Tensor): The output tensor from the model.
+        """
+        y = []  # outputs
+        for m in self.model:
+            if m.f != -1:  # if not from previous layer
+                # from earlier layers
+                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]
+            x = m(x)  # run
+            y.append(x)  # save output
+        return x

ultralytics/utils/triton.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Ultralytics YOLO 🚀, AGPL-3.0 license
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 from typing import List
 from urllib.parse import urlsplit
@@ -66,6 +66,7 @@ class TritonRemoteModel:
         self.np_input_formats = [type_map[x] for x in self.input_formats]
         self.input_names = [x["name"] for x in config["input"]]
         self.output_names = [x["name"] for x in config["output"]]
+        self.metadata = eval(config.get("parameters", {}).get("metadata", {}).get("string_value", "None"))
     def __call__(self, *inputs: np.ndarray) -> List[np.ndarray]:
         """

ultralytics 8.1.29__py3-none-any.whl → 8.3.62__py3-none-any.whl

ultralytics 8.1.29py3-none-any.whl → 8.3.62py3-none-any.whl