ultralytics 8.3.143__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +11 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -157
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +15 -7
- ultralytics/solutions/object_cropper.py +3 -2
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +184 -75
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
- ultralytics-8.3.144.dist-info/RECORD +272 -0
- ultralytics-8.3.143.dist-info/RECORD +0 -272
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/models/yolo/yoloe/train.py CHANGED
@@ -2,6 +2,8 @@
 
 import itertools
 from copy import copy, deepcopy
+from pathlib import Path
+from typing import Dict, List, Optional, Union
 
 import torch
 
@@ -17,9 +19,22 @@ from .val import YOLOEDetectValidator
 
 
 class YOLOETrainer(DetectionTrainer):
-    """
+    """
+    A trainer class for YOLOE object detection models.
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+    This class extends DetectionTrainer to provide specialized training functionality for YOLOE models,
+    including custom model initialization, validation, and dataset building with multi-modal support.
+
+    Attributes:
+        loss_names (tuple): Names of loss components used during training.
+
+    Methods:
+        get_model: Initialize and return a YOLOEModel with specified configuration.
+        get_validator: Return a YOLOEDetectValidator for model validation.
+        build_dataset: Build YOLO dataset with multi-modal support for training.
+    """
+
+    def __init__(self, cfg=DEFAULT_CFG, overrides: Optional[Dict] = None, _callbacks=None):
         """
         Initialize the YOLOE Trainer with specified configurations.
 
@@ -36,14 +51,14 @@ class YOLOETrainer(DetectionTrainer):
         overrides["overlap_mask"] = False
         super().__init__(cfg, overrides, _callbacks)
 
-    def get_model(self, cfg=None, weights=None, verbose=True):
+    def get_model(self, cfg=None, weights=None, verbose: bool = True):
         """
         Return a YOLOEModel initialized with the specified configuration and weights.
 
         Args:
-            cfg (dict | str
+            cfg (dict | str, optional): Model configuration. Can be a dictionary containing a 'yaml_file' key,
                 a direct path to a YAML file, or None to use default configuration.
-            weights (str | Path
+            weights (str | Path, optional): Path to pretrained weights file to load into the model.
             verbose (bool): Whether to display model information during initialization.
 
         Returns:
@@ -68,20 +83,20 @@ class YOLOETrainer(DetectionTrainer):
         return model
 
     def get_validator(self):
-        """
+        """Return a YOLOEDetectValidator for YOLOE model validation."""
        self.loss_names = "box", "cls", "dfl"
         return YOLOEDetectValidator(
            self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
 
-    def build_dataset(self, img_path, mode="train", batch=None):
+    def build_dataset(self, img_path: str, mode: str = "train", batch: Optional[int] = None):
         """
         Build YOLO Dataset.
 
         Args:
             img_path (str): Path to the folder containing images.
-            mode (str):
-            batch (int, optional): Size of batches, this is for
+            mode (str): 'train' mode or 'val' mode, users are able to customize different augmentations for each mode.
+            batch (int, optional): Size of batches, this is for rectangular training.
 
         Returns:
             (Dataset): YOLO dataset configured for training or validation.
@@ -93,9 +108,17 @@ class YOLOETrainer(DetectionTrainer):
 
 
 class YOLOEPETrainer(DetectionTrainer):
-    """
+    """
+    Fine-tune YOLOE model using linear probing approach.
 
-    def get_model(self, cfg=None, weights=None, verbose=True):
+    This trainer freezes most model layers and only trains specific projection layers for efficient
+    fine-tuning on new datasets while preserving pretrained features.
+
+    Methods:
+        get_model: Initialize YOLOEModel with frozen layers except projection layers.
+    """
+
+    def get_model(self, cfg=None, weights=None, verbose: bool = True):
         """
         Return YOLOEModel initialized with specified config and weights.
 
@@ -139,9 +162,19 @@ class YOLOEPETrainer(DetectionTrainer):
 
 
 class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
-    """
+    """
+    Train YOLOE models from scratch with text embedding support.
+
+    This trainer combines YOLOE training capabilities with world training features, enabling
+    training from scratch with text embeddings and grounding datasets.
 
-    def build_dataset(self, img_path, mode="train", batch=None):
+    Methods:
+        build_dataset: Build datasets for training with grounding support.
+        preprocess_batch: Process batches with text features.
+        generate_text_embeddings: Generate and cache text embeddings for training.
+    """
+
+    def build_dataset(self, img_path: Union[List[str], str], mode: str = "train", batch: Optional[int] = None):
         """
         Build YOLO Dataset for training or validation.
 
@@ -168,7 +201,7 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
         batch["txt_feats"] = txt_feats
         return batch
 
-    def generate_text_embeddings(self, texts, batch, cache_dir):
+    def generate_text_embeddings(self, texts: List[str], batch: int, cache_dir: Path):
         """
         Generate text embeddings for a list of text samples.
 
@@ -196,21 +229,31 @@ class YOLOETrainerFromScratch(YOLOETrainer, WorldTrainerFromScratch):
 
 
 class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
-    """
+    """
+    Train prompt-free YOLOE model.
+
+    This trainer combines linear probing capabilities with from-scratch training for prompt-free
+    YOLOE models that don't require text prompts during inference.
+
+    Methods:
+        get_validator: Return standard DetectionValidator for validation.
+        preprocess_batch: Preprocess batches without text features.
+        set_text_embeddings: Set text embeddings for datasets (no-op for prompt-free).
+    """
 
     def get_validator(self):
-        """
+        """Return a DetectionValidator for YOLO model validation."""
         self.loss_names = "box", "cls", "dfl"
         return DetectionValidator(
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
 
     def preprocess_batch(self, batch):
-        """
+        """Preprocess a batch of images for YOLOE training, adjusting formatting and dimensions as needed."""
         batch = DetectionTrainer.preprocess_batch(self, batch)
         return batch
 
-    def set_text_embeddings(self, datasets, batch):
+    def set_text_embeddings(self, datasets, batch: int):
         """
         Set text embeddings for datasets to accelerate training by caching category names.
 
@@ -231,9 +274,18 @@ class YOLOEPEFreeTrainer(YOLOEPETrainer, YOLOETrainerFromScratch):
 
 
 class YOLOEVPTrainer(YOLOETrainerFromScratch):
-    """
+    """
+    Train YOLOE model with visual prompts.
+
+    This trainer extends YOLOETrainerFromScratch to support visual prompt-based training,
+    where visual cues are provided alongside images to guide the detection process.
+
+    Methods:
+        build_dataset: Build dataset with visual prompt loading transforms.
+        preprocess_batch: Preprocess batches with visual prompts.
+    """
 
-    def build_dataset(self, img_path, mode="train", batch=None):
+    def build_dataset(self, img_path: Union[List[str], str], mode: str = "train", batch: Optional[int] = None):
         """
         Build YOLO Dataset for training or validation with visual prompts.
 
@@ -263,7 +315,7 @@ class YOLOEVPTrainer(YOLOETrainerFromScratch):
         self.train_loader.dataset.transforms.append(LoadVisualPrompt())
 
     def preprocess_batch(self, batch):
-        """
+        """Preprocess a batch of images for YOLOE training, moving visual prompts to the appropriate device."""
         batch = super().preprocess_batch(batch)
         batch["visuals"] = batch["visuals"].to(self.device)
         return batch
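The substantive change in train.py is the typed public surface (`overrides: Optional[Dict]`, `img_path: str`, `batch: Optional[int]`). A minimal usage sketch, not taken from the diff; "yoloe-11s.yaml" and "coco8.yaml" are placeholder config/dataset names to substitute with files that exist locally:

from ultralytics.models.yolo.yoloe import YOLOETrainer

# Placeholder names: substitute a real YOLOE model config and dataset YAML.
overrides = {"model": "yoloe-11s.yaml", "data": "coco8.yaml", "epochs": 1}
trainer = YOLOETrainer(overrides=overrides)  # overrides is now typed Optional[Dict]
trainer.train()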
ultralytics/models/yolo/yoloe/train_seg.py CHANGED
@@ -14,8 +14,8 @@ class YOLOESegTrainer(YOLOETrainer, SegmentationTrainer):
     """
     Trainer class for YOLOE segmentation models.
 
-    This class combines YOLOETrainer and SegmentationTrainer to provide training functionality
-
+    This class combines YOLOETrainer and SegmentationTrainer to provide training functionality specifically for YOLOE
+    segmentation models, enabling both object detection and instance segmentation capabilities.
 
     Attributes:
         cfg (dict): Configuration dictionary with training parameters.
@@ -28,7 +28,7 @@ class YOLOESegTrainer(YOLOETrainer, SegmentationTrainer):
         Return YOLOESegModel initialized with specified config and weights.
 
         Args:
-            cfg (dict | str): Model configuration dictionary or YAML file path.
+            cfg (dict | str, optional): Model configuration dictionary or YAML file path.
             weights (str, optional): Path to pretrained weights file.
             verbose (bool): Whether to display model information.
 
@@ -66,7 +66,10 @@ class YOLOEPESegTrainer(SegmentationTrainer):
     Fine-tune YOLOESeg model in linear probing way.
 
     This trainer specializes in fine-tuning YOLOESeg models using a linear probing approach, which involves freezing
-    most of the model and only training specific layers.
+    most of the model and only training specific layers for efficient adaptation to new tasks.
+
+    Attributes:
+        data (dict): Dataset configuration containing channels, class names, and number of classes.
     """
 
     def get_model(self, cfg=None, weights=None, verbose=True):
@@ -74,7 +77,7 @@ class YOLOEPESegTrainer(SegmentationTrainer):
         Return YOLOESegModel initialized with specified config and weights for linear probing.
 
         Args:
-            cfg (dict | str): Model configuration dictionary or YAML file path.
+            cfg (dict | str, optional): Model configuration dictionary or YAML file path.
             weights (str, optional): Path to pretrained weights file.
             verbose (bool): Whether to display model information.
 
@@ -113,12 +116,12 @@ class YOLOEPESegTrainer(SegmentationTrainer):
 
 
 class YOLOESegTrainerFromScratch(YOLOETrainerFromScratch, YOLOESegTrainer):
-    """Trainer for YOLOE segmentation from scratch."""
+    """Trainer for YOLOE segmentation models trained from scratch without pretrained weights."""
 
     pass
 
 
 class YOLOESegVPTrainer(YOLOEVPTrainer, YOLOESegTrainerFromScratch):
-    """Trainer for YOLOE segmentation with VP."""
+    """Trainer for YOLOE segmentation models with Vision Prompt (VP) capabilities."""
 
     pass
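The "linear probing" these docstrings describe is a standard freezing pattern. A generic PyTorch sketch of the idea, not the library's implementation (`trainable_prefix` is an illustrative parameter name):

import torch.nn as nn

def linear_probe(model: nn.Module, trainable_prefix: str = "head") -> nn.Module:
    # Freeze every parameter except those under the chosen trainable submodule.
    for name, param in model.named_parameters():
        param.requires_grad = name.startswith(trainable_prefix)
    return model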
ultralytics/models/yolo/yoloe/val.py CHANGED
@@ -1,6 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 from copy import deepcopy
+from typing import Any, Dict, Optional, Union
 
 import torch
 from torch.nn import functional as F
@@ -18,26 +19,40 @@ from ultralytics.utils.torch_utils import select_device, smart_inference_mode
 
 class YOLOEDetectValidator(DetectionValidator):
     """
-    A
+    A validator class for YOLOE detection models that handles both text and visual prompt embeddings.
 
-    This
-    It
-
+    This class extends DetectionValidator to provide specialized validation functionality for YOLOE models.
+    It supports validation using either text prompts or visual prompt embeddings extracted from training samples,
+    enabling flexible evaluation strategies for prompt-based object detection.
 
     Attributes:
         device (torch.device): The device on which validation is performed.
         args (namespace): Configuration arguments for validation.
         dataloader (DataLoader): DataLoader for validation data.
+
+    Methods:
+        get_visual_pe: Extract visual prompt embeddings from training samples.
+        preprocess: Preprocess batch data ensuring visuals are on the same device as images.
+        get_vpe_dataloader: Create a dataloader for LVIS training visual prompt samples.
+        __call__: Run validation using either text or visual prompt embeddings.
+
+    Examples:
+        Validate with text prompts
+        >>> validator = YOLOEDetectValidator()
+        >>> stats = validator(model=model, load_vp=False)
+
+        Validate with visual prompts
+        >>> stats = validator(model=model, refer_data="path/to/data.yaml", load_vp=True)
     """
 
     @smart_inference_mode()
-    def get_visual_pe(self, dataloader, model):
+    def get_visual_pe(self, dataloader: torch.utils.data.DataLoader, model: YOLOEModel) -> torch.Tensor:
         """
         Extract visual prompt embeddings from training samples.
 
-        This
-
-
+        This method processes a dataloader to compute visual prompt embeddings for each class using a YOLOE model.
+        It normalizes the embeddings and handles cases where no samples exist for a class by setting their
+        embeddings to zero.
 
         Args:
             dataloader (torch.utils.data.DataLoader): The dataloader providing training samples.
@@ -53,6 +68,7 @@ class YOLOEDetectValidator(DetectionValidator):
 
         desc = "Get visual prompt embeddings from samples"
 
+        # Count samples per class
        for batch in dataloader:
             cls = batch["cls"].squeeze(-1).to(torch.int).unique()
             count = torch.bincount(cls, minlength=len(names))
@@ -60,6 +76,7 @@ class YOLOEDetectValidator(DetectionValidator):
 
         cls_visual_num = cls_visual_num.to(self.device)
 
+        # Extract visual prompt embeddings
         pbar = TQDM(dataloader, total=len(dataloader), desc=desc)
         for batch in pbar:
             batch = self.preprocess(batch)
@@ -73,30 +90,31 @@ class YOLOEDetectValidator(DetectionValidator):
             for c in cls:
                 visual_pe[c] += preds[i][pad_cls == c].sum(0) / cls_visual_num[c]
 
+        # Normalize embeddings for classes with samples, set others to zero
         visual_pe[cls_visual_num != 0] = F.normalize(visual_pe[cls_visual_num != 0], dim=-1, p=2)
         visual_pe[cls_visual_num == 0] = 0
         return visual_pe.unsqueeze(0)
 
-    def preprocess(self, batch):
+    def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
         """Preprocess batch data, ensuring visuals are on the same device as images."""
         batch = super().preprocess(batch)
         if "visuals" in batch:
             batch["visuals"] = batch["visuals"].to(batch["img"].device)
         return batch
 
-    def get_vpe_dataloader(self, data):
+    def get_vpe_dataloader(self, data: Dict[str, Any]) -> torch.utils.data.DataLoader:
         """
         Create a dataloader for LVIS training visual prompt samples.
 
-        This
-        It applies necessary transformations and configurations to the dataset
+        This method prepares a dataloader for visual prompt embeddings (VPE) using the specified dataset.
+        It applies necessary transformations including LoadVisualPrompt and configurations to the dataset
         for validation purposes.
 
         Args:
             data (dict): Dataset configuration dictionary containing paths and settings.
 
         Returns:
-            (torch.utils.data.DataLoader): The
+            (torch.utils.data.DataLoader): The dataloader for visual prompt samples.
         """
         dataset = build_yolo_dataset(
             self.args,
@@ -120,17 +138,23 @@ class YOLOEDetectValidator(DetectionValidator):
         )
 
     @smart_inference_mode()
-    def __call__(
+    def __call__(
+        self,
+        trainer: Optional[Any] = None,
+        model: Optional[Union[YOLOEModel, str]] = None,
+        refer_data: Optional[str] = None,
+        load_vp: bool = False,
+    ) -> Dict[str, Any]:
         """
         Run validation on the model using either text or visual prompt embeddings.
 
-        This method validates the model using either text prompts or visual prompts, depending
-
-
+        This method validates the model using either text prompts or visual prompts, depending on the load_vp flag.
+        It supports validation during training (using a trainer object) or standalone validation with a provided
+        model. For visual prompts, reference data can be specified to extract embeddings from a different dataset.
 
         Args:
             trainer (object, optional): Trainer object containing the model and device.
-            model (YOLOEModel, optional): Model to validate. Required if
+            model (YOLOEModel | str, optional): Model to validate. Required if trainer is not provided.
             refer_data (str, optional): Path to reference data for visual prompts.
             load_vp (bool): Whether to load visual prompts. If False, text prompts are used.
 
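The newly commented normalization step in get_visual_pe can be reproduced in isolation; the shapes and per-class counts below are made up for illustration:

import torch
import torch.nn.functional as F

num_classes, dim = 4, 8
visual_pe = torch.randn(num_classes, dim)    # accumulated per-class embeddings
cls_visual_num = torch.tensor([3, 0, 5, 2])  # samples seen per class
# L2-normalize classes that had samples; zero out classes that had none.
visual_pe[cls_visual_num != 0] = F.normalize(visual_pe[cls_visual_num != 0], dim=-1, p=2)
visual_pe[cls_visual_num == 0] = 0
print(visual_pe.unsqueeze(0).shape)          # torch.Size([1, 4, 8])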
ultralytics/nn/autobackend.py CHANGED
@@ -6,7 +6,7 @@ import platform
 import zipfile
 from collections import OrderedDict, namedtuple
 from pathlib import Path
-from typing import List, Optional, Union
+from typing import Any, Dict, List, Optional, Tuple, Union
 
 import cv2
 import numpy as np
@@ -19,8 +19,19 @@ from ultralytics.utils.checks import check_requirements, check_suffix, check_ver
 from ultralytics.utils.downloads import attempt_download_asset, is_url
 
 
-def check_class_names(names):
-    """
+def check_class_names(names: Union[List, Dict]) -> Dict[int, str]:
+    """
+    Check class names and convert to dict format if needed.
+
+    Args:
+        names (list | dict): Class names as list or dict format.
+
+    Returns:
+        (dict): Class names in dict format with integer keys and string values.
+
+    Raises:
+        KeyError: If class indices are invalid for the dataset size.
+    """
     if isinstance(names, list):  # names is a list
         names = dict(enumerate(names))  # convert to dict
     if isinstance(names, dict):
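The list-to-dict conversion the new docstring documents is visible in the context lines and behaves like this:

# A list of names becomes a {index: name} dict.
names = dict(enumerate(["person", "bicycle", "car"]))
assert names == {0: "person", 1: "bicycle", 2: "car"}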
@@ -38,8 +49,16 @@ def check_class_names(names):
     return names
 
 
-def default_class_names(data=None):
-    """
+def default_class_names(data: Optional[Union[str, Path]] = None) -> Dict[int, str]:
+    """
+    Apply default class names to an input YAML file or return numerical class names.
+
+    Args:
+        data (str | Path, optional): Path to YAML file containing class names.
+
+    Returns:
+        (dict): Dictionary mapping class indices to class names.
+    """
     if data:
         try:
             return YAML.load(check_yaml(data))["names"]
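When no YAML is supplied (or loading fails), the documented behavior is to fall back to generated numeric names; a sketch of that shape, with the exact count being an assumption here:

# Assumed fallback shape: {0: "class0", 1: "class1", ...}; treat the range as illustrative.
names = {i: f"class{i}" for i in range(999)}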
@@ -50,7 +69,7 @@ def default_class_names(data=None):
 
 class AutoBackend(nn.Module):
     """
-
+    Handle dynamic backend selection for running inference using Ultralytics YOLO models.
 
     The AutoBackend class is designed to provide an abstraction layer for various inference engines. It supports a wide
     range of formats, each with specific naming conventions as outlined below:
@@ -82,6 +101,24 @@ class AutoBackend(nn.Module):
         names (dict): A dictionary of class names that the model can detect.
         stride (int): The model stride, typically 32 for YOLO models.
         fp16 (bool): Whether the model uses half-precision (FP16) inference.
+        nhwc (bool): Whether the model expects NHWC input format instead of NCHW.
+        pt (bool): Whether the model is a PyTorch model.
+        jit (bool): Whether the model is a TorchScript model.
+        onnx (bool): Whether the model is an ONNX model.
+        xml (bool): Whether the model is an OpenVINO model.
+        engine (bool): Whether the model is a TensorRT engine.
+        coreml (bool): Whether the model is a CoreML model.
+        saved_model (bool): Whether the model is a TensorFlow SavedModel.
+        pb (bool): Whether the model is a TensorFlow GraphDef.
+        tflite (bool): Whether the model is a TensorFlow Lite model.
+        edgetpu (bool): Whether the model is a TensorFlow Edge TPU model.
+        tfjs (bool): Whether the model is a TensorFlow.js model.
+        paddle (bool): Whether the model is a PaddlePaddle model.
+        mnn (bool): Whether the model is an MNN model.
+        ncnn (bool): Whether the model is an NCNN model.
+        imx (bool): Whether the model is an IMX model.
+        rknn (bool): Whether the model is an RKNN model.
+        triton (bool): Whether the model is a Triton Inference Server model.
 
     Methods:
         forward: Run inference on an input image.
@@ -113,7 +150,7 @@ class AutoBackend(nn.Module):
             weights (str | List[str] | torch.nn.Module): Path to the model weights file or a module instance.
             device (torch.device): Device to run the model on.
             dnn (bool): Use OpenCV DNN module for ONNX inference.
-            data (str | Path
+            data (str | Path, optional): Path to the additional data.yaml file containing class names.
             fp16 (bool): Enable half-precision inference. Supported only on specific backends.
             batch (int): Batch-size to assume for inference.
             fuse (bool): Fuse Conv2D + BatchNorm layers for optimization.
@@ -567,15 +604,22 @@ class AutoBackend(nn.Module):
 
         self.__dict__.update(locals())  # assign all variables to self
 
-    def forward(self, im, augment=False, visualize=False, embed=None, **kwargs):
+    def forward(
+        self,
+        im: torch.Tensor,
+        augment: bool = False,
+        visualize: bool = False,
+        embed: Optional[List] = None,
+        **kwargs: Any,
+    ) -> Union[torch.Tensor, List[torch.Tensor]]:
         """
-
+        Run inference on an AutoBackend model.
 
         Args:
             im (torch.Tensor): The image tensor to perform inference on.
             augment (bool): Whether to perform data augmentation during inference.
             visualize (bool): Whether to visualize the output predictions.
-            embed (list
+            embed (list, optional): A list of feature vectors/embeddings to return.
             **kwargs (Any): Additional keyword arguments for model configuration.
 
         Returns:
@@ -632,7 +676,7 @@ class AutoBackend(nn.Module):
             results = [None] * n  # preallocate list with None to match the number of images
 
             def callback(request, userdata):
-                """
+                """Place result in preallocated list using userdata index."""
                 results[userdata] = request.results
 
             # Create AsyncInferQueue, set the callback and start asynchronous inference for each input image
@@ -780,7 +824,7 @@ class AutoBackend(nn.Module):
         else:
             return self.from_numpy(y)
 
-    def from_numpy(self, x):
+    def from_numpy(self, x: np.ndarray) -> torch.Tensor:
         """
         Convert a numpy array to a tensor.
 
@@ -792,7 +836,7 @@ class AutoBackend(nn.Module):
         """
         return torch.tensor(x).to(self.device) if isinstance(x, np.ndarray) else x
 
-    def warmup(self, imgsz=(1, 3, 640, 640)):
+    def warmup(self, imgsz: Tuple[int, int, int, int] = (1, 3, 640, 640)) -> None:
         """
         Warm up the model by running one forward pass with a dummy input.
 
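Taken together, the annotated forward/warmup signatures are exercised as below; a minimal sketch where "yolo11n.pt" is a placeholder weights file that must be available locally or downloadable:

import torch
from ultralytics.nn.autobackend import AutoBackend

model = AutoBackend("yolo11n.pt", device=torch.device("cpu"))
model.warmup(imgsz=(1, 3, 640, 640))            # one dummy forward pass, returns None
y = model.forward(torch.zeros(1, 3, 640, 640))  # torch.Tensor or List[torch.Tensor]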
@@ -808,9 +852,9 @@ class AutoBackend(nn.Module):
         self.forward(im)  # warmup
 
     @staticmethod
-    def _model_type(p="path/to/model.pt"):
+    def _model_type(p: str = "path/to/model.pt") -> List[bool]:
         """
-
+        Take a path to a model file and return the model type.
 
         Args:
             p (str): Path to the model file.
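_model_type remains a static method, so the annotated List[bool] return (one flag per supported export format) can be inspected directly; the ".onnx" path is illustrative:

from ultralytics.nn.autobackend import AutoBackend

flags = AutoBackend._model_type("model.onnx")  # one boolean per export format
print(any(flags))  # True: the .onnx suffix is recognized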
ultralytics/nn/modules/__init__.py CHANGED
@@ -1,12 +1,12 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """
-Ultralytics modules.
+Ultralytics neural network modules.
 
-This module provides access to various neural network components used in Ultralytics models, including convolution
-attention mechanisms, transformer components, and detection/segmentation heads.
+This module provides access to various neural network components used in Ultralytics models, including convolution
+blocks, attention mechanisms, transformer components, and detection/segmentation heads.
 
 Examples:
-    Visualize a module with Netron
+    Visualize a module with Netron
     >>> from ultralytics.nn.modules import *
     >>> import torch
     >>> import os
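The Netron example in this docstring is cut off by the hunk context; a hedged completion of the same idea, exporting one module to ONNX so it can be opened in Netron (module choice and filename are illustrative):

import torch
from ultralytics.nn.modules import Conv

m = Conv(128, 128)
x = torch.ones(1, 128, 40, 40)
torch.onnx.export(m, x, "conv.onnx")  # open conv.onnx in Netron to visualize the graph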
ultralytics/nn/modules/activation.py CHANGED
@@ -10,7 +10,7 @@ class AGLU(nn.Module):
     Unified activation function module from AGLU.
 
     This class implements a parameterized activation function with learnable parameters lambda and kappa, based on the
-    AGLU (Adaptive Gated Linear Unit) approach
+    AGLU (Adaptive Gated Linear Unit) approach.
 
     Attributes:
         act (nn.Softplus): Softplus activation function with negative beta.
@@ -27,6 +27,9 @@ class AGLU(nn.Module):
         >>> output = m(input)
         >>> print(output.shape)
         torch.Size([2])
+
+    References:
+        https://github.com/kostas1515/AGLU
     """
 
     def __init__(self, device=None, dtype=None) -> None:
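Mirroring the docstring example above: AGLU is applied elementwise, so the output shape matches the input shape.

import torch
from ultralytics.nn.modules.activation import AGLU

m = AGLU()
x = torch.rand(2)
print(m(x).shape)  # torch.Size([2])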
|