ultralytics 8.3.142__py3-none-any.whl → 8.3.144__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +12 -12
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +39 -39
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +187 -157
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +1 -1
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +1 -1
- ultralytics/solutions/instance_segmentation.py +6 -3
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +16 -8
- ultralytics/solutions/object_cropper.py +12 -5
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +215 -85
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +42 -28
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/METADATA +1 -1
- ultralytics-8.3.144.dist-info/RECORD +272 -0
- ultralytics-8.3.142.dist-info/RECORD +0 -272
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.142.dist-info → ultralytics-8.3.144.dist-info}/top_level.txt +0 -0
ultralytics/nn/tasks.py
CHANGED
@@ -94,7 +94,30 @@ from ultralytics.utils.torch_utils import (
 
 
 class BaseModel(torch.nn.Module):
-    """
+    """
+    Base class for all YOLO models in the Ultralytics family.
+
+    This class provides common functionality for YOLO models including forward pass handling, model fusion,
+    information display, and weight loading capabilities.
+
+    Attributes:
+        model (torch.nn.Module): The neural network model.
+        save (list): List of layer indices to save outputs from.
+        stride (torch.Tensor): Model stride values.
+
+    Methods:
+        forward: Perform forward pass for training or inference.
+        predict: Perform inference on input tensor.
+        fuse: Fuse Conv2d and BatchNorm2d layers for optimization.
+        info: Print model information.
+        load: Load weights into the model.
+        loss: Compute loss for training.
+
+    Examples:
+        Create a BaseModel instance
+        >>> model = BaseModel()
+        >>> model.info()  # Display model information
+    """
 
     def forward(self, x, *args, **kwargs):
         """
@@ -319,7 +342,33 @@ class BaseModel(torch.nn.Module):
 
 
 class DetectionModel(BaseModel):
-    """
+    """
+    YOLO detection model.
+
+    This class implements the YOLO detection architecture, handling model initialization, forward pass,
+    augmented inference, and loss computation for object detection tasks.
+
+    Attributes:
+        yaml (dict): Model configuration dictionary.
+        model (torch.nn.Sequential): The neural network model.
+        save (list): List of layer indices to save outputs from.
+        names (dict): Class names dictionary.
+        inplace (bool): Whether to use inplace operations.
+        end2end (bool): Whether the model uses end-to-end detection.
+        stride (torch.Tensor): Model stride values.
+
+    Methods:
+        __init__: Initialize the YOLO detection model.
+        _predict_augment: Perform augmented inference.
+        _descale_pred: De-scale predictions following augmented inference.
+        _clip_augmented: Clip YOLO augmented inference tails.
+        init_criterion: Initialize the loss criterion.
+
+    Examples:
+        Initialize a detection model
+        >>> model = DetectionModel("yolo11n.yaml", ch=3, nc=80)
+        >>> results = model.predict(image_tensor)
+    """
 
     def __init__(self, cfg="yolo11n.yaml", ch=3, nc=None, verbose=True):
         """
@@ -447,7 +496,21 @@ class DetectionModel(BaseModel):
 
 
 class OBBModel(DetectionModel):
-    """
+    """
+    YOLO Oriented Bounding Box (OBB) model.
+
+    This class extends DetectionModel to handle oriented bounding box detection tasks, providing specialized
+    loss computation for rotated object detection.
+
+    Methods:
+        __init__: Initialize YOLO OBB model.
+        init_criterion: Initialize the loss criterion for OBB detection.
+
+    Examples:
+        Initialize an OBB model
+        >>> model = OBBModel("yolo11n-obb.yaml", ch=3, nc=80)
+        >>> results = model.predict(image_tensor)
+    """
 
     def __init__(self, cfg="yolo11n-obb.yaml", ch=3, nc=None, verbose=True):
         """
@@ -467,7 +530,21 @@ class OBBModel(DetectionModel):
 
 
 class SegmentationModel(DetectionModel):
-    """
+    """
+    YOLO segmentation model.
+
+    This class extends DetectionModel to handle instance segmentation tasks, providing specialized
+    loss computation for pixel-level object detection and segmentation.
+
+    Methods:
+        __init__: Initialize YOLO segmentation model.
+        init_criterion: Initialize the loss criterion for segmentation.
+
+    Examples:
+        Initialize a segmentation model
+        >>> model = SegmentationModel("yolo11n-seg.yaml", ch=3, nc=80)
+        >>> results = model.predict(image_tensor)
+    """
 
     def __init__(self, cfg="yolo11n-seg.yaml", ch=3, nc=None, verbose=True):
         """
@@ -487,7 +564,24 @@ class SegmentationModel(DetectionModel):
 
 
 class PoseModel(DetectionModel):
-    """
+    """
+    YOLO pose model.
+
+    This class extends DetectionModel to handle human pose estimation tasks, providing specialized
+    loss computation for keypoint detection and pose estimation.
+
+    Attributes:
+        kpt_shape (tuple): Shape of keypoints data (num_keypoints, num_dimensions).
+
+    Methods:
+        __init__: Initialize YOLO pose model.
+        init_criterion: Initialize the loss criterion for pose estimation.
+
+    Examples:
+        Initialize a pose model
+        >>> model = PoseModel("yolo11n-pose.yaml", ch=3, nc=1, data_kpt_shape=(17, 3))
+        >>> results = model.predict(image_tensor)
+    """
 
     def __init__(self, cfg="yolo11n-pose.yaml", ch=3, nc=None, data_kpt_shape=(None, None), verbose=True):
         """
@@ -513,7 +607,29 @@ class PoseModel(DetectionModel):
 
 
 class ClassificationModel(BaseModel):
-    """
+    """
+    YOLO classification model.
+
+    This class implements the YOLO classification architecture for image classification tasks,
+    providing model initialization, configuration, and output reshaping capabilities.
+
+    Attributes:
+        yaml (dict): Model configuration dictionary.
+        model (torch.nn.Sequential): The neural network model.
+        stride (torch.Tensor): Model stride values.
+        names (dict): Class names dictionary.
+
+    Methods:
+        __init__: Initialize ClassificationModel.
+        _from_yaml: Set model configurations and define architecture.
+        reshape_outputs: Update model to specified class count.
+        init_criterion: Initialize the loss criterion.
+
+    Examples:
+        Initialize a classification model
+        >>> model = ClassificationModel("yolo11n-cls.yaml", ch=3, nc=1000)
+        >>> results = model.predict(image_tensor)
+    """
 
     def __init__(self, cfg="yolo11n-cls.yaml", ch=3, nc=None, verbose=True):
         """
@@ -594,10 +710,20 @@ class RTDETRDetectionModel(DetectionModel):
     the training and inference processes. RTDETR is an object detection and tracking model that extends from the
     DetectionModel base class.
 
+    Attributes:
+        nc (int): Number of classes for detection.
+        criterion (RTDETRDetectionLoss): Loss function for training.
+
     Methods:
-
-
-
+        __init__: Initialize the RTDETRDetectionModel.
+        init_criterion: Initialize the loss criterion.
+        loss: Compute loss for training.
+        predict: Perform forward pass through the model.
+
+    Examples:
+        Initialize an RTDETR model
+        >>> model = RTDETRDetectionModel("rtdetr-l.yaml", ch=3, nc=80)
+        >>> results = model.predict(image_tensor)
     """
 
     def __init__(self, cfg="rtdetr-l.yaml", ch=3, nc=None, verbose=True):
@@ -627,7 +753,8 @@ class RTDETRDetectionModel(DetectionModel):
             preds (torch.Tensor, optional): Precomputed model predictions.
 
         Returns:
-            (
+            loss_sum (torch.Tensor): Total loss value.
+            loss_items (torch.Tensor): Main three losses in a tensor.
         """
         if not hasattr(self, "criterion"):
             self.criterion = self.init_criterion()
@@ -700,7 +827,29 @@ class RTDETRDetectionModel(DetectionModel):
 
 
 class WorldModel(DetectionModel):
-    """
+    """
+    YOLOv8 World Model.
+
+    This class implements the YOLOv8 World model for open-vocabulary object detection, supporting text-based
+    class specification and CLIP model integration for zero-shot detection capabilities.
+
+    Attributes:
+        txt_feats (torch.Tensor): Text feature embeddings for classes.
+        clip_model (torch.nn.Module): CLIP model for text encoding.
+
+    Methods:
+        __init__: Initialize YOLOv8 world model.
+        set_classes: Set classes for offline inference.
+        get_text_pe: Get text positional embeddings.
+        predict: Perform forward pass with text features.
+        loss: Compute loss with text features.
+
+    Examples:
+        Initialize a world model
+        >>> model = WorldModel("yolov8s-world.yaml", ch=3, nc=80)
+        >>> model.set_classes(["person", "car", "bicycle"])
+        >>> results = model.predict(image_tensor)
+    """
 
     def __init__(self, cfg="yolov8s-world.yaml", ch=3, nc=None, verbose=True):
         """
@@ -815,7 +964,32 @@ class WorldModel(DetectionModel):
 
 
 class YOLOEModel(DetectionModel):
-    """
+    """
+    YOLOE detection model.
+
+    This class implements the YOLOE architecture for efficient object detection with text and visual prompts,
+    supporting both prompt-based and prompt-free inference modes.
+
+    Attributes:
+        pe (torch.Tensor): Prompt embeddings for classes.
+        clip_model (torch.nn.Module): CLIP model for text encoding.
+
+    Methods:
+        __init__: Initialize YOLOE model.
+        get_text_pe: Get text positional embeddings.
+        get_visual_pe: Get visual embeddings.
+        set_vocab: Set vocabulary for prompt-free model.
+        get_vocab: Get fused vocabulary layer.
+        set_classes: Set classes for offline inference.
+        get_cls_pe: Get class positional embeddings.
+        predict: Perform forward pass with prompts.
+        loss: Compute loss with prompts.
+
+    Examples:
+        Initialize a YOLOE model
+        >>> model = YOLOEModel("yoloe-v8s.yaml", ch=3, nc=80)
+        >>> results = model.predict(image_tensor, tpe=text_embeddings)
+    """
 
     def __init__(self, cfg="yoloe-v8s.yaml", ch=3, nc=None, verbose=True):
         """
@@ -861,7 +1035,7 @@ class YOLOEModel(DetectionModel):
         assert not self.training
         head = self.model[-1]
         assert isinstance(head, YOLOEDetect)
-        return head.get_tpe(txt_feats)  # run
+        return head.get_tpe(txt_feats)  # run auxiliary text head
 
     @smart_inference_mode()
     def get_visual_pe(self, img, visual):
@@ -1040,7 +1214,21 @@ class YOLOEModel(DetectionModel):
 
 
 class YOLOESegModel(YOLOEModel, SegmentationModel):
-    """
+    """
+    YOLOE segmentation model.
+
+    This class extends YOLOEModel to handle instance segmentation tasks with text and visual prompts,
+    providing specialized loss computation for pixel-level object detection and segmentation.
+
+    Methods:
+        __init__: Initialize YOLOE segmentation model.
+        loss: Compute loss with prompts for segmentation.
+
+    Examples:
+        Initialize a YOLOE segmentation model
+        >>> model = YOLOESegModel("yoloe-v8s-seg.yaml", ch=3, nc=80)
+        >>> results = model.predict(image_tensor, tpe=text_embeddings)
+    """
 
     def __init__(self, cfg="yoloe-v8s-seg.yaml", ch=3, nc=None, verbose=True):
         """
@@ -1074,7 +1262,23 @@ class YOLOESegModel(YOLOEModel, SegmentationModel):
 
 
 class Ensemble(torch.nn.ModuleList):
-    """
+    """
+    Ensemble of models.
+
+    This class allows combining multiple YOLO models into an ensemble for improved performance through
+    model averaging or other ensemble techniques.
+
+    Methods:
+        __init__: Initialize an ensemble of models.
+        forward: Generate predictions from all models in the ensemble.
+
+    Examples:
+        Create an ensemble of models
+        >>> ensemble = Ensemble()
+        >>> ensemble.append(model1)
+        >>> ensemble.append(model2)
+        >>> results = ensemble(image_tensor)
+    """
 
     def __init__(self):
         """Initialize an ensemble of models."""
@@ -1091,7 +1295,8 @@ class Ensemble(torch.nn.ModuleList):
             visualize (bool): Whether to visualize the features.
 
         Returns:
-            (
+            y (torch.Tensor): Concatenated predictions from all models.
+            train_out (None): Always None for ensemble inference.
         """
         y = [module(x, augment, profile, visualize)[0] for module in self]
         # y = torch.stack(y).max(0)[0]  # max ensemble
@@ -1195,7 +1400,7 @@ class SafeUnpickler(pickle.Unpickler):
 
 def torch_safe_load(weight, safe_only=False):
     """
-
+    Attempt to load a PyTorch model with the torch.load() function. If a ModuleNotFoundError is raised, it catches the
     error, logs a warning message, and attempts to install the missing module via the check_requirements() function.
     After installation, the function again attempts to load the model using torch.load().
 
@@ -1329,7 +1534,8 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
        fuse (bool): Whether to fuse model.
 
    Returns:
-        (
+        model (torch.nn.Module): Loaded model.
+        ckpt (dict): Model checkpoint dictionary.
    """
    ckpt, weight = torch_safe_load(weight)  # load ckpt
    args = {**DEFAULT_CFG_DICT, **(ckpt.get("train_args", {}))}  # combine model and default args, preferring model args
@@ -1355,7 +1561,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
    return model, ckpt
 
 
-def parse_model(d, ch, verbose=True):  # model_dict, input_channels(3)
+def parse_model(d, ch, verbose=True):
    """
    Parse a YOLO model.yaml dictionary into a PyTorch model.
 
@@ -1365,7 +1571,8 @@ def parse_model(d, ch, verbose=True): # model_dict, input_channels(3)
        verbose (bool): Whether to print model details.
 
    Returns:
-        (
+        model (torch.nn.Sequential): PyTorch model.
+        save (list): Sorted list of output layers.
    """
    import ast
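The tasks.py hunks above are almost entirely documentation: new class-level docstrings and sharper Returns sections (for example, attempt_load_one_weight now documents a (model, ckpt) pair and parse_model a (model, save) pair). As a reading aid only, here is a minimal sketch of the calls those docstrings describe; it is not part of the diff and assumes a standard ultralytics install where the yolo11n.yaml config resolves and the yolo11n.pt weights can be downloaded.

import torch
from ultralytics.nn.tasks import DetectionModel, attempt_load_one_weight

# Build a detection model from YAML, as in the new DetectionModel docstring example.
model = DetectionModel("yolo11n.yaml", ch=3, nc=80)
preds = model.predict(torch.zeros(1, 3, 640, 640))  # dummy image tensor; shape is illustrative only

# attempt_load_one_weight is documented above as returning (model, ckpt).
loaded_model, ckpt = attempt_load_one_weight("yolo11n.pt")  # may download weights on first use
print(type(loaded_model).__name__, sorted(ckpt)[:3])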
ultralytics/nn/text_model.py
CHANGED
@@ -2,6 +2,7 @@
 
 from abc import abstractmethod
 from pathlib import Path
+from typing import List, Union
 
 import torch
 import torch.nn as nn
@@ -21,11 +22,11 @@ class TextModel(nn.Module):
     Abstract base class for text encoding models.
 
     This class defines the interface for text encoding models used in vision-language tasks. Subclasses must implement
-    the tokenize and encode_text methods.
+    the tokenize and encode_text methods to provide text tokenization and encoding functionality.
 
     Methods:
-        tokenize: Convert input texts to tokens.
-        encode_text: Encode tokenized texts into feature vectors.
+        tokenize: Convert input texts to tokens for model processing.
+        encode_text: Encode tokenized texts into normalized feature vectors.
     """
 
     def __init__(self):
@@ -33,12 +34,12 @@ class TextModel(nn.Module):
         super().__init__()
 
     @abstractmethod
-    def tokenize(texts):
+    def tokenize(self, texts):
         """Convert input texts to tokens for model processing."""
         pass
 
     @abstractmethod
-    def encode_text(texts, dtype):
+    def encode_text(self, texts, dtype):
         """Encode tokenized texts into normalized feature vectors."""
         pass
 
@@ -59,7 +60,6 @@ class CLIP(TextModel):
         encode_text: Encode tokenized texts into normalized feature vectors.
 
     Examples:
-        >>> from ultralytics.models.sam import CLIP
         >>> import torch
         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         >>> clip_model = CLIP(size="ViT-B/32", device=device)
@@ -68,7 +68,7 @@ class CLIP(TextModel):
         >>> print(text_features.shape)
     """
 
-    def __init__(self, size, device):
+    def __init__(self, size: str, device: torch.device):
         """
         Initialize the CLIP text encoder.
 
@@ -81,7 +81,6 @@ class CLIP(TextModel):
 
         Examples:
             >>> import torch
-            >>> from ultralytics.models.sam.modules.clip import CLIP
             >>> clip_model = CLIP("ViT-B/32", device=torch.device("cuda:0"))
             >>> text_features = clip_model.encode_text(["a photo of a cat", "a photo of a dog"])
         """
@@ -91,7 +90,7 @@ class CLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts):
+    def tokenize(self, texts: Union[str, List[str]]):
         """
         Convert input texts to CLIP tokens.
 
@@ -109,7 +108,7 @@ class CLIP(TextModel):
         return clip.tokenize(texts).to(self.device)
 
     @smart_inference_mode()
-    def encode_text(self, texts, dtype=torch.float32):
+    def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32):
         """
         Encode tokenized texts into normalized feature vectors.
 
@@ -118,7 +117,7 @@ class CLIP(TextModel):
 
         Args:
             texts (torch.Tensor): Tokenized text inputs, typically created using the tokenize() method.
-            dtype (torch.dtype, optional): Data type for output features.
+            dtype (torch.dtype, optional): Data type for output features.
 
         Returns:
             (torch.Tensor): Normalized text feature vectors with unit length (L2 norm = 1).
@@ -140,7 +139,7 @@ class MobileCLIP(TextModel):
     Implement Apple's MobileCLIP text encoder for efficient text encoding.
 
     This class implements the TextModel interface using Apple's MobileCLIP model, providing efficient text encoding
-    capabilities for vision-language tasks.
+    capabilities for vision-language tasks with reduced computational requirements compared to standard CLIP models.
 
     Attributes:
         model (mobileclip.model.MobileCLIP): The loaded MobileCLIP model.
@@ -161,7 +160,7 @@ class MobileCLIP(TextModel):
 
     config_size_map = {"s0": "s0", "s1": "s1", "s2": "s2", "b": "b", "blt": "b"}
 
-    def __init__(self, size, device):
+    def __init__(self, size: str, device: torch.device):
         """
         Initialize the MobileCLIP text encoder.
 
@@ -172,7 +171,6 @@ class MobileCLIP(TextModel):
             device (torch.device): Device to load the model on.
 
         Examples:
-            >>> from ultralytics.nn.modules import MobileCLIP
             >>> import torch
             >>> model = MobileCLIP("s0", device=torch.device("cpu"))
             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
@@ -203,12 +201,12 @@ class MobileCLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts):
+    def tokenize(self, texts: List[str]):
         """
         Convert input texts to MobileCLIP tokens.
 
         Args:
-            texts (
+            texts (List[str]): List of text strings to tokenize.
 
         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
@@ -220,7 +218,7 @@ class MobileCLIP(TextModel):
         return self.tokenizer(texts).to(self.device)
 
     @smart_inference_mode()
-    def encode_text(self, texts, dtype=torch.float32):
+    def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32):
         """
         Encode tokenized texts into normalized feature vectors.
 
@@ -247,11 +245,11 @@ class MobileCLIPTS(TextModel):
     """
     Load a TorchScript traced version of MobileCLIP.
 
-    This class implements the TextModel interface using Apple's MobileCLIP model
-    capabilities for vision-language tasks.
+    This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format, providing
+    efficient text encoding capabilities for vision-language tasks with optimized inference performance.
 
     Attributes:
-        encoder (
+        encoder (torch.jit.ScriptModule): The loaded TorchScript MobileCLIP text encoder.
         tokenizer (callable): Tokenizer function for processing text inputs.
         device (torch.device): Device where the model is loaded.
 
@@ -261,24 +259,23 @@ class MobileCLIPTS(TextModel):
 
     Examples:
         >>> device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-        >>> text_encoder =
+        >>> text_encoder = MobileCLIPTS(device=device)
         >>> tokens = text_encoder.tokenize(["a photo of a cat", "a photo of a dog"])
         >>> features = text_encoder.encode_text(tokens)
     """
 
-    def __init__(self, device):
+    def __init__(self, device: torch.device):
         """
-        Initialize the MobileCLIP text encoder.
+        Initialize the MobileCLIP TorchScript text encoder.
 
-        This class implements the TextModel interface using Apple's MobileCLIP model
+        This class implements the TextModel interface using Apple's MobileCLIP model in TorchScript format for
+        efficient text encoding with optimized inference performance.
 
         Args:
             device (torch.device): Device to load the model on.
 
         Examples:
-            >>>
-            >>> import torch
-            >>> model = MobileCLIP(device=torch.device("cpu"))
+            >>> model = MobileCLIPTS(device=torch.device("cpu"))
             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
             >>> features = model.encode_text(tokens)
         """
@@ -289,24 +286,24 @@ class MobileCLIPTS(TextModel):
         self.tokenizer = clip.clip.tokenize
         self.device = device
 
-    def tokenize(self, texts):
+    def tokenize(self, texts: List[str]):
         """
         Convert input texts to MobileCLIP tokens.
 
         Args:
-            texts (
+            texts (List[str]): List of text strings to tokenize.
 
         Returns:
             (torch.Tensor): Tokenized text inputs with shape (batch_size, sequence_length).
 
         Examples:
-            >>> model =
+            >>> model = MobileCLIPTS("cpu")
             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
         """
         return self.tokenizer(texts).to(self.device)
 
     @smart_inference_mode()
-    def encode_text(self, texts, dtype=torch.float32):
+    def encode_text(self, texts: torch.Tensor, dtype: torch.dtype = torch.float32):
         """
         Encode tokenized texts into normalized feature vectors.
 
@@ -318,7 +315,7 @@ class MobileCLIPTS(TextModel):
             (torch.Tensor): Normalized text feature vectors with L2 normalization applied.
 
         Examples:
-            >>> model =
+            >>> model = MobileCLIPTS(device="cpu")
             >>> tokens = model.tokenize(["a photo of a cat", "a photo of a dog"])
             >>> features = model.encode_text(tokens)
             >>> features.shape
@@ -328,7 +325,7 @@ class MobileCLIPTS(TextModel):
         return self.encoder(texts)
 
 
-def build_text_model(variant, device=None):
+def build_text_model(variant: str, device: torch.device = None):
     """
     Build a text encoding model based on the specified variant.
 
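The text_model.py changes add type hints and tidy the CLIP/MobileCLIP docstrings without altering behavior. For orientation, a minimal sketch mirroring the CLIP docstring example shown in the diff above; it is not part of the diff and assumes the optional OpenAI CLIP dependency is installed and the ViT-B/32 weights can be downloaded on first use.

import torch
from ultralytics.nn.text_model import CLIP

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
clip_model = CLIP(size="ViT-B/32", device=device)  # signature now annotated as (size: str, device: torch.device)
tokens = clip_model.tokenize(["a photo of a cat", "a photo of a dog"])
text_features = clip_model.encode_text(tokens)  # unit-L2-normalized feature vectors
print(text_features.shape)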
ultralytics/solutions/ai_gym.py
CHANGED
@@ -19,7 +19,7 @@ class AIGym(BaseSolution):
         kpts (List[int]): Indices of keypoints used for angle calculation.
 
     Methods:
-        process:
+        process: Process a frame to detect poses, calculate angles, and count repetitions.
 
     Examples:
         >>> gym = AIGym(model="yolo11n-pose.pt")
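The ai_gym.py change only completes the Methods entry for process. A minimal usage sketch of that workflow, not taken from the diff; the video path "workout.mp4" is a hypothetical placeholder and the yolo11n-pose.pt weights are assumed downloadable.

import cv2
from ultralytics.solutions import AIGym

gym = AIGym(model="yolo11n-pose.pt")  # as in the Examples block above
cap = cv2.VideoCapture("workout.mp4")  # hypothetical input video
while cap.isOpened():
    ok, frame = cap.read()
    if not ok:
        break
    results = gym.process(frame)  # detect poses, compute joint angles, count repetitions
cap.release()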
ultralytics/solutions/analytics.py
CHANGED
@@ -1,6 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 from itertools import cycle
+from typing import Dict, Optional
 
 import cv2
 import numpy as np
@@ -86,7 +87,7 @@ class Analytics(BaseSolution):
         if self.type == "pie":  # Ensure pie chart is circular
             self.ax.axis("equal")
 
-    def process(self, im0, frame_number):
+    def process(self, im0: np.ndarray, frame_number: int) -> SolutionResults:
         """
         Process image data and run object tracking to update analytics charts.
 
@@ -126,14 +127,16 @@ class Analytics(BaseSolution):
         # return output dictionary with summary for more usage
         return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
 
-    def update_graph(
+    def update_graph(
+        self, frame_number: int, count_dict: Optional[Dict[str, int]] = None, plot: str = "line"
+    ) -> np.ndarray:
         """
         Update the graph with new data for single or multiple classes.
 
         Args:
             frame_number (int): The current frame number.
-            count_dict (Dict[str, int]
-                classes. If None, updates a single line graph.
+            count_dict (Dict[str, int], optional): Dictionary with class names as keys and counts as values for
+                multiple classes. If None, updates a single line graph.
             plot (str): Type of the plot. Options are 'line', 'bar', 'pie', or 'area'.
 
         Returns: