ultralytics 8.3.143__py3-none-any.whl → 8.3.145__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/conftest.py +7 -24
- tests/test_cli.py +1 -1
- tests/test_cuda.py +7 -2
- tests/test_engine.py +7 -8
- tests/test_exports.py +16 -16
- tests/test_integrations.py +1 -1
- tests/test_solutions.py +11 -11
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +16 -13
- ultralytics/data/annotator.py +6 -5
- ultralytics/data/augment.py +127 -126
- ultralytics/data/base.py +54 -51
- ultralytics/data/build.py +47 -23
- ultralytics/data/converter.py +47 -43
- ultralytics/data/dataset.py +51 -50
- ultralytics/data/loaders.py +77 -44
- ultralytics/data/split.py +22 -9
- ultralytics/data/split_dota.py +63 -39
- ultralytics/data/utils.py +59 -39
- ultralytics/engine/exporter.py +79 -27
- ultralytics/engine/model.py +52 -51
- ultralytics/engine/predictor.py +37 -28
- ultralytics/engine/results.py +191 -161
- ultralytics/engine/trainer.py +36 -19
- ultralytics/engine/tuner.py +12 -9
- ultralytics/engine/validator.py +7 -9
- ultralytics/hub/__init__.py +11 -13
- ultralytics/hub/auth.py +22 -2
- ultralytics/hub/google/__init__.py +19 -19
- ultralytics/hub/session.py +37 -51
- ultralytics/hub/utils.py +19 -5
- ultralytics/models/fastsam/model.py +30 -12
- ultralytics/models/fastsam/predict.py +5 -6
- ultralytics/models/fastsam/utils.py +3 -3
- ultralytics/models/fastsam/val.py +10 -6
- ultralytics/models/nas/model.py +9 -5
- ultralytics/models/nas/predict.py +6 -6
- ultralytics/models/nas/val.py +3 -3
- ultralytics/models/rtdetr/model.py +7 -6
- ultralytics/models/rtdetr/predict.py +14 -7
- ultralytics/models/rtdetr/train.py +10 -4
- ultralytics/models/rtdetr/val.py +36 -9
- ultralytics/models/sam/amg.py +30 -12
- ultralytics/models/sam/build.py +22 -22
- ultralytics/models/sam/model.py +10 -9
- ultralytics/models/sam/modules/blocks.py +76 -80
- ultralytics/models/sam/modules/decoders.py +6 -8
- ultralytics/models/sam/modules/encoders.py +23 -26
- ultralytics/models/sam/modules/memory_attention.py +13 -1
- ultralytics/models/sam/modules/sam.py +57 -26
- ultralytics/models/sam/modules/tiny_encoder.py +232 -237
- ultralytics/models/sam/modules/transformer.py +13 -13
- ultralytics/models/sam/modules/utils.py +11 -19
- ultralytics/models/sam/predict.py +114 -101
- ultralytics/models/utils/loss.py +98 -77
- ultralytics/models/utils/ops.py +116 -67
- ultralytics/models/yolo/classify/predict.py +5 -5
- ultralytics/models/yolo/classify/train.py +32 -28
- ultralytics/models/yolo/classify/val.py +7 -8
- ultralytics/models/yolo/detect/predict.py +1 -0
- ultralytics/models/yolo/detect/train.py +15 -14
- ultralytics/models/yolo/detect/val.py +37 -36
- ultralytics/models/yolo/model.py +106 -23
- ultralytics/models/yolo/obb/predict.py +3 -4
- ultralytics/models/yolo/obb/train.py +14 -6
- ultralytics/models/yolo/obb/val.py +29 -23
- ultralytics/models/yolo/pose/predict.py +9 -8
- ultralytics/models/yolo/pose/train.py +24 -16
- ultralytics/models/yolo/pose/val.py +44 -26
- ultralytics/models/yolo/segment/predict.py +5 -5
- ultralytics/models/yolo/segment/train.py +11 -7
- ultralytics/models/yolo/segment/val.py +2 -2
- ultralytics/models/yolo/world/train.py +33 -23
- ultralytics/models/yolo/world/train_world.py +11 -3
- ultralytics/models/yolo/yoloe/predict.py +11 -11
- ultralytics/models/yolo/yoloe/train.py +73 -21
- ultralytics/models/yolo/yoloe/train_seg.py +10 -7
- ultralytics/models/yolo/yoloe/val.py +42 -18
- ultralytics/nn/autobackend.py +59 -15
- ultralytics/nn/modules/__init__.py +4 -4
- ultralytics/nn/modules/activation.py +4 -1
- ultralytics/nn/modules/block.py +178 -111
- ultralytics/nn/modules/conv.py +6 -5
- ultralytics/nn/modules/head.py +469 -121
- ultralytics/nn/modules/transformer.py +147 -58
- ultralytics/nn/tasks.py +227 -20
- ultralytics/nn/text_model.py +30 -33
- ultralytics/solutions/ai_gym.py +4 -6
- ultralytics/solutions/analytics.py +7 -4
- ultralytics/solutions/config.py +10 -10
- ultralytics/solutions/distance_calculation.py +11 -10
- ultralytics/solutions/heatmap.py +2 -2
- ultralytics/solutions/instance_segmentation.py +7 -4
- ultralytics/solutions/object_blurrer.py +3 -3
- ultralytics/solutions/object_counter.py +15 -11
- ultralytics/solutions/object_cropper.py +3 -2
- ultralytics/solutions/parking_management.py +29 -28
- ultralytics/solutions/queue_management.py +6 -6
- ultralytics/solutions/region_counter.py +10 -3
- ultralytics/solutions/security_alarm.py +3 -3
- ultralytics/solutions/similarity_search.py +85 -24
- ultralytics/solutions/solutions.py +189 -79
- ultralytics/solutions/speed_estimation.py +28 -22
- ultralytics/solutions/streamlit_inference.py +17 -12
- ultralytics/solutions/trackzone.py +4 -4
- ultralytics/trackers/basetrack.py +16 -23
- ultralytics/trackers/bot_sort.py +30 -20
- ultralytics/trackers/byte_tracker.py +70 -64
- ultralytics/trackers/track.py +4 -8
- ultralytics/trackers/utils/gmc.py +31 -58
- ultralytics/trackers/utils/kalman_filter.py +37 -37
- ultralytics/trackers/utils/matching.py +1 -1
- ultralytics/utils/__init__.py +105 -89
- ultralytics/utils/autobatch.py +16 -3
- ultralytics/utils/autodevice.py +54 -24
- ultralytics/utils/benchmarks.py +45 -29
- ultralytics/utils/callbacks/base.py +3 -3
- ultralytics/utils/callbacks/clearml.py +9 -9
- ultralytics/utils/callbacks/comet.py +67 -25
- ultralytics/utils/callbacks/dvc.py +7 -10
- ultralytics/utils/callbacks/mlflow.py +2 -5
- ultralytics/utils/callbacks/neptune.py +7 -13
- ultralytics/utils/callbacks/raytune.py +1 -1
- ultralytics/utils/callbacks/tensorboard.py +5 -6
- ultralytics/utils/callbacks/wb.py +14 -14
- ultralytics/utils/checks.py +14 -13
- ultralytics/utils/dist.py +5 -5
- ultralytics/utils/downloads.py +94 -67
- ultralytics/utils/errors.py +5 -5
- ultralytics/utils/export.py +61 -47
- ultralytics/utils/files.py +23 -22
- ultralytics/utils/instance.py +48 -52
- ultralytics/utils/loss.py +78 -40
- ultralytics/utils/metrics.py +186 -130
- ultralytics/utils/ops.py +186 -190
- ultralytics/utils/patches.py +15 -17
- ultralytics/utils/plotting.py +71 -27
- ultralytics/utils/tal.py +21 -15
- ultralytics/utils/torch_utils.py +53 -50
- ultralytics/utils/triton.py +5 -4
- ultralytics/utils/tuner.py +5 -5
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/METADATA +2 -2
- ultralytics-8.3.145.dist-info/RECORD +272 -0
- ultralytics-8.3.143.dist-info/RECORD +0 -272
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/top_level.txt +0 -0
--- a/ultralytics/models/yolo/pose/val.py
+++ b/ultralytics/models/yolo/pose/val.py
@@ -1,6 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 from pathlib import Path
+from typing import Any, Dict, List, Optional, Tuple
 
 import numpy as np
 import torch
@@ -26,18 +27,20 @@ class PoseValidator(DetectionValidator):
         metrics (PoseMetrics): Metrics object for pose evaluation.
 
     Methods:
-        preprocess:
-        get_desc:
-        init_metrics:
-        _prepare_batch:
-
-
-
-
-
-
-
-
+        preprocess: Preprocess batch by converting keypoints data to float and moving it to the device.
+        get_desc: Return description of evaluation metrics in string format.
+        init_metrics: Initialize pose estimation metrics for YOLO model.
+        _prepare_batch: Prepare a batch for processing by converting keypoints to float and scaling to original
+            dimensions.
+        _prepare_pred: Prepare and scale keypoints in predictions for pose processing.
+        update_metrics: Update metrics with new predictions and ground truth data.
+        _process_batch: Return correct prediction matrix by computing Intersection over Union (IoU) between
+            detections and ground truth.
+        plot_val_samples: Plot and save validation set samples with ground truth bounding boxes and keypoints.
+        plot_predictions: Plot and save model predictions with bounding boxes and keypoints.
+        save_one_txt: Save YOLO pose detections to a text file in normalized coordinates.
+        pred_to_json: Convert YOLO predictions to COCO JSON format.
+        eval_json: Evaluate object detection model using COCO JSON format.
 
     Examples:
         >>> from ultralytics.models.yolo.pose import PoseValidator
@@ -82,13 +85,13 @@ class PoseValidator(DetectionValidator):
                 "See https://github.com/ultralytics/ultralytics/issues/4031."
             )
 
-    def preprocess(self, batch):
+    def preprocess(self, batch: Dict[str, Any]) -> Dict[str, Any]:
         """Preprocess batch by converting keypoints data to float and moving it to the device."""
         batch = super().preprocess(batch)
         batch["keypoints"] = batch["keypoints"].to(self.device).float()
         return batch
 
-    def get_desc(self):
+    def get_desc(self) -> str:
         """Return description of evaluation metrics in string format."""
         return ("%22s" + "%11s" * 10) % (
             "Class",
@@ -113,7 +116,7 @@ class PoseValidator(DetectionValidator):
         self.sigma = OKS_SIGMA if is_pose else np.ones(nkpt) / nkpt
         self.stats = dict(tp_p=[], tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
 
-    def _prepare_batch(self, si, batch):
+    def _prepare_batch(self, si: int, batch: Dict[str, Any]) -> Dict[str, Any]:
         """
         Prepare a batch for processing by converting keypoints to float and scaling to original dimensions.
 
@@ -122,7 +125,7 @@ class PoseValidator(DetectionValidator):
             batch (dict): Dictionary containing batch data with keys like 'keypoints', 'batch_idx', etc.
 
         Returns:
-
+            (dict): Prepared batch with keypoints scaled to original image dimensions.
 
         Notes:
             This method extends the parent class's _prepare_batch method by adding keypoint processing.
@@ -138,7 +141,7 @@ class PoseValidator(DetectionValidator):
         pbatch["kpts"] = kpts
         return pbatch
 
-    def _prepare_pred(self, pred, pbatch):
+    def _prepare_pred(self, pred: torch.Tensor, pbatch: Dict[str, Any]) -> Tuple[torch.Tensor, torch.Tensor]:
         """
         Prepare and scale keypoints in predictions for pose processing.
 
@@ -155,6 +158,7 @@ class PoseValidator(DetectionValidator):
 
         Returns:
             predn (torch.Tensor): Processed prediction boxes scaled to original image dimensions.
+            pred_kpts (torch.Tensor): Predicted keypoints scaled to original image dimensions.
         """
         predn = super()._prepare_pred(pred, pbatch)
         nk = pbatch["kpts"].shape[1]
@@ -162,7 +166,7 @@ class PoseValidator(DetectionValidator):
             ops.scale_coords(pbatch["imgsz"], pred_kpts, pbatch["ori_shape"], ratio_pad=pbatch["ratio_pad"])
         return predn, pred_kpts
 
-    def update_metrics(self, preds, batch):
+    def update_metrics(self, preds: List[torch.Tensor], batch: Dict[str, Any]):
         """
         Update metrics with new predictions and ground truth data.
 
@@ -224,7 +228,14 @@ class PoseValidator(DetectionValidator):
                     self.save_dir / "labels" / f"{Path(batch['im_file'][si]).stem}.txt",
                 )
 
-    def _process_batch(
+    def _process_batch(
+        self,
+        detections: torch.Tensor,
+        gt_bboxes: torch.Tensor,
+        gt_cls: torch.Tensor,
+        pred_kpts: Optional[torch.Tensor] = None,
+        gt_kpts: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
         """
         Return correct prediction matrix by computing Intersection over Union (IoU) between detections and ground truth.
 
@@ -234,9 +245,9 @@ class PoseValidator(DetectionValidator):
             gt_bboxes (torch.Tensor): Tensor with shape (M, 4) representing ground truth bounding boxes, where each
                 box is of the format (x1, y1, x2, y2).
             gt_cls (torch.Tensor): Tensor with shape (M,) representing ground truth class indices.
-            pred_kpts (torch.Tensor
+            pred_kpts (torch.Tensor, optional): Tensor with shape (N, 51) representing predicted keypoints, where
                 51 corresponds to 17 keypoints each having 3 values.
-            gt_kpts (torch.Tensor
+            gt_kpts (torch.Tensor, optional): Tensor with shape (N, 51) representing ground truth keypoints.
 
         Returns:
             (torch.Tensor): A tensor with shape (N, 10) representing the correct prediction matrix for 10 IoU levels,
@@ -255,7 +266,7 @@ class PoseValidator(DetectionValidator):
 
         return self.match_predictions(detections[:, 5], gt_cls, iou)
 
-    def plot_val_samples(self, batch, ni):
+    def plot_val_samples(self, batch: Dict[str, Any], ni: int):
         """
         Plot and save validation set samples with ground truth bounding boxes and keypoints.
 
@@ -281,7 +292,7 @@ class PoseValidator(DetectionValidator):
             on_plot=self.on_plot,
         )
 
-    def plot_predictions(self, batch, preds, ni):
+    def plot_predictions(self, batch: Dict[str, Any], preds: List[torch.Tensor], ni: int):
         """
         Plot and save model predictions with bounding boxes and keypoints.
 
@@ -305,7 +316,14 @@ class PoseValidator(DetectionValidator):
             on_plot=self.on_plot,
         )  # pred
 
-    def save_one_txt(
+    def save_one_txt(
+        self,
+        predn: torch.Tensor,
+        pred_kpts: torch.Tensor,
+        save_conf: bool,
+        shape: Tuple[int, int],
+        file: Path,
+    ):
         """
         Save YOLO pose detections to a text file in normalized coordinates.
 
@@ -331,7 +349,7 @@ class PoseValidator(DetectionValidator):
             keypoints=pred_kpts,
         ).save_txt(file, save_conf=save_conf)
 
-    def pred_to_json(self, predn, filename):
+    def pred_to_json(self, predn: torch.Tensor, filename: str):
         """
         Convert YOLO predictions to COCO JSON format.
 
@@ -364,7 +382,7 @@ class PoseValidator(DetectionValidator):
             }
         )
 
-    def eval_json(self, stats):
+    def eval_json(self, stats: Dict[str, Any]) -> Dict[str, Any]:
         """Evaluate object detection model using COCO JSON format."""
         if self.args.save_json and self.is_coco and len(self.jdict):
            anno_json = self.data["path"] / "annotations/person_keypoints_val2017.json"  # annotations
--- a/ultralytics/models/yolo/segment/predict.py
+++ b/ultralytics/models/yolo/segment/predict.py
@@ -18,9 +18,9 @@ class SegmentationPredictor(DetectionPredictor):
         batch (list): Current batch of images being processed.
 
     Methods:
-        postprocess:
-        construct_results:
-        construct_result:
+        postprocess: Apply non-max suppression and process segmentation detections.
+        construct_results: Construct a list of result objects from predictions.
+        construct_result: Construct a single result object from a prediction.
 
     Examples:
         >>> from ultralytics.utils import ASSETS
@@ -38,7 +38,7 @@ class SegmentationPredictor(DetectionPredictor):
         prediction results.
 
         Args:
-            cfg (dict): Configuration for the predictor.
+            cfg (dict): Configuration for the predictor.
             overrides (dict, optional): Configuration overrides that take precedence over cfg.
             _callbacks (list, optional): List of callback functions to be invoked during prediction.
         """
@@ -56,7 +56,7 @@ class SegmentationPredictor(DetectionPredictor):
 
         Returns:
             (list): List of Results objects containing the segmentation predictions for each image in the batch.
-
+                Each Results object includes both bounding boxes and segmentation masks.
 
         Examples:
             >>> predictor = SegmentationPredictor(overrides=dict(model="yolo11n-seg.pt"))
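
As a quick check of the updated SegmentationPredictor docstrings, a usage sketch built from the Examples block above (ASSETS points at the package's bundled sample images; the file name is illustrative):

from ultralytics.models.yolo.segment import SegmentationPredictor
from ultralytics.utils import ASSETS

# Each Results object carries both boxes and masks, per the postprocess docstring.
predictor = SegmentationPredictor(overrides=dict(model="yolo11n-seg.pt"))
results = predictor(source=ASSETS / "bus.jpg")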
--- a/ultralytics/models/yolo/segment/train.py
+++ b/ultralytics/models/yolo/segment/train.py
@@ -1,6 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
 from copy import copy
+from pathlib import Path
+from typing import Dict, Optional, Union
 
 from ultralytics.models import yolo
 from ultralytics.nn.tasks import SegmentationModel
@@ -25,7 +27,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
         >>> trainer.train()
     """
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+    def __init__(self, cfg=DEFAULT_CFG, overrides: Optional[Dict] = None, _callbacks=None):
         """
         Initialize a SegmentationTrainer object.
 
@@ -33,7 +35,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
         functionality. It sets the task to 'segment' and prepares the trainer for training segmentation models.
 
         Args:
-            cfg (dict): Configuration dictionary with default training settings.
+            cfg (dict): Configuration dictionary with default training settings.
             overrides (dict, optional): Dictionary of parameter overrides for the default configuration.
             _callbacks (list, optional): List of callback functions to be executed during training.
 
@@ -48,13 +50,15 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
             overrides["task"] = "segment"
         super().__init__(cfg, overrides, _callbacks)
 
-    def get_model(
+    def get_model(
+        self, cfg: Optional[Union[Dict, str]] = None, weights: Optional[Union[str, Path]] = None, verbose: bool = True
+    ):
         """
         Initialize and return a SegmentationModel with specified configuration and weights.
 
         Args:
-            cfg (dict | str
-            weights (str | Path
+            cfg (dict | str, optional): Model configuration. Can be a dictionary, a path to a YAML file, or None.
+            weights (str | Path, optional): Path to pretrained weights file.
             verbose (bool): Whether to display model information during initialization.
 
         Returns:
@@ -78,7 +82,7 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
             self.test_loader, save_dir=self.save_dir, args=copy(self.args), _callbacks=self.callbacks
         )
 
-    def plot_training_samples(self, batch, ni):
+    def plot_training_samples(self, batch: Dict, ni: int):
         """
         Plot training sample images with labels, bounding boxes, and masks.
 
@@ -119,5 +123,5 @@ class SegmentationTrainer(yolo.detect.DetectionTrainer):
         )
 
     def plot_metrics(self):
-        """
+        """Plot training/validation metrics."""
         plot_results(file=self.csv, segment=True, on_plot=self.on_plot)  # save results.png
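
The new get_model() signature accepts cfg as a dict or YAML path and weights as str or Path. A short training sketch in the style of the class docstring (model and data names are illustrative):

from ultralytics.models.yolo.segment import SegmentationTrainer

args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", epochs=3)
trainer = SegmentationTrainer(overrides=args)
trainer.train()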
--- a/ultralytics/models/yolo/segment/val.py
+++ b/ultralytics/models/yolo/segment/val.py
@@ -69,7 +69,7 @@ class SegmentationValidator(DetectionValidator):
         self.plot_masks = []
         if self.args.save_json:
             check_requirements("pycocotools>=2.0.6")
-        #
+        # More accurate vs faster
         self.process = ops.process_mask_native if self.args.save_json or self.args.save_txt else ops.process_mask
         self.stats = dict(tp_m=[], tp=[], conf=[], pred_cls=[], target_cls=[], target_img=[])
 
@@ -247,7 +247,7 @@ class SegmentationValidator(DetectionValidator):
         Returns:
             (torch.Tensor): A correct prediction matrix of shape (N, 10), where 10 represents different IoU levels.
 
-
+        Notes:
             - If `masks` is True, the function computes IoU between predicted and ground truth masks.
             - If `overlap` is True and `masks` is True, overlapping masks are taken into account when computing IoU.
 
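
The clarified comment documents an existing trade-off: with save_json or save_txt enabled the validator uses ops.process_mask_native (more accurate), otherwise ops.process_mask (faster). A sketch of how that switch is triggered (model and data names are illustrative):

from ultralytics.models.yolo.segment import SegmentationValidator

args = dict(model="yolo11n-seg.pt", data="coco8-seg.yaml", save_json=True)
validator = SegmentationValidator(args=args)  # save_json=True selects process_mask_native
validator()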
--- a/ultralytics/models/yolo/world/train.py
+++ b/ultralytics/models/yolo/world/train.py
@@ -2,6 +2,7 @@
 
 import itertools
 from pathlib import Path
+from typing import Any, Dict, List, Optional
 
 import torch
 
@@ -12,8 +13,8 @@ from ultralytics.utils import DEFAULT_CFG, LOGGER, RANK
 from ultralytics.utils.torch_utils import de_parallel
 
 
-def on_pretrain_routine_end(trainer):
-    """
+def on_pretrain_routine_end(trainer) -> None:
+    """Set up model classes and text encoder at the end of the pretrain routine."""
     if RANK in {-1, 0}:
         # Set class names for evaluation
         names = [name.split("/", 1)[0] for name in list(trainer.test_loader.dataset.data["names"].values())]
@@ -22,45 +23,54 @@ def on_pretrain_routine_end(trainer):
 
 class WorldTrainer(DetectionTrainer):
     """
-    A class
+    A trainer class for fine-tuning YOLO World models on close-set datasets.
 
-    This trainer extends the DetectionTrainer to support training YOLO World models, which combine
-
+    This trainer extends the DetectionTrainer to support training YOLO World models, which combine visual and textual
+    features for improved object detection and understanding. It handles text embedding generation and caching to
+    accelerate training with multi-modal data.
 
     Attributes:
-
-
+        text_embeddings (Dict[str, torch.Tensor] | None): Cached text embeddings for category names to accelerate
+            training.
         model (WorldModel): The YOLO World model being trained.
-        data (
-        args (
+        data (Dict[str, Any]): Dataset configuration containing class information.
+        args (Any): Training arguments and configuration.
+
+    Methods:
+        get_model: Return WorldModel initialized with specified config and weights.
+        build_dataset: Build YOLO Dataset for training or validation.
+        set_text_embeddings: Set text embeddings for datasets to accelerate training.
+        generate_text_embeddings: Generate text embeddings for a list of text samples.
+        preprocess_batch: Preprocess a batch of images and text for YOLOWorld training.
 
     Examples:
-
+        Initialize and train a YOLO World model
+        >>> from ultralytics.models.yolo.world import WorldTrainer
         >>> args = dict(model="yolov8s-world.pt", data="coco8.yaml", epochs=3)
         >>> trainer = WorldTrainer(overrides=args)
         >>> trainer.train()
     """
 
-    def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+    def __init__(self, cfg=DEFAULT_CFG, overrides: Optional[Dict[str, Any]] = None, _callbacks=None):
         """
         Initialize a WorldTrainer object with given arguments.
 
         Args:
-            cfg (
-            overrides (
-            _callbacks (
+            cfg (Dict[str, Any]): Configuration for the trainer.
+            overrides (Dict[str, Any], optional): Configuration overrides.
+            _callbacks (List[Any], optional): List of callback functions.
         """
         if overrides is None:
             overrides = {}
         super().__init__(cfg, overrides, _callbacks)
         self.text_embeddings = None
 
-    def get_model(self, cfg=None, weights=None, verbose=True):
+    def get_model(self, cfg=None, weights: Optional[str] = None, verbose: bool = True) -> WorldModel:
         """
         Return WorldModel initialized with specified config and weights.
 
         Args:
-            cfg (Dict | str, optional): Model configuration.
+            cfg (Dict[str, Any] | str, optional): Model configuration.
             weights (str, optional): Path to pretrained weights.
             verbose (bool): Whether to display model info.
 
@@ -81,7 +91,7 @@ class WorldTrainer(DetectionTrainer):
 
         return model
 
-    def build_dataset(self, img_path, mode="train", batch=None):
+    def build_dataset(self, img_path: str, mode: str = "train", batch: Optional[int] = None):
         """
         Build YOLO Dataset for training or validation.
 
@@ -91,7 +101,7 @@ class WorldTrainer(DetectionTrainer):
             batch (int, optional): Size of batches, this is for `rect`.
 
         Returns:
-            (
+            (Any): YOLO dataset configured for training or validation.
         """
         gs = max(int(de_parallel(self.model).stride.max() if self.model else 0), 32)
         dataset = build_yolo_dataset(
@@ -101,7 +111,7 @@ class WorldTrainer(DetectionTrainer):
         self.set_text_embeddings([dataset], batch)  # cache text embeddings to accelerate training
         return dataset
 
-    def set_text_embeddings(self, datasets, batch):
+    def set_text_embeddings(self, datasets: List[Any], batch: Optional[int]) -> None:
         """
         Set text embeddings for datasets to accelerate training by caching category names.
 
@@ -109,7 +119,7 @@ class WorldTrainer(DetectionTrainer):
         for these categories to improve training efficiency.
 
         Args:
-            datasets (List[
+            datasets (List[Any]): List of datasets from which to extract category names.
             batch (int | None): Batch size used for processing.
 
         Notes:
@@ -127,7 +137,7 @@ class WorldTrainer(DetectionTrainer):
         )
         self.text_embeddings = text_embeddings
 
-    def generate_text_embeddings(self, texts, batch, cache_dir):
+    def generate_text_embeddings(self, texts: List[str], batch: int, cache_dir: Path) -> Dict[str, torch.Tensor]:
         """
         Generate text embeddings for a list of text samples.
 
@@ -137,7 +147,7 @@ class WorldTrainer(DetectionTrainer):
             cache_dir (Path): Directory to save/load cached embeddings.
 
         Returns:
-            (
+            (Dict[str, torch.Tensor]): Dictionary mapping text samples to their embeddings.
         """
         model = "clip:ViT-B/32"
         cache_path = cache_dir / f"text_embeddings_{model.replace(':', '_').replace('/', '_')}.pt"
@@ -153,7 +163,7 @@ class WorldTrainer(DetectionTrainer):
             torch.save(txt_map, cache_path)
         return txt_map
 
-    def preprocess_batch(self, batch):
+    def preprocess_batch(self, batch: Dict[str, Any]) -> Dict[str, Any]:
         """Preprocess a batch of images and text for YOLOWorld training."""
         batch = DetectionTrainer.preprocess_batch(self, batch)
 
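
WorldTrainer now documents that text embeddings for category names are generated (with CLIP ViT-B/32, per generate_text_embeddings) and cached to accelerate training. The docstring's own example, spelled out as a runnable sketch:

from ultralytics.models.yolo.world import WorldTrainer

args = dict(model="yolov8s-world.pt", data="coco8.yaml", epochs=3)
trainer = WorldTrainer(overrides=args)
trainer.train()  # embeddings are cached to a text_embeddings_*.pt file on first use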
--- a/ultralytics/models/yolo/world/train_world.py
+++ b/ultralytics/models/yolo/world/train_world.py
@@ -18,6 +18,14 @@ class WorldTrainerFromScratch(WorldTrainer):
         cfg (dict): Configuration dictionary with default parameters for model training.
         overrides (dict): Dictionary of parameter overrides to customize the configuration.
         _callbacks (list): List of callback functions to be executed during different stages of training.
+        data (dict): Final processed data configuration containing train/val paths and metadata.
+        training_data (dict): Dictionary mapping training dataset paths to their configurations.
+
+    Methods:
+        build_dataset: Build YOLO Dataset for training or validation with mixed dataset support.
+        get_dataset: Get train and validation paths from data dictionary.
+        plot_training_labels: Skip label plotting for YOLO-World training.
+        final_eval: Perform final evaluation and validation for the YOLO-World model.
 
     Examples:
         >>> from ultralytics.models.yolo.world.train_world import WorldTrainerFromScratch
@@ -111,8 +119,8 @@ class WorldTrainerFromScratch(WorldTrainer):
         handling both YOLO detection datasets and grounding datasets.
 
         Returns:
-            (str): Train dataset path.
-            (str): Validation dataset path.
+            train_path (str): Train dataset path.
+            val_path (str): Validation dataset path.
 
         Raises:
             AssertionError: If train or validation datasets are not found, or if validation has multiple datasets.
@@ -159,7 +167,7 @@ class WorldTrainerFromScratch(WorldTrainer):
         return final_data
 
     def plot_training_labels(self):
-        """
+        """Skip label plotting for YOLO-World training."""
         pass
 
     def final_eval(self):
--- a/ultralytics/models/yolo/yoloe/predict.py
+++ b/ultralytics/models/yolo/yoloe/predict.py
@@ -18,23 +18,23 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
     Attributes:
         model (torch.nn.Module): The YOLO model for inference.
         device (torch.device): Device to run the model on (CPU or CUDA).
-        prompts (dict): Visual prompts containing class indices and bounding boxes or masks.
+        prompts (dict | torch.Tensor): Visual prompts containing class indices and bounding boxes or masks.
 
     Methods:
         setup_model: Initialize the YOLO model and set it to evaluation mode.
-        set_return_vpe: Set whether to return visual prompt embeddings.
         set_prompts: Set the visual prompts for the model.
         pre_transform: Preprocess images and prompts before inference.
         inference: Run inference with visual prompts.
+        get_vpe: Process source to get visual prompt embeddings.
     """
 
-    def setup_model(self, model, verbose=True):
+    def setup_model(self, model, verbose: bool = True):
         """
-
+        Set up the model for prediction.
 
         Args:
             model (torch.nn.Module): Model to load or use.
-            verbose (bool): If True, provides detailed logging.
+            verbose (bool, optional): If True, provides detailed logging.
         """
         super().setup_model(model, verbose=verbose)
         self.done_warmup = True
@@ -95,17 +95,17 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
 
     def _process_single_image(self, dst_shape, src_shape, category, bboxes=None, masks=None):
         """
-
+        Process a single image by resizing bounding boxes or masks and generating visuals.
 
         Args:
             dst_shape (tuple): The target shape (height, width) of the image.
             src_shape (tuple): The original shape (height, width) of the image.
             category (str): The category of the image for visual prompts.
-            bboxes (list | np.ndarray, optional): A list of bounding boxes in the format [x1, y1, x2, y2].
-            masks (np.ndarray, optional): A list of masks corresponding to the image.
+            bboxes (list | np.ndarray, optional): A list of bounding boxes in the format [x1, y1, x2, y2].
+            masks (np.ndarray, optional): A list of masks corresponding to the image.
 
         Returns:
-
+            (torch.Tensor): The processed visuals for the image.
 
         Raises:
             ValueError: If neither `bboxes` nor `masks` are provided.
@@ -146,7 +146,7 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
 
     def get_vpe(self, source):
         """
-
+        Process the source to get the visual prompt embeddings (VPE).
 
         Args:
             source (str | Path | int | PIL.Image | np.ndarray | torch.Tensor | List | Tuple): The source
@@ -164,6 +164,6 @@ class YOLOEVPDetectPredictor(DetectionPredictor):
 
 
 class YOLOEVPSegPredictor(YOLOEVPDetectPredictor, SegmentationPredictor):
-    """Predictor for
+    """Predictor for YOLO-EVP segmentation tasks combining detection and segmentation capabilities."""
 
     pass