ultralytics 8.3.37__py3-none-any.whl → 8.3.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = "8.3.37"
+__version__ = "8.3.39"
 
 import os
 
@@ -11,7 +11,6 @@ import cv2
 
 from ultralytics.utils import (
     ASSETS,
-    ASSETS_URL,
     DEFAULT_CFG,
     DEFAULT_CFG_DICT,
     DEFAULT_CFG_PATH,
@@ -160,7 +159,6 @@ CFG_FRACTION_KEYS = { # fractional float arguments with 0.0<=values<=1.0
     "weight_decay",
     "warmup_momentum",
     "warmup_bias_lr",
-    "label_smoothing",
     "hsv_h",
     "hsv_s",
     "hsv_v",
@@ -436,6 +434,9 @@ def _handle_deprecation(custom):
         if key == "line_thickness":
             deprecation_warn(key, "line_width")
             custom["line_width"] = custom.pop("line_thickness")
+        if key == "label_smoothing":
+            deprecation_warn(key)
+            custom.pop("label_smoothing")
 
     return custom
 
@@ -738,9 +739,8 @@ def parse_key_value_pair(pair: str = "key=value"):
         pair (str): A string containing a key-value pair in the format "key=value".
 
     Returns:
-        (tuple): A tuple containing two elements:
-            - key (str): The parsed key.
-            - value (str): The parsed value.
+        key (str): The parsed key.
+        value (str): The parsed value.
 
     Raises:
         AssertionError: If the value is missing or empty.
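For reference, a minimal usage sketch of the function whose docstring is reformatted above, assuming the public `ultralytics.cfg` import path; the returned value is type-cast by the library's smart parsing:

```python
from ultralytics.cfg import parse_key_value_pair

k, v = parse_key_value_pair("imgsz=640")
print(k, v)  # "imgsz" and 640 (the value string is converted by the library's smart type casting)
```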
@@ -99,7 +99,6 @@ cls: 0.5 # (float) cls loss gain (scale with pixels)
 dfl: 1.5 # (float) dfl loss gain
 pose: 12.0 # (float) pose loss gain
 kobj: 1.0 # (float) keypoint obj loss gain
-label_smoothing: 0.0 # (float) label smoothing (fraction)
 nbs: 64 # (int) nominal batch size
 hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
 hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
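Taken together, the `label_smoothing` changes above (dropped from `CFG_FRACTION_KEYS`, handled in `_handle_deprecation`, removed from the default config) mean an old override no longer errors. A hedged sketch of the expected behavior, not code from this diff:

```python
from ultralytics.cfg import get_cfg

# Passing the retired key should now hit the new deprecation branch, warn, and be dropped
cfg = get_cfg(overrides={"imgsz": 640, "label_smoothing": 0.0})
print(hasattr(cfg, "label_smoothing"))  # expected: False, the key is popped during parsing
```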
@@ -2111,10 +2111,9 @@ class Format:
             h (int): Height of the image.
 
         Returns:
-            (tuple): Tuple containing:
-                masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True.
-                instances (Instances): Updated instances object with sorted segments if mask_overlap is True.
-                cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.
+            masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True.
+            instances (Instances): Updated instances object with sorted segments if mask_overlap is True.
+            cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.
 
         Notes:
             - If self.mask_overlap is True, masks are overlapped and sorted by area.
@@ -354,7 +354,7 @@ class LoadImagesAndVideos:
         self.nf = ni + nv  # number of files
         self.ni = ni  # number of images
         self.video_flag = [False] * ni + [True] * nv
-        self.mode = "image"
+        self.mode = "video" if ni == 0 else "image"  # default to video if no images
         self.vid_stride = vid_stride  # video frame-rate stride
         self.bs = batch
         if any(videos):
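A small illustration of the behavioral change, assuming an existing local video file (the path below is a placeholder):

```python
from ultralytics.data.loaders import LoadImagesAndVideos

loader = LoadImagesAndVideos("path/to/video.mp4")  # placeholder path; the file must exist
print(loader.mode)  # "video" in 8.3.39 for a video-only source; previously initialized to "image"
```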
@@ -220,6 +220,7 @@ class Exporter:
             self.args.device = "0"
         if fmt == "engine" and "dla" in str(self.args.device):  # convert int/list to str first
             dla = self.args.device.split(":")[-1]
+            self.args.device = "0"  # update device to "0"
             assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}."
         self.device = select_device("cpu" if self.args.device is None else self.args.device)
 
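A hedged usage sketch of the DLA path above: requesting a TensorRT engine on a DLA core now remaps `args.device` to GPU "0" before `select_device()` runs. Running it requires a Jetson-class device with DLA and TensorRT installed; shown for illustration only:

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.export(format="engine", device="dla:0", half=True)  # DLA core index is captured, device becomes "0"
```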
@@ -144,6 +144,9 @@ class Model(nn.Module):
         else:
             self._load(model, task=task)
 
+        # Delete super().training for accessing self.model.training
+        del self.training
+
     def __call__(
         self,
         source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None,
@@ -1143,3 +1146,29 @@ class Model(nn.Module):
         """
         self.model.eval()
         return self
+
+    def __getattr__(self, name):
+        """
+        Enables accessing model attributes directly through the Model class.
+
+        This method provides a way to access attributes of the underlying model directly through the Model class
+        instance. It first checks if the requested attribute is 'model', in which case it returns the model from
+        the module dictionary. Otherwise, it delegates the attribute lookup to the underlying model.
+
+        Args:
+            name (str): The name of the attribute to retrieve.
+
+        Returns:
+            (Any): The requested attribute value.
+
+        Raises:
+            AttributeError: If the requested attribute does not exist in the model.
+
+        Examples:
+            >>> model = YOLO("yolo11n.pt")
+            >>> print(model.stride)
+            >>> print(model.task)
+        """
+        if name == "model":
+            return self._modules["model"]
+        return getattr(self.model, name)
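The two Model changes above work together: deleting the wrapper's own `training` attribute means attribute lookups fall through to the new `__getattr__`, which delegates to the wrapped nn.Module. A brief sketch of the resulting behavior (weights path used purely for illustration):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
print(model.stride)    # resolved on the underlying nn.Module via __getattr__
print(model.task)      # e.g. "detect"
print(model.training)  # now reflects self.model.training rather than the wrapper's own flag
```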
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 from .model import SAM
-from .predict import Predictor, SAM2Predictor
+from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor
 
-__all__ = "SAM", "Predictor", "SAM2Predictor"  # tuple or list
+__all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor"  # tuple or list
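With SAM2VideoPredictor now exported at package level it can be imported directly. The constructor and call arguments below follow the usual predictor `overrides` pattern and should be treated as an assumption, not API documented by this diff:

```python
from ultralytics.models.sam import SAM2VideoPredictor

overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model="sam2_b.pt")
predictor = SAM2VideoPredictor(overrides=overrides)
results = predictor(source="path/to/video.mp4", points=[920, 470], labels=[1])  # placeholder source and prompt
```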
@@ -148,7 +148,7 @@ class SAM(Model):
             verbose (bool): If True, prints the information to the console.
 
         Returns:
-            (Tuple): A tuple containing the model's information (string representations of the model).
+            (tuple): A tuple containing the model's information (string representations of the model).
 
         Examples:
             >>> sam = SAM("sam_b.pt")
@@ -36,8 +36,6 @@ class SAMModel(nn.Module):
         image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings.
         prompt_encoder (PromptEncoder): Encoder for various types of input prompts.
         mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings.
-        pixel_mean (torch.Tensor): Mean pixel values for image normalization, shape (3, 1, 1).
-        pixel_std (torch.Tensor): Standard deviation values for image normalization, shape (3, 1, 1).
 
     Methods:
         __init__: Initializes the SAMModel with encoders, decoder, and normalization parameters.
@@ -349,8 +347,7 @@ class SAM2Model(torch.nn.Module):
         self.sam_prompt_embed_dim = self.hidden_dim
         self.sam_image_embedding_size = self.image_size // self.backbone_stride
 
-        # build PromptEncoder and MaskDecoder from SAM
-        # (their hyperparameters like `mask_in_chans=16` are from SAM code)
+        # Build PromptEncoder and MaskDecoder from SAM (hyperparameters like `mask_in_chans=16` are from SAM code)
         self.sam_prompt_encoder = PromptEncoder(
             embed_dim=self.sam_prompt_embed_dim,
             image_embedding_size=(
@@ -425,8 +422,8 @@ class SAM2Model(torch.nn.Module):
             low_res_multimasks: Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
             high_res_multimasks: Tensor of shape (B, M, H*16, W*16) with upsampled mask logits.
             ious: Tensor of shape (B, M) with estimated IoU for each output mask.
-            low_res_masks: Tensor of shape (B, 1, H*4, W*4) with best low-resolution mask.
-            high_res_masks: Tensor of shape (B, 1, H*16, W*16) with best high-resolution mask.
+            low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
+            high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
             obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask.
             object_score_logits: Tensor of shape (B,) with object score logits.
 
@@ -488,12 +485,7 @@ class SAM2Model(torch.nn.Module):
             boxes=None,
             masks=sam_mask_prompt,
         )
-        (
-            low_res_multimasks,
-            ious,
-            sam_output_tokens,
-            object_score_logits,
-        ) = self.sam_mask_decoder(
+        low_res_multimasks, ious, sam_output_tokens, object_score_logits = self.sam_mask_decoder(
             image_embeddings=backbone_features,
             image_pe=self.sam_prompt_encoder.get_dense_pe(),
             sparse_prompt_embeddings=sparse_embeddings,
@@ -505,13 +497,8 @@ class SAM2Model(torch.nn.Module):
         if self.pred_obj_scores:
             is_obj_appearing = object_score_logits > 0
 
-            # Mask used for spatial memories is always a *hard* choice between obj and no obj,
-            # consistent with the actual mask prediction
-            low_res_multimasks = torch.where(
-                is_obj_appearing[:, None, None],
-                low_res_multimasks,
-                NO_OBJ_SCORE,
-            )
+            # Spatial memory mask is a *hard* choice between obj and no obj, consistent with actual mask prediction
+            low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, NO_OBJ_SCORE)
 
         # convert masks from possibly bfloat16 (or float16) to float32
         # (older PyTorch versions before 2.1 don't support `interpolate` on bf16)
@@ -617,7 +604,6 @@ class SAM2Model(torch.nn.Module):
 
     def _prepare_backbone_features(self, backbone_out):
         """Prepares and flattens visual features from the image backbone output for further processing."""
-        backbone_out = backbone_out.copy()
         assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
         assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels
 
@@ -826,11 +812,7 @@ class SAM2Model(torch.nn.Module):
             mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc
         if self.sigmoid_bias_for_mem_enc != 0.0:
             mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc
-        maskmem_out = self.memory_encoder(
-            pix_feat,
-            mask_for_mem,
-            skip_mask_sigmoid=True,  # sigmoid already applied
-        )
+        maskmem_out = self.memory_encoder(pix_feat, mask_for_mem, skip_mask_sigmoid=True)  # sigmoid already applied
         maskmem_features = maskmem_out["vision_features"]
         maskmem_pos_enc = maskmem_out["vision_pos_enc"]
         # add a no-object embedding to the spatial memory to indicate that the frame
@@ -965,16 +947,7 @@ class SAM2Model(torch.nn.Module):
             track_in_reverse,
             prev_sam_mask_logits,
         )
-
-        (
-            _,
-            _,
-            _,
-            low_res_masks,
-            high_res_masks,
-            obj_ptr,
-            object_score_logits,
-        ) = sam_outputs
+        _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = sam_outputs
 
         current_out["pred_masks"] = low_res_masks
         current_out["pred_masks_high_res"] = high_res_masks
@@ -984,8 +957,7 @@ class SAM2Model(torch.nn.Module):
         # it's mainly used in the demo to encode spatial memories w/ consolidated masks)
         current_out["object_score_logits"] = object_score_logits
 
-        # Finally run the memory encoder on the predicted mask to encode
-        # it into a new memory feature (that can be used in future frames)
+        # Run memory encoder on the predicted mask to encode it into a new memory feature (for use in future frames)
         self._encode_memory_in_output(
             current_vision_feats,
             feat_sizes,
@@ -1007,8 +979,9 @@ class SAM2Model(torch.nn.Module):
             and (self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num)
         )
 
-    def _apply_non_overlapping_constraints(self, pred_masks):
-        """Applies non-overlapping constraints to masks, keeping highest scoring object per location."""
+    @staticmethod
+    def _apply_non_overlapping_constraints(pred_masks):
+        """Applies non-overlapping constraints to masks, keeping the highest scoring object per location."""
         batch_size = pred_masks.size(0)
         if batch_size == 1:
             return pred_masks
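For context, a self-contained sketch of what `_apply_non_overlapping_constraints` does (a reimplementation for illustration, not the library's exact code): each pixel is kept only for the highest-scoring object and suppressed to a large negative logit for every other object:

```python
import torch


def apply_non_overlapping_constraints(pred_masks: torch.Tensor) -> torch.Tensor:
    """pred_masks: (num_objects, 1, H, W) mask logits for one frame."""
    num_objects = pred_masks.size(0)
    if num_objects == 1:
        return pred_masks
    max_obj_inds = torch.argmax(pred_masks, dim=0, keepdim=True)  # winning object index per pixel
    batch_obj_inds = torch.arange(num_objects, device=pred_masks.device)[:, None, None, None]
    keep = max_obj_inds == batch_obj_inds  # True where this object has the highest score
    return torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))


masks = torch.randn(3, 1, 64, 64)  # three overlapping toy object masks
print(apply_non_overlapping_constraints(masks).shape)  # torch.Size([3, 1, 64, 64])
```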
@@ -1024,6 +997,10 @@ class SAM2Model(torch.nn.Module):
         pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))
         return pred_masks
 
+    def set_binarize(self, binarize=False):
+        """Set binarize for VideoPredictor."""
+        self.binarize_mask_from_pts_for_mem_enc = binarize
+
     def set_imgsz(self, imgsz):
         """
         Set image size to make model compatible with different image sizes.