ultralytics 8.3.36__py3-none-any.whl → 8.3.38__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +7 -6
- ultralytics/cfg/default.yaml +1 -2
- ultralytics/data/augment.py +4 -5
- ultralytics/data/loaders.py +1 -1
- ultralytics/engine/exporter.py +5 -4
- ultralytics/engine/model.py +17 -0
- ultralytics/models/sam/__init__.py +2 -2
- ultralytics/models/sam/model.py +1 -1
- ultralytics/models/sam/modules/sam.py +16 -39
- ultralytics/models/sam/predict.py +817 -28
- ultralytics/nn/modules/block.py +2 -2
- ultralytics/nn/modules/conv.py +1 -1
- ultralytics/solutions/parking_management.py +1 -1
- ultralytics/trackers/basetrack.py +1 -1
- ultralytics/trackers/utils/matching.py +3 -4
- ultralytics/utils/__init__.py +8 -6
- ultralytics/utils/loss.py +2 -3
- ultralytics/utils/metrics.py +12 -13
- ultralytics/utils/ops.py +20 -14
- ultralytics/utils/plotting.py +14 -14
- {ultralytics-8.3.36.dist-info → ultralytics-8.3.38.dist-info}/METADATA +3 -3
- {ultralytics-8.3.36.dist-info → ultralytics-8.3.38.dist-info}/RECORD +27 -27
- {ultralytics-8.3.36.dist-info → ultralytics-8.3.38.dist-info}/LICENSE +0 -0
- {ultralytics-8.3.36.dist-info → ultralytics-8.3.38.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.36.dist-info → ultralytics-8.3.38.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.36.dist-info → ultralytics-8.3.38.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py
CHANGED
ultralytics/cfg/__init__.py
CHANGED
@@ -83,13 +83,13 @@ SOLUTIONS_HELP_MSG = f"""
     See all ARGS at https://docs.ultralytics.com/usage/cfg or with 'yolo cfg'

     1. Call object counting solution
-        yolo solutions count source="path/to/video/file.mp4" region=[(20, 400), (1080,
+        yolo solutions count source="path/to/video/file.mp4" region=[(20, 400), (1080, 400), (1080, 360), (20, 360)]

     2. Call heatmaps solution
         yolo solutions heatmap colormap=cv2.COLORMAP_PARULA model=yolo11n.pt

     3. Call queue management solution
-        yolo solutions queue region=[(20, 400), (1080,
+        yolo solutions queue region=[(20, 400), (1080, 400), (1080, 360), (20, 360)] model=yolo11n.pt

     4. Call workouts monitoring solution for push-ups
         yolo solutions workout model=yolo11n-pose.pt kpts=[6, 8, 10]

@@ -160,7 +160,6 @@ CFG_FRACTION_KEYS = { # fractional float arguments with 0.0<=values<=1.0
     "weight_decay",
     "warmup_momentum",
     "warmup_bias_lr",
-    "label_smoothing",
     "hsv_h",
     "hsv_s",
     "hsv_v",

@@ -436,6 +435,9 @@ def _handle_deprecation(custom):
         if key == "line_thickness":
             deprecation_warn(key, "line_width")
             custom["line_width"] = custom.pop("line_thickness")
+        if key == "label_smoothing":
+            deprecation_warn(key)
+            custom.pop("label_smoothing")

     return custom

@@ -738,9 +740,8 @@ def parse_key_value_pair(pair: str = "key=value"):
         pair (str): A string containing a key-value pair in the format "key=value".

     Returns:
-        (
-
-        - value (str): The parsed value.
+        key (str): The parsed key.
+        value (str): The parsed value.

     Raises:
         AssertionError: If the value is missing or empty.
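The practical effect of the `_handle_deprecation` hunk is that a leftover `label_smoothing` override no longer fails config validation: it is warned about and dropped before training starts. A minimal sketch of that behaviour, assuming the standard `YOLO(...).train(...)` entry point (checkpoint, dataset, and value are illustrative):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # any detection checkpoint

# In 8.3.38 this emits a deprecation warning and silently drops the key;
# it no longer maps to the removed `label_smoothing` hyperparameter.
model.train(data="coco8.yaml", epochs=1, label_smoothing=0.1)
```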
ultralytics/cfg/default.yaml
CHANGED
@@ -83,7 +83,7 @@ int8: False # (bool) CoreML/TF INT8 quantization
 dynamic: False # (bool) ONNX/TF/TensorRT: dynamic axes
 simplify: True # (bool) ONNX: simplify model using `onnxslim`
 opset: # (int, optional) ONNX: opset version
-workspace:
+workspace: None # (float, optional) TensorRT: workspace size (GiB), `None` will let TensorRT auto-allocate memory
 nms: False # (bool) CoreML: add NMS

 # Hyperparameters ------------------------------------------------------------------------------------------------------

@@ -99,7 +99,6 @@ cls: 0.5 # (float) cls loss gain (scale with pixels)
 dfl: 1.5 # (float) dfl loss gain
 pose: 12.0 # (float) pose loss gain
 kobj: 1.0 # (float) keypoint obj loss gain
-label_smoothing: 0.0 # (float) label smoothing (fraction)
 nbs: 64 # (int) nominal batch size
 hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
 hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
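With `workspace` now defaulting to `None`, TensorRT engine exports only apply an explicit memory-pool limit when a size is supplied. A hedged sketch of both paths via the Python export API (checkpoint name is illustrative; a CUDA device and TensorRT install are assumed):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")

# Default in 8.3.38: workspace=None, TensorRT auto-allocates its workspace memory.
model.export(format="engine")

# Explicit cap, e.g. 2 GiB, restores the previous bounded-workspace behaviour.
model.export(format="engine", workspace=2)
```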
ultralytics/data/augment.py
CHANGED
@@ -1591,7 +1591,7 @@ class LetterBox:
             labels["ratio_pad"] = (labels["ratio_pad"], (left, top))  # for evaluation

         if len(labels):
-            labels = self._update_labels(labels, ratio,
+            labels = self._update_labels(labels, ratio, left, top)
         labels["img"] = img
         labels["resized_shape"] = new_shape
         return labels

@@ -2111,10 +2111,9 @@ class Format:
             h (int): Height of the image.

         Returns:
-            (
-
-
-            cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.
+            masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True.
+            instances (Instances): Updated instances object with sorted segments if mask_overlap is True.
+            cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.

         Notes:
             - If self.mask_overlap is True, masks are overlapped and sorted by area.
ultralytics/data/loaders.py
CHANGED
@@ -354,7 +354,7 @@ class LoadImagesAndVideos:
         self.nf = ni + nv  # number of files
         self.ni = ni  # number of images
         self.video_flag = [False] * ni + [True] * nv
-        self.mode = "image"
+        self.mode = "video" if ni == 0 else "image"  # default to video if no images
         self.vid_stride = vid_stride  # video frame-rate stride
         self.bs = batch
         if any(videos):
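The loader change above only affects the initial `mode` reported before the first item is read: a video-only source now starts out as `"video"` instead of `"image"`. A small sketch, assuming the class is constructed directly with a local video file (the filename is a placeholder):

```python
from ultralytics.data.loaders import LoadImagesAndVideos

dataset = LoadImagesAndVideos("clip.mp4")  # hypothetical local video file
print(dataset.mode)  # "video" in 8.3.38; previously "image" until the first frame was read
```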
ultralytics/engine/exporter.py
CHANGED
@@ -220,6 +220,7 @@ class Exporter:
             self.args.device = "0"
         if fmt == "engine" and "dla" in str(self.args.device):  # convert int/list to str first
             dla = self.args.device.split(":")[-1]
+            self.args.device = "0"  # update device to "0"
             assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}."
         self.device = select_device("cpu" if self.args.device is None else self.args.device)

@@ -781,10 +782,10 @@
         # Engine builder
         builder = trt.Builder(logger)
         config = builder.create_builder_config()
-        workspace = int(self.args.workspace * (1 << 30))
-        if is_trt10:
+        workspace = int(self.args.workspace * (1 << 30)) if self.args.workspace is not None else 0
+        if is_trt10 and workspace > 0:
             config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace)
-
+        elif workspace > 0 and not is_trt10:  # TensorRT versions 7, 8
             config.max_workspace_size = workspace
         flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
         network = builder.create_network(flag)

@@ -823,7 +824,7 @@
             LOGGER.warning(f"{prefix} WARNING ⚠️ 'dynamic=True' model requires max batch size, i.e. 'batch=16'")
             profile = builder.create_optimization_profile()
             min_shape = (1, shape[1], 32, 32)  # minimum input shape
-            max_shape = (*shape[:2], *(int(max(1,
+            max_shape = (*shape[:2], *(int(max(1, workspace) * d) for d in shape[2:]))  # max input shape
             for inp in inputs:
                 profile.set_shape(inp.name, min=min_shape, opt=shape, max=max_shape)
             config.add_optimization_profile(profile)
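The DLA branch touched in the first hunk now also rewrites `args.device` to GPU `"0"` after extracting the DLA core index. A hedged usage sketch of that export path (it only runs on a Jetson-class device with TensorRT; the FP16 flag reflects the exporter's requirement that DLA use half or int8 precision):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")  # illustrative checkpoint

# "dla:0" selects DLA core 0; the exporter parses the core id, then resets device to "0".
model.export(format="engine", device="dla:0", half=True)
```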
ultralytics/engine/model.py
CHANGED
@@ -1126,3 +1126,20 @@ class Model(nn.Module):
         description of the expected behavior and structure.
         """
         raise NotImplementedError("Please provide task map for your model!")
+
+    def eval(self):
+        """
+        Sets the model to evaluation mode.
+
+        This method changes the model's mode to evaluation, which affects layers like dropout and batch normalization
+        that behave differently during training and evaluation.
+
+        Returns:
+            (Model): The model instance with evaluation mode set.
+
+        Examples:
+            >>> model = YOLO("yolo11n.pt")
+            >>> model.eval()
+        """
+        self.model.eval()
+        return self
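The new `eval()` passthrough is mostly useful when the wrapped `nn.Module` is driven directly, e.g. in a custom inference loop; it returns the `Model` wrapper so the call can be chained. A short usage sketch (checkpoint and input size are illustrative):

```python
import torch
from ultralytics import YOLO

model = YOLO("yolo11n.pt").eval()  # chainable: returns the Model wrapper

with torch.no_grad():
    dummy = torch.zeros(1, 3, 640, 640)
    preds = model.model(dummy)  # run the underlying nn.Module directly in eval mode
```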
ultralytics/models/sam/__init__.py
CHANGED
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license

 from .model import SAM
-from .predict import Predictor, SAM2Predictor
+from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor

-__all__ = "SAM", "Predictor", "SAM2Predictor"  # tuple or list
+__all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor"  # tuple or list
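`SAM2VideoPredictor` (the bulk of the `predict.py` additions listed at the top) is now part of the package's public surface. A hedged construction sketch, assuming it follows the usual Ultralytics predictor `overrides` pattern; the exact argument names and prompt options should be checked against the 8.3.38 docs:

```python
from ultralytics.models.sam import SAM2VideoPredictor

# Assumed overrides-style construction, mirroring other Ultralytics predictors.
overrides = dict(task="segment", mode="predict", model="sam2_b.pt", imgsz=1024)
predictor = SAM2VideoPredictor(overrides=overrides)

# Assumed prompt interface: track a single positive point through the video.
results = predictor(source="video.mp4", points=[920, 470], labels=[1])
```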
ultralytics/models/sam/model.py
CHANGED
@@ -148,7 +148,7 @@ class SAM(Model):
             verbose (bool): If True, prints the information to the console.

         Returns:
-            (
+            (tuple): A tuple containing the model's information (string representations of the model).

         Examples:
             >>> sam = SAM("sam_b.pt")
ultralytics/models/sam/modules/sam.py
CHANGED
@@ -36,8 +36,6 @@ class SAMModel(nn.Module):
         image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings.
         prompt_encoder (PromptEncoder): Encoder for various types of input prompts.
         mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings.
-        pixel_mean (torch.Tensor): Mean pixel values for image normalization, shape (3, 1, 1).
-        pixel_std (torch.Tensor): Standard deviation values for image normalization, shape (3, 1, 1).

     Methods:
         __init__: Initializes the SAMModel with encoders, decoder, and normalization parameters.

@@ -349,8 +347,7 @@ class SAM2Model(torch.nn.Module):
         self.sam_prompt_embed_dim = self.hidden_dim
         self.sam_image_embedding_size = self.image_size // self.backbone_stride

-        #
-        # (their hyperparameters like `mask_in_chans=16` are from SAM code)
+        # Build PromptEncoder and MaskDecoder from SAM (hyperparameters like `mask_in_chans=16` are from SAM code)
         self.sam_prompt_encoder = PromptEncoder(
             embed_dim=self.sam_prompt_embed_dim,
             image_embedding_size=(

@@ -425,8 +422,8 @@
             low_res_multimasks: Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
             high_res_multimasks: Tensor of shape (B, M, H*16, W*16) with upsampled mask logits.
             ious: Tensor of shape (B, M) with estimated IoU for each output mask.
-            low_res_masks: Tensor of shape (B, 1, H*4, W*4) with best low-resolution mask.
-            high_res_masks: Tensor of shape (B, 1, H*16, W*16) with best high-resolution mask.
+            low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
+            high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
             obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask.
             object_score_logits: Tensor of shape (B,) with object score logits.

@@ -488,12 +485,7 @@
             boxes=None,
             masks=sam_mask_prompt,
         )
-        (
-            low_res_multimasks,
-            ious,
-            sam_output_tokens,
-            object_score_logits,
-        ) = self.sam_mask_decoder(
+        low_res_multimasks, ious, sam_output_tokens, object_score_logits = self.sam_mask_decoder(
             image_embeddings=backbone_features,
             image_pe=self.sam_prompt_encoder.get_dense_pe(),
             sparse_prompt_embeddings=sparse_embeddings,

@@ -505,13 +497,8 @@
         if self.pred_obj_scores:
             is_obj_appearing = object_score_logits > 0

-            #
-
-            low_res_multimasks = torch.where(
-                is_obj_appearing[:, None, None],
-                low_res_multimasks,
-                NO_OBJ_SCORE,
-            )
+            # Spatial memory mask is a *hard* choice between obj and no obj, consistent with actual mask prediction
+            low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, NO_OBJ_SCORE)

         # convert masks from possibly bfloat16 (or float16) to float32
         # (older PyTorch versions before 2.1 don't support `interpolate` on bf16)

@@ -617,7 +604,6 @@

     def _prepare_backbone_features(self, backbone_out):
         """Prepares and flattens visual features from the image backbone output for further processing."""
-        backbone_out = backbone_out.copy()
         assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
         assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels

@@ -826,11 +812,7 @@
             mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc
         if self.sigmoid_bias_for_mem_enc != 0.0:
             mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc
-        maskmem_out = self.memory_encoder(
-            pix_feat,
-            mask_for_mem,
-            skip_mask_sigmoid=True,  # sigmoid already applied
-        )
+        maskmem_out = self.memory_encoder(pix_feat, mask_for_mem, skip_mask_sigmoid=True)  # sigmoid already applied
         maskmem_features = maskmem_out["vision_features"]
         maskmem_pos_enc = maskmem_out["vision_pos_enc"]
         # add a no-object embedding to the spatial memory to indicate that the frame

@@ -965,16 +947,7 @@
             track_in_reverse,
             prev_sam_mask_logits,
         )
-
-        (
-            _,
-            _,
-            _,
-            low_res_masks,
-            high_res_masks,
-            obj_ptr,
-            object_score_logits,
-        ) = sam_outputs
+        _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = sam_outputs

         current_out["pred_masks"] = low_res_masks
         current_out["pred_masks_high_res"] = high_res_masks

@@ -984,8 +957,7 @@
         # it's mainly used in the demo to encode spatial memories w/ consolidated masks)
         current_out["object_score_logits"] = object_score_logits

-        #
-        # it into a new memory feature (that can be used in future frames)
+        # Run memory encoder on the predicted mask to encode it into a new memory feature (for use in future frames)
         self._encode_memory_in_output(
             current_vision_feats,
             feat_sizes,

@@ -1007,8 +979,9 @@
             and (self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num)
         )

-
-
+    @staticmethod
+    def _apply_non_overlapping_constraints(pred_masks):
+        """Applies non-overlapping constraints to masks, keeping the highest scoring object per location."""
         batch_size = pred_masks.size(0)
         if batch_size == 1:
             return pred_masks

@@ -1024,6 +997,10 @@
         pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))
         return pred_masks

+    def set_binarize(self, binarize=False):
+        """Set binarize for VideoPredictor."""
+        self.binarize_mask_from_pts_for_mem_enc = binarize
+
     def set_imgsz(self, imgsz):
         """
         Set image size to make model compatible with different image sizes.