ultralytics 8.3.163__py3-none-any.whl → 8.3.164__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ultralytics/__init__.py +1 -1
- ultralytics/data/augment.py +182 -153
- ultralytics/data/build.py +23 -3
- ultralytics/data/dataset.py +6 -2
- ultralytics/data/loaders.py +2 -2
- ultralytics/data/utils.py +9 -7
- ultralytics/engine/exporter.py +7 -3
- ultralytics/engine/results.py +42 -42
- ultralytics/models/fastsam/model.py +1 -1
- ultralytics/models/fastsam/predict.py +1 -1
- ultralytics/models/sam/model.py +4 -4
- ultralytics/models/sam/modules/blocks.py +5 -5
- ultralytics/models/sam/modules/memory_attention.py +19 -19
- ultralytics/models/sam/modules/transformer.py +24 -22
- ultralytics/models/yolo/detect/val.py +2 -2
- ultralytics/models/yolo/world/train_world.py +9 -1
- ultralytics/solutions/distance_calculation.py +1 -1
- ultralytics/solutions/instance_segmentation.py +2 -2
- ultralytics/solutions/object_blurrer.py +2 -2
- ultralytics/solutions/object_counter.py +2 -2
- ultralytics/solutions/object_cropper.py +1 -1
- ultralytics/solutions/queue_management.py +1 -1
- ultralytics/solutions/security_alarm.py +2 -2
- ultralytics/solutions/templates/similarity-search.html +0 -24
- ultralytics/solutions/vision_eye.py +1 -1
- ultralytics/utils/benchmarks.py +2 -2
- ultralytics/utils/export.py +0 -2
- ultralytics/utils/instance.py +32 -25
- ultralytics/utils/ops.py +8 -8
- {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/METADATA +1 -1
- {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/RECORD +35 -35
- {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/WHEEL +0 -0
- {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/licenses/LICENSE +0 -0
- {ultralytics-8.3.163.dist-info → ultralytics-8.3.164.dist-info}/top_level.txt +0 -0
ultralytics/data/build.py
CHANGED
@@ -3,13 +3,14 @@
 import os
 import random
 from pathlib import Path
-from typing import Any, Iterator
+from typing import Any, Dict, Iterator
 
 import numpy as np
 import torch
 from PIL import Image
 from torch.utils.data import dataloader, distributed
 
+from ultralytics.cfg import IterableSimpleNamespace
 from ultralytics.data.dataset import GroundingDataset, YOLODataset, YOLOMultiModalDataset
 from ultralytics.data.loaders import (
     LOADERS,
@@ -111,7 +112,16 @@ def seed_worker(worker_id: int):  # noqa
     random.seed(worker_seed)
 
 
-def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32, multi_modal=False):
+def build_yolo_dataset(
+    cfg: IterableSimpleNamespace,
+    img_path: str,
+    batch: int,
+    data: Dict[str, Any],
+    mode: str = "train",
+    rect: bool = False,
+    stride: int = 32,
+    multi_modal: bool = False,
+):
     """Build and return a YOLO dataset based on configuration parameters."""
     dataset = YOLOMultiModalDataset if multi_modal else YOLODataset
     return dataset(
@@ -133,11 +143,21 @@ def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32, multi_modal=False):
     )
 
 
-def build_grounding(cfg, img_path, json_file, batch, mode="train", rect=False, stride=32):
+def build_grounding(
+    cfg: IterableSimpleNamespace,
+    img_path: str,
+    json_file: str,
+    batch: int,
+    mode: str = "train",
+    rect: bool = False,
+    stride: int = 32,
+    max_samples: int = 80,
+):
     """Build and return a GroundingDataset based on configuration parameters."""
     return GroundingDataset(
         img_path=img_path,
         json_file=json_file,
+        max_samples=max_samples,
         imgsz=cfg.imgsz,
         batch_size=batch,
         augment=mode == "train",  # augmentation
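A minimal usage sketch of the retyped builders (the dataset paths below are hypothetical; get_cfg() returns the default IterableSimpleNamespace of training arguments):

from ultralytics.cfg import get_cfg
from ultralytics.data.build import build_grounding

cfg = get_cfg()  # default training arguments
dataset = build_grounding(
    cfg,
    img_path="datasets/flickr30k/images",  # hypothetical path
    json_file="datasets/flickr30k/annotations.json",  # hypothetical path
    batch=16,
    mode="train",
    max_samples=40,  # new in 8.3.164: caps the text samples handed to RandomLoadText
)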
ultralytics/data/dataset.py
CHANGED
@@ -411,6 +411,7 @@ class YOLOMultiModalDataset(YOLODataset):
     @staticmethod
     def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
         """Get negative text samples based on frequency threshold."""
+        threshold = min(max(category_freq.values()), 100)
         return [k for k, v in category_freq.items() if v >= threshold]
 
 
@@ -434,18 +435,20 @@ class GroundingDataset(YOLODataset):
        >>> len(dataset)  # Number of valid images with annotations
        """
 
-    def __init__(self, *args, task: str = "detect", json_file: str = "", **kwargs):
+    def __init__(self, *args, task: str = "detect", json_file: str = "", max_samples: int = 80, **kwargs):
         """
         Initialize a GroundingDataset for object detection.
 
         Args:
             json_file (str): Path to the JSON file containing annotations.
             task (str): Must be 'detect' or 'segment' for GroundingDataset.
+            max_samples (int): Maximum number of samples to load for text augmentation.
             *args (Any): Additional positional arguments for the parent class.
             **kwargs (Any): Additional keyword arguments for the parent class.
         """
         assert task in {"detect", "segment"}, "GroundingDataset currently only supports `detect` and `segment` tasks"
         self.json_file = json_file
+        self.max_samples = max_samples
         super().__init__(*args, task=task, data={"channels": 3}, **kwargs)
 
     def get_img_files(self, img_path: str) -> List:
@@ -625,7 +628,7 @@ class GroundingDataset(YOLODataset):
         # the strategy of selecting negative is restricted in one dataset,
         # while official pre-saved neg embeddings from all datasets at once.
         transform = RandomLoadText(
-            max_samples=80,
+            max_samples=min(self.max_samples, 80),
             padding=True,
             padding_value=self._get_neg_texts(self.category_freq),
         )
@@ -651,6 +654,7 @@ class GroundingDataset(YOLODataset):
     @staticmethod
     def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
         """Get negative text samples based on frequency threshold."""
+        threshold = min(max(category_freq.values()), 100)
         return [k for k, v in category_freq.items() if v >= threshold]
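A self-contained sketch of the adaptive threshold both _get_neg_texts implementations now apply: the cutoff becomes min(max frequency, 100), so datasets whose categories all appear fewer than 100 times still yield negative samples (the toy frequencies below are illustrative):

from typing import Dict, List

def get_neg_texts(category_freq: Dict[str, int]) -> List[str]:
    threshold = min(max(category_freq.values()), 100)  # adaptive cap from 8.3.164
    return [k for k, v in category_freq.items() if v >= threshold]

print(get_neg_texts({"cat": 40, "dog": 12}))    # ['cat'] -- threshold drops to 40
print(get_neg_texts({"cat": 400, "dog": 120}))  # ['cat', 'dog'] -- threshold stays 100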
ultralytics/data/loaders.py
CHANGED
@@ -451,9 +451,9 @@ class LoadImagesAndVideos:
             self.mode = "image"
             if path.rpartition(".")[-1].lower() == "heic":
                 # Load HEIC image using Pillow with pillow-heif
-                check_requirements("pillow-heif")
+                check_requirements("pi-heif")
 
-                from pillow_heif import register_heif_opener
+                from pi_heif import register_heif_opener
 
                 register_heif_opener()  # Register HEIF opener with Pillow
                 with Image.open(path) as img:
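A hedged sketch of the new HEIC decode path: pi_heif exposes the same register_heif_opener entry point as pillow_heif, after which PIL can open .heic files directly ("photo.heic" is a hypothetical file):

from PIL import Image
from pi_heif import register_heif_opener

register_heif_opener()  # teach PIL to decode HEIF/HEIC containers
with Image.open("photo.heic") as img:
    print(img.size, img.mode)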
ultralytics/data/utils.py
CHANGED
@@ -9,7 +9,7 @@ import zipfile
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 from tarfile import is_tarfile
-from typing import Dict, List, Tuple, Union
+from typing import Any, Dict, List, Tuple, Union
 
 import cv2
 import numpy as np
@@ -284,7 +284,7 @@ def visualize_image_annotations(image_path: str, txt_path: str, label_map: Dict[int, str]):
         w = width * img_width
         h = height * img_height
         annotations.append((x, y, w, h, int(class_id)))
-
+    _, ax = plt.subplots(1)  # Plot the image and annotations
     for x, y, w, h, label in annotations:
         color = tuple(c / 255 for c in colors(label, True))  # Get and normalize the RGB color
         rect = plt.Rectangle((x, y), w, h, linewidth=2, edgecolor=color, facecolor="none")  # Create a rectangle
@@ -384,7 +384,7 @@ def find_dataset_yaml(path: Path) -> Path:
     return files[0]
 
 
-def check_det_dataset(dataset: str, autodownload: bool = True) -> Dict:
+def check_det_dataset(dataset: str, autodownload: bool = True) -> Dict[str, Any]:
     """
     Download, verify, and/or unzip a dataset if not found locally.
 
@@ -397,7 +397,7 @@ def check_det_dataset(dataset: str, autodownload: bool = True) -> Dict:
         autodownload (bool, optional): Whether to automatically download the dataset if not found.
 
     Returns:
-        (Dict): Parsed dataset information and paths.
+        (Dict[str, Any]): Parsed dataset information and paths.
     """
     file = check_file(dataset)
 
@@ -479,7 +479,7 @@ def check_det_dataset(dataset: str, autodownload: bool = True) -> Dict:
     return data  # dictionary
 
 
-def check_cls_dataset(dataset: Union[str, Path], split: str = "") -> Dict:
+def check_cls_dataset(dataset: Union[str, Path], split: str = "") -> Dict[str, Any]:
     """
     Check a classification dataset such as Imagenet.
 
@@ -491,13 +491,13 @@ def check_cls_dataset(dataset: Union[str, Path], split: str = "") -> Dict:
         split (str, optional): The split of the dataset. Either 'val', 'test', or ''.
 
     Returns:
-        (Dict): A dictionary containing the following keys:
+        (Dict[str, Any]): A dictionary containing the following keys:
 
            - 'train' (Path): The directory path containing the training set of the dataset.
            - 'val' (Path): The directory path containing the validation set of the dataset.
            - 'test' (Path): The directory path containing the test set of the dataset.
            - 'nc' (int): The number of classes in the dataset.
-           - 'names' (Dict): A dictionary of class names in the dataset.
+           - 'names' (Dict[int, str]): A dictionary of class names in the dataset.
     """
     # Download (optional if dataset=https://file.zip is passed directly)
     if str(dataset).startswith(("http:/", "https:/")):
@@ -535,6 +535,8 @@ def check_cls_dataset(dataset: Union[str, Path], split: str = "") -> Dict:
         if (data_dir / "val").exists()
         else data_dir / "validation"
         if (data_dir / "validation").exists()
+        else data_dir / "valid"
+        if (data_dir / "valid").exists()
         else None
     )  # data/test or data/val
     test_set = data_dir / "test" if (data_dir / "test").exists() else None  # data/val or data/test
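A self-contained sketch of the extended split lookup: check_cls_dataset now falls back through val, validation, and (new in 8.3.164) valid directories before giving up:

from pathlib import Path

def resolve_val_dir(data_dir: Path):
    # Same precedence as the chained conditional above
    for name in ("val", "validation", "valid"):
        candidate = data_dir / name
        if candidate.exists():
            return candidate
    return None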
ultralytics/engine/exporter.py
CHANGED
@@ -294,10 +294,10 @@ class Exporter:
 
         # Device
         dla = None
-        if …
+        if engine and self.args.device is None:
             LOGGER.warning("TensorRT requires GPU export, automatically assigning device=0")
             self.args.device = "0"
-        if …
+        if engine and "dla" in str(self.args.device):  # convert int/list to str first
             dla = self.args.device.rsplit(":", 1)[-1]
             self.args.device = "0"  # update device to "0"
             assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}."
@@ -348,6 +348,10 @@ class Exporter:
             LOGGER.warning("'nms=True' is not available for end2end models. Forcing 'nms=False'.")
             self.args.nms = False
         self.args.conf = self.args.conf or 0.25  # set conf default value for nms export
+        if (engine or self.args.nms) and self.args.dynamic and self.args.batch == 1:
+            LOGGER.warning(
+                f"'dynamic=True' model with '{'nms=True' if self.args.nms else 'format=engine'}' requires max batch size, i.e. 'batch=16'"
+            )
         if edgetpu:
             if not LINUX or ARM64:
                 raise SystemError(
@@ -516,7 +520,7 @@ class Exporter:
                 f"work. Use export 'imgsz={max(self.imgsz)}' if val is required."
             )
         imgsz = self.imgsz[0] if square else str(self.imgsz)[1:-1].replace(" ", "")
-        predict_data = f"data={data}" if model.task == "segment" and …
+        predict_data = f"data={data}" if model.task == "segment" and pb else ""
         q = "int8" if self.args.int8 else "half" if self.args.half else ""  # quantization
         LOGGER.info(
             f"\nExport complete ({time.time() - t:.1f}s)"
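A hedged usage sketch of what the new warning asks for: a dynamic TensorRT (or NMS-bundled) export should declare the maximum batch size up front rather than keep the default batch=1 (a CUDA device with TensorRT installed is assumed; weights download on first use):

from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.export(format="engine", dynamic=True, batch=16)  # max batch, avoids the batch=1 warning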
ultralytics/engine/results.py
CHANGED
@@ -196,7 +196,7 @@ class Results(SimpleClass, DataExportMixin):
     It supports visualization, data export, and various coordinate transformations.
 
     Attributes:
-        orig_img (numpy.ndarray): The original image as a numpy array.
+        orig_img (np.ndarray): The original image as a numpy array.
         orig_shape (Tuple[int, int]): Original image shape in (height, width) format.
         boxes (Boxes | None): Detected bounding boxes.
         masks (Masks | None): Segmentation masks.
@@ -254,7 +254,7 @@ class Results(SimpleClass, DataExportMixin):
         Initialize the Results class for storing and manipulating inference results.
 
         Args:
-            orig_img (numpy.ndarray): The original image as a numpy array.
+            orig_img (np.ndarray): The original image as a numpy array.
             path (str): The path to the image file.
             names (dict): A dictionary of class names.
             boxes (torch.Tensor | None): A 2D tensor of bounding box coordinates for each detection.
@@ -862,16 +862,16 @@ class Boxes(BaseTensor):
     methods for easy manipulation and conversion between different coordinate systems.
 
     Attributes:
-        data (torch.Tensor | numpy.ndarray): The raw tensor containing detection boxes and associated data.
+        data (torch.Tensor | np.ndarray): The raw tensor containing detection boxes and associated data.
         orig_shape (Tuple[int, int]): The original image dimensions (height, width).
         is_track (bool): Indicates whether tracking IDs are included in the box data.
-        xyxy (torch.Tensor | numpy.ndarray): Boxes in [x1, y1, x2, y2] format.
-        conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
-        cls (torch.Tensor | numpy.ndarray): Class labels for each box.
+        xyxy (torch.Tensor | np.ndarray): Boxes in [x1, y1, x2, y2] format.
+        conf (torch.Tensor | np.ndarray): Confidence scores for each box.
+        cls (torch.Tensor | np.ndarray): Class labels for each box.
         id (torch.Tensor | None): Tracking IDs for each box (if available).
-        xywh (torch.Tensor | numpy.ndarray): Boxes in [x, y, width, height] format.
-        xyxyn (torch.Tensor | numpy.ndarray): Normalized [x1, y1, x2, y2] boxes relative to orig_shape.
-        xywhn (torch.Tensor | numpy.ndarray): Normalized [x, y, width, height] boxes relative to orig_shape.
+        xywh (torch.Tensor | np.ndarray): Boxes in [x, y, width, height] format.
+        xyxyn (torch.Tensor | np.ndarray): Normalized [x1, y1, x2, y2] boxes relative to orig_shape.
+        xywhn (torch.Tensor | np.ndarray): Normalized [x, y, width, height] boxes relative to orig_shape.
 
     Methods:
         cpu: Return a copy of the object with all tensors on CPU memory.
@@ -931,7 +931,7 @@ class Boxes(BaseTensor):
         Return bounding boxes in [x1, y1, x2, y2] format.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box
+            (torch.Tensor | np.ndarray): A tensor or numpy array of shape (n, 4) containing bounding box
                 coordinates in [x1, y1, x2, y2] format, where n is the number of boxes.
 
         Examples:
@@ -948,7 +948,7 @@ class Boxes(BaseTensor):
         Return the confidence scores for each detection box.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A 1D tensor or array containing confidence scores for each detection,
+            (torch.Tensor | np.ndarray): A 1D tensor or array containing confidence scores for each detection,
                 with shape (N,) where N is the number of detections.
 
         Examples:
@@ -965,7 +965,7 @@ class Boxes(BaseTensor):
         Return the class ID tensor representing category predictions for each bounding box.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class IDs for each detection box.
+            (torch.Tensor | np.ndarray): A tensor or numpy array containing the class IDs for each detection box.
                 The shape is (N,), where N is the number of boxes.
 
         Examples:
@@ -1008,7 +1008,7 @@ class Boxes(BaseTensor):
         Convert bounding boxes from [x1, y1, x2, y2] format to [x, y, width, height] format.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center,
+            (torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height] format, where x_center,
                 y_center are the coordinates of the center point of the bounding box, width, height are the
                 dimensions of the bounding box and the shape of the returned tensor is (N, 4), where N is the
                 number of boxes.
@@ -1032,7 +1032,7 @@ class Boxes(BaseTensor):
         normalized to the range [0, 1] based on the original image dimensions.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is
+            (torch.Tensor | np.ndarray): Normalized bounding box coordinates with shape (N, 4), where N is
                 the number of boxes. Each row contains [x1, y1, x2, y2] values normalized to [0, 1].
 
         Examples:
@@ -1056,7 +1056,7 @@ class Boxes(BaseTensor):
         [x_center, y_center, width, height], where all values are relative to the original image dimensions.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): Normalized bounding boxes with shape (N, 4), where N is the
+            (torch.Tensor | np.ndarray): Normalized bounding boxes with shape (N, 4), where N is the
                 number of boxes. Each row contains [x_center, y_center, width, height] values normalized
                 to [0, 1] based on the original image dimensions.
 
@@ -1080,10 +1080,10 @@ class Masks(BaseTensor):
     including methods for converting between pixel and normalized coordinates.
 
     Attributes:
-        data (torch.Tensor | numpy.ndarray): The raw tensor or array containing mask data.
+        data (torch.Tensor | np.ndarray): The raw tensor or array containing mask data.
         orig_shape (tuple): Original image shape in (height, width) format.
-        xy (List[numpy.ndarray]): A list of segments in pixel coordinates.
-        xyn (List[numpy.ndarray]): A list of normalized segments.
+        xy (List[np.ndarray]): A list of segments in pixel coordinates.
+        xyn (List[np.ndarray]): A list of normalized segments.
 
     Methods:
         cpu: Return a copy of the Masks object with the mask tensor on CPU memory.
@@ -1128,7 +1128,7 @@ class Masks(BaseTensor):
         are normalized relative to the original image shape.
 
         Returns:
-            (List[numpy.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates
+            (List[np.ndarray]): A list of numpy arrays, where each array contains the normalized xy-coordinates
                 of a single segmentation mask. Each array has shape (N, 2), where N is the number of points in the
                 mask contour.
 
@@ -1153,7 +1153,7 @@ class Masks(BaseTensor):
         Masks object. The coordinates are scaled to match the original image dimensions.
 
         Returns:
-            (List[numpy.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel
+            (List[np.ndarray]): A list of numpy arrays, where each array contains the [x, y] pixel
                 coordinates for a single segmentation mask. Each array has shape (N, 2), where N is the
                 number of points in the segment.
 
@@ -1257,7 +1257,7 @@ class Keypoints(BaseTensor):
         Return normalized coordinates (x, y) of keypoints relative to the original image size.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint
+            (torch.Tensor | np.ndarray): A tensor or array of shape (N, K, 2) containing normalized keypoint
                 coordinates, where N is the number of instances, K is the number of keypoints, and the last
                 dimension contains [x, y] values in the range [0, 1].
 
@@ -1299,12 +1299,12 @@ class Probs(BaseTensor):
     classification probabilities, including top-1 and top-5 predictions.
 
     Attributes:
-        data (torch.Tensor | numpy.ndarray): The raw tensor or array containing classification probabilities.
+        data (torch.Tensor | np.ndarray): The raw tensor or array containing classification probabilities.
         orig_shape (tuple | None): The original image shape as (height, width). Not used in this class.
         top1 (int): Index of the class with the highest probability.
         top5 (List[int]): Indices of the top 5 classes by probability.
-        top1conf (torch.Tensor | numpy.ndarray): Confidence score of the top 1 class.
-        top5conf (torch.Tensor | numpy.ndarray): Confidence scores of the top 5 classes.
+        top1conf (torch.Tensor | np.ndarray): Confidence score of the top 1 class.
+        top5conf (torch.Tensor | np.ndarray): Confidence scores of the top 5 classes.
 
     Methods:
         cpu: Return a copy of the probabilities tensor on CPU memory.
@@ -1399,7 +1399,7 @@ class Probs(BaseTensor):
         from the classification results.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor containing the confidence score of the top 1 class.
+            (torch.Tensor | np.ndarray): A tensor containing the confidence score of the top 1 class.
 
         Examples:
             >>> results = model("image.jpg")  # classify an image
@@ -1420,7 +1420,7 @@ class Probs(BaseTensor):
         along with their associated confidence levels.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor or array containing the confidence scores for the
+            (torch.Tensor | np.ndarray): A tensor or array containing the confidence scores for the
                 top 5 predicted classes, sorted in descending order of probability.
 
         Examples:
@@ -1444,13 +1444,13 @@ class OBB(BaseTensor):
        data (torch.Tensor): The raw OBB tensor containing box coordinates and associated data.
        orig_shape (tuple): Original image size as (height, width).
        is_track (bool): Indicates whether tracking IDs are included in the box data.
-       xywhr (torch.Tensor | numpy.ndarray): Boxes in [x_center, y_center, width, height, rotation] format.
-       conf (torch.Tensor | numpy.ndarray): Confidence scores for each box.
-       cls (torch.Tensor | numpy.ndarray): Class labels for each box.
-       id (torch.Tensor | numpy.ndarray): Tracking IDs for each box, if available.
-       xyxyxyxy (torch.Tensor | numpy.ndarray): Boxes in 8-point [x1, y1, x2, y2, x3, y3, x4, y4] format.
-       xyxyxyxyn (torch.Tensor | numpy.ndarray): Normalized 8-point coordinates relative to orig_shape.
-       xyxy (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format.
+       xywhr (torch.Tensor | np.ndarray): Boxes in [x_center, y_center, width, height, rotation] format.
+       conf (torch.Tensor | np.ndarray): Confidence scores for each box.
+       cls (torch.Tensor | np.ndarray): Class labels for each box.
+       id (torch.Tensor | np.ndarray): Tracking IDs for each box, if available.
+       xyxyxyxy (torch.Tensor | np.ndarray): Boxes in 8-point [x1, y1, x2, y2, x3, y3, x4, y4] format.
+       xyxyxyxyn (torch.Tensor | np.ndarray): Normalized 8-point coordinates relative to orig_shape.
+       xyxy (torch.Tensor | np.ndarray): Axis-aligned bounding boxes in [x1, y1, x2, y2] format.
 
     Methods:
         cpu: Return a copy of the OBB object with all tensors on CPU memory.
@@ -1474,13 +1474,13 @@ class OBB(BaseTensor):
         various properties and methods to access and transform the OBB data.
 
         Args:
-            boxes (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the detection boxes,
+            boxes (torch.Tensor | np.ndarray): A tensor or numpy array containing the detection boxes,
                 with shape (num_boxes, 7) or (num_boxes, 8). The last two columns contain confidence and class values.
                 If present, the third last column contains track IDs, and the fifth column contains rotation.
             orig_shape (Tuple[int, int]): Original image size, in the format (height, width).
 
         Attributes:
-            data (torch.Tensor | numpy.ndarray): The raw OBB tensor.
+            data (torch.Tensor | np.ndarray): The raw OBB tensor.
             orig_shape (Tuple[int, int]): The original image shape.
             is_track (bool): Whether the boxes include tracking IDs.
 
@@ -1508,7 +1508,7 @@ class OBB(BaseTensor):
         Return boxes in [x_center, y_center, width, height, rotation] format.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the oriented bounding boxes with format
+            (torch.Tensor | np.ndarray): A tensor or numpy array containing the oriented bounding boxes with format
                 [x_center, y_center, width, height, rotation]. The shape is (N, 5) where N is the number of boxes.
 
         Examples:
@@ -1529,7 +1529,7 @@ class OBB(BaseTensor):
         represents the model's certainty in the detection.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor or numpy array of shape (N,) containing confidence scores
+            (torch.Tensor | np.ndarray): A tensor or numpy array of shape (N,) containing confidence scores
                 for N detections, where each score is in the range [0, 1].
 
         Examples:
@@ -1546,7 +1546,7 @@ class OBB(BaseTensor):
         Return the class values of the oriented bounding boxes.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): A tensor or numpy array containing the class values for each oriented
+            (torch.Tensor | np.ndarray): A tensor or numpy array containing the class values for each oriented
                 bounding box. The shape is (N,), where N is the number of boxes.
 
         Examples:
@@ -1564,7 +1564,7 @@ class OBB(BaseTensor):
         Return the tracking IDs of the oriented bounding boxes (if available).
 
         Returns:
-            (torch.Tensor | numpy.ndarray | None): A tensor or numpy array containing the tracking IDs for each
+            (torch.Tensor | np.ndarray | None): A tensor or numpy array containing the tracking IDs for each
                 oriented bounding box. Returns None if tracking IDs are not available.
 
         Examples:
@@ -1584,7 +1584,7 @@ class OBB(BaseTensor):
         Convert OBB format to 8-point (xyxyxyxy) coordinate format for rotated bounding boxes.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is
+            (torch.Tensor | np.ndarray): Rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2), where N is
                 the number of boxes. Each box is represented by 4 points (x, y), starting from the top-left corner and
                 moving clockwise.
 
@@ -1603,7 +1603,7 @@ class OBB(BaseTensor):
         Convert rotated bounding boxes to normalized xyxyxyxy format.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2),
+            (torch.Tensor | np.ndarray): Normalized rotated bounding boxes in xyxyxyxy format with shape (N, 4, 2),
                 where N is the number of boxes. Each box is represented by 4 points (x, y), normalized relative to
                 the original image dimensions.
 
@@ -1629,7 +1629,7 @@ class OBB(BaseTensor):
         as IoU calculation with non-rotated boxes.
 
         Returns:
-            (torch.Tensor | numpy.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N
+            (torch.Tensor | np.ndarray): Axis-aligned bounding boxes in xyxy format with shape (N, 4), where N
                 is the number of boxes. Each row contains [x1, y1, x2, y2] coordinates.
 
         Examples:
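A short usage sketch of the accessors documented above ("image.jpg" is a hypothetical input; weights download on first use). The properties return torch.Tensor by default and np.ndarray after converting the result object:

from ultralytics import YOLO

results = YOLO("yolo11n.pt")("image.jpg")
boxes = results[0].boxes
print(boxes.xyxy.shape, boxes.conf, boxes.cls)  # torch.Tensor accessors
print(results[0].numpy().boxes.xyxy)            # same accessors as np.ndarray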
ultralytics/models/fastsam/model.py
CHANGED
@@ -58,7 +58,7 @@ class FastSAM(Model):
        prompts and passes them to the parent class predict method for processing.

        Args:
-           source (str | PIL.Image | numpy.ndarray): Input source for prediction, can be a file path, URL, PIL image,
+           source (str | PIL.Image | np.ndarray): Input source for prediction, can be a file path, URL, PIL image,
                or numpy array.
            stream (bool): Whether to enable real-time streaming mode for video inputs.
            bboxes (List, optional): Bounding box coordinates for prompted segmentation in format [[x1, y1, x2, y2]].
ultralytics/models/fastsam/predict.py
CHANGED
@@ -54,7 +54,7 @@ class FastSAMPredictor(SegmentationPredictor):
        Args:
            preds (List[torch.Tensor]): Raw predictions from the model.
            img (torch.Tensor): Input image tensor that was fed to the model.
-           orig_imgs (List[numpy.ndarray]): Original images before preprocessing.
+           orig_imgs (List[np.ndarray]): Original images before preprocessing.

        Returns:
            (List[Results]): Processed results with prompts applied.
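A hedged usage sketch of FastSAM box-prompted prediction ("image.jpg" is a hypothetical input; weights download on first use):

from ultralytics import FastSAM

model = FastSAM("FastSAM-s.pt")
results = model("image.jpg", bboxes=[[100, 100, 400, 400]])  # prompt with one box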
ultralytics/models/sam/model.py
CHANGED
@@ -87,8 +87,8 @@ class SAM(Model):
        Perform segmentation prediction on the given image or video source.

        Args:
-           source (str | PIL.Image | numpy.ndarray): Path to the image or video file, or a PIL.Image object, or
-               a numpy.ndarray object.
+           source (str | PIL.Image | np.ndarray): Path to the image or video file, or a PIL.Image object, or
+               a np.ndarray object.
            stream (bool): If True, enables real-time streaming.
            bboxes (List[List[float]] | None): List of bounding box coordinates for prompted segmentation.
            points (List[List[float]] | None): List of points for prompted segmentation.
@@ -117,8 +117,8 @@ class SAM(Model):
        for segmentation tasks.

        Args:
-           source (str | PIL.Image | numpy.ndarray | None): Path to the image or video file, or a PIL.Image
-               object, or a numpy.ndarray object.
+           source (str | PIL.Image | np.ndarray | None): Path to the image or video file, or a PIL.Image
+               object, or a np.ndarray object.
            stream (bool): If True, enables real-time streaming.
            bboxes (List[List[float]] | None): List of bounding box coordinates for prompted segmentation.
            points (List[List[float]] | None): List of points for prompted segmentation.
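A hedged usage sketch of SAM point-prompted prediction ("image.jpg" is a hypothetical input):

from ultralytics import SAM

model = SAM("sam2.1_b.pt")
results = model("image.jpg", points=[[450, 300]], labels=[1])  # single foreground point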
ultralytics/models/sam/modules/blocks.py
CHANGED
@@ -411,7 +411,7 @@ class RoPEAttention(Attention):

    Attributes:
        compute_cis (Callable): Function to compute axial complex numbers for rotary encoding.
-       freqs_cis (Tensor): Precomputed frequency tensor for rotary encoding.
+       freqs_cis (torch.Tensor): Precomputed frequency tensor for rotary encoding.
        rope_k_repeat (bool): Flag to repeat query RoPE to match key length for cross-attention to memories.

    Methods:
@@ -443,7 +443,7 @@ class RoPEAttention(Attention):
        self.freqs_cis = freqs_cis
        self.rope_k_repeat = rope_k_repeat  # repeat q rope to match k length, needed for cross-attention to memories

-   def forward(self, q: Tensor, k: Tensor, v: Tensor, num_k_exclude_rope: int = 0) -> Tensor:
+   def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, num_k_exclude_rope: int = 0) -> torch.Tensor:
        """Apply rotary position encoding and compute attention between query, key, and value tensors."""
        q = self.q_proj(q)
        k = self.k_proj(k)
@@ -744,7 +744,7 @@ class PositionEmbeddingSine(nn.Module):

        self.cache = {}

-   def _encode_xy(self, x: Tensor, y: Tensor) -> Tuple[Tensor, Tensor]:
+   def _encode_xy(self, x: torch.Tensor, y: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Encode 2D positions using sine/cosine functions for transformer positional embeddings."""
        assert len(x) == len(y) and x.ndim == y.ndim == 1
        x_embed = x * self.scale
@@ -760,7 +760,7 @@ class PositionEmbeddingSine(nn.Module):
        return pos_x, pos_y

    @torch.no_grad()
-   def encode_boxes(self, x: Tensor, y: Tensor, w: Tensor, h: Tensor) -> Tensor:
+   def encode_boxes(self, x: torch.Tensor, y: torch.Tensor, w: torch.Tensor, h: torch.Tensor) -> torch.Tensor:
        """Encode box coordinates and dimensions into positional embeddings for detection."""
        pos_x, pos_y = self._encode_xy(x, y)
        return torch.cat((pos_y, pos_x, h[:, None], w[:, None]), dim=1)
@@ -768,7 +768,7 @@ class PositionEmbeddingSine(nn.Module):
    encode = encode_boxes  # Backwards compatibility

    @torch.no_grad()
-   def encode_points(self, x: Tensor, y: Tensor, labels: Tensor) -> Tensor:
+   def encode_points(self, x: torch.Tensor, y: torch.Tensor, labels: torch.Tensor) -> torch.Tensor:
        """Encode 2D points with sinusoidal embeddings and append labels."""
        (bx, nx), (by, ny), (bl, nl) = x.shape, y.shape, labels.shape
        assert bx == by and nx == ny and bx == bl and nx == nl
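A self-contained sketch of the sine/cosine position-encoding pattern that PositionEmbeddingSine applies per axis (num_pos_feats=128 and the 10000 temperature are the usual transformer defaults, assumed here for illustration):

import torch

def encode_1d(pos: torch.Tensor, num_pos_feats: int = 128, temperature: float = 10000.0) -> torch.Tensor:
    # Geometric frequency schedule, paired so even slots take sin and odd slots take cos
    dim_t = torch.arange(num_pos_feats, dtype=torch.float32)
    dim_t = temperature ** (2 * (dim_t // 2) / num_pos_feats)
    emb = pos[:, None] / dim_t  # (N, num_pos_feats)
    return torch.stack((emb[:, 0::2].sin(), emb[:, 1::2].cos()), dim=2).flatten(1)

print(encode_1d(torch.tensor([0.0, 0.5, 1.0])).shape)  # torch.Size([3, 128])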