ultralytics 8.3.55.tar.gz → 8.3.57.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (245)
  1. {ultralytics-8.3.55/ultralytics.egg-info → ultralytics-8.3.57}/PKG-INFO +1 -2
  2. {ultralytics-8.3.55 → ultralytics-8.3.57}/pyproject.toml +0 -1
  3. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/test_solutions.py +14 -7
  4. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/__init__.py +1 -1
  5. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/__init__.py +1 -1
  6. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/augment.py +7 -7
  7. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/converter.py +2 -2
  8. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/dataset.py +2 -1
  9. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/split_dota.py +1 -1
  10. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/utils.py +49 -0
  11. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/exporter.py +7 -4
  12. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/amg.py +1 -1
  13. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/blocks.py +11 -11
  14. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/sam.py +2 -2
  15. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/predict.py +36 -37
  16. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/autobackend.py +1 -1
  17. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/utils/gmc.py +12 -12
  18. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/__init__.py +5 -9
  19. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/metrics.py +1 -1
  20. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/plotting.py +1 -1
  21. {ultralytics-8.3.55 → ultralytics-8.3.57/ultralytics.egg-info}/PKG-INFO +1 -2
  22. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics.egg-info/requires.txt +0 -1
  23. {ultralytics-8.3.55 → ultralytics-8.3.57}/LICENSE +0 -0
  24. {ultralytics-8.3.55 → ultralytics-8.3.57}/README.md +0 -0
  25. {ultralytics-8.3.55 → ultralytics-8.3.57}/setup.cfg +0 -0
  26. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/__init__.py +0 -0
  27. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/conftest.py +0 -0
  28. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/test_cli.py +0 -0
  29. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/test_cuda.py +0 -0
  30. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/test_engine.py +0 -0
  31. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/test_exports.py +0 -0
  32. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/test_integrations.py +0 -0
  33. {ultralytics-8.3.55 → ultralytics-8.3.57}/tests/test_python.py +0 -0
  34. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/assets/bus.jpg +0 -0
  35. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/assets/zidane.jpg +0 -0
  36. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/Argoverse.yaml +0 -0
  37. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/DOTAv1.5.yaml +0 -0
  38. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/DOTAv1.yaml +0 -0
  39. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/GlobalWheat2020.yaml +0 -0
  40. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/ImageNet.yaml +0 -0
  41. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/Objects365.yaml +0 -0
  42. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/SKU-110K.yaml +0 -0
  43. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/VOC.yaml +0 -0
  44. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/VisDrone.yaml +0 -0
  45. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/african-wildlife.yaml +0 -0
  46. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/brain-tumor.yaml +0 -0
  47. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/carparts-seg.yaml +0 -0
  48. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/coco-pose.yaml +0 -0
  49. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/coco.yaml +0 -0
  50. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/coco128-seg.yaml +0 -0
  51. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/coco128.yaml +0 -0
  52. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/coco8-pose.yaml +0 -0
  53. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/coco8-seg.yaml +0 -0
  54. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/coco8.yaml +0 -0
  55. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/crack-seg.yaml +0 -0
  56. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/dog-pose.yaml +0 -0
  57. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/dota8.yaml +0 -0
  58. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/hand-keypoints.yaml +0 -0
  59. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/lvis.yaml +0 -0
  60. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/medical-pills.yaml +0 -0
  61. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/open-images-v7.yaml +0 -0
  62. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/package-seg.yaml +0 -0
  63. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/signature.yaml +0 -0
  64. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/tiger-pose.yaml +0 -0
  65. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/datasets/xView.yaml +0 -0
  66. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/default.yaml +0 -0
  67. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/11/yolo11-cls.yaml +0 -0
  68. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/11/yolo11-obb.yaml +0 -0
  69. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/11/yolo11-pose.yaml +0 -0
  70. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/11/yolo11-seg.yaml +0 -0
  71. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/11/yolo11.yaml +0 -0
  72. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +0 -0
  73. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +0 -0
  74. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +0 -0
  75. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +0 -0
  76. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v10/yolov10b.yaml +0 -0
  77. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v10/yolov10l.yaml +0 -0
  78. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v10/yolov10m.yaml +0 -0
  79. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v10/yolov10n.yaml +0 -0
  80. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v10/yolov10s.yaml +0 -0
  81. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v10/yolov10x.yaml +0 -0
  82. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v3/yolov3-spp.yaml +0 -0
  83. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v3/yolov3-tiny.yaml +0 -0
  84. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v3/yolov3.yaml +0 -0
  85. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v5/yolov5-p6.yaml +0 -0
  86. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v5/yolov5.yaml +0 -0
  87. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v6/yolov6.yaml +0 -0
  88. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +0 -0
  89. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +0 -0
  90. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-cls.yaml +0 -0
  91. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +0 -0
  92. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +0 -0
  93. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-ghost.yaml +0 -0
  94. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-obb.yaml +0 -0
  95. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-p2.yaml +0 -0
  96. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-p6.yaml +0 -0
  97. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +0 -0
  98. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-pose.yaml +0 -0
  99. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +0 -0
  100. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +0 -0
  101. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-seg.yaml +0 -0
  102. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-world.yaml +0 -0
  103. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8-worldv2.yaml +0 -0
  104. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v8/yolov8.yaml +0 -0
  105. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v9/yolov9c-seg.yaml +0 -0
  106. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v9/yolov9c.yaml +0 -0
  107. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v9/yolov9e-seg.yaml +0 -0
  108. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v9/yolov9e.yaml +0 -0
  109. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v9/yolov9m.yaml +0 -0
  110. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v9/yolov9s.yaml +0 -0
  111. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/models/v9/yolov9t.yaml +0 -0
  112. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/solutions/default.yaml +0 -0
  113. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/trackers/botsort.yaml +0 -0
  114. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/cfg/trackers/bytetrack.yaml +0 -0
  115. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/__init__.py +0 -0
  116. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/annotator.py +0 -0
  117. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/base.py +0 -0
  118. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/build.py +0 -0
  119. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/data/loaders.py +0 -0
  120. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/__init__.py +0 -0
  121. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/model.py +0 -0
  122. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/predictor.py +0 -0
  123. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/results.py +0 -0
  124. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/trainer.py +0 -0
  125. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/tuner.py +0 -0
  126. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/engine/validator.py +0 -0
  127. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/hub/__init__.py +0 -0
  128. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/hub/auth.py +0 -0
  129. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/hub/google/__init__.py +0 -0
  130. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/hub/session.py +0 -0
  131. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/hub/utils.py +0 -0
  132. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/__init__.py +0 -0
  133. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/fastsam/__init__.py +0 -0
  134. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/fastsam/model.py +0 -0
  135. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/fastsam/predict.py +0 -0
  136. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/fastsam/utils.py +0 -0
  137. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/fastsam/val.py +0 -0
  138. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/nas/__init__.py +0 -0
  139. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/nas/model.py +0 -0
  140. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/nas/predict.py +0 -0
  141. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/nas/val.py +0 -0
  142. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/rtdetr/__init__.py +0 -0
  143. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/rtdetr/model.py +0 -0
  144. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/rtdetr/predict.py +0 -0
  145. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/rtdetr/train.py +0 -0
  146. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/rtdetr/val.py +0 -0
  147. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/__init__.py +0 -0
  148. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/build.py +0 -0
  149. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/model.py +0 -0
  150. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/__init__.py +0 -0
  151. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/decoders.py +0 -0
  152. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/encoders.py +0 -0
  153. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/memory_attention.py +0 -0
  154. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/tiny_encoder.py +0 -0
  155. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/transformer.py +0 -0
  156. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/sam/modules/utils.py +0 -0
  157. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/utils/__init__.py +0 -0
  158. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/utils/loss.py +0 -0
  159. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/utils/ops.py +0 -0
  160. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/__init__.py +0 -0
  161. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/classify/__init__.py +0 -0
  162. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/classify/predict.py +0 -0
  163. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/classify/train.py +0 -0
  164. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/classify/val.py +0 -0
  165. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/detect/__init__.py +0 -0
  166. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/detect/predict.py +0 -0
  167. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/detect/train.py +0 -0
  168. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/detect/val.py +0 -0
  169. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/model.py +0 -0
  170. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/obb/__init__.py +0 -0
  171. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/obb/predict.py +0 -0
  172. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/obb/train.py +0 -0
  173. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/obb/val.py +0 -0
  174. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/pose/__init__.py +0 -0
  175. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/pose/predict.py +0 -0
  176. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/pose/train.py +0 -0
  177. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/pose/val.py +0 -0
  178. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/segment/__init__.py +0 -0
  179. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/segment/predict.py +0 -0
  180. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/segment/train.py +0 -0
  181. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/segment/val.py +0 -0
  182. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/world/__init__.py +0 -0
  183. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/world/train.py +0 -0
  184. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/models/yolo/world/train_world.py +0 -0
  185. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/__init__.py +0 -0
  186. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/modules/__init__.py +0 -0
  187. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/modules/activation.py +0 -0
  188. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/modules/block.py +0 -0
  189. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/modules/conv.py +0 -0
  190. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/modules/head.py +0 -0
  191. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/modules/transformer.py +0 -0
  192. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/modules/utils.py +0 -0
  193. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/nn/tasks.py +0 -0
  194. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/__init__.py +0 -0
  195. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/ai_gym.py +0 -0
  196. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/analytics.py +0 -0
  197. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/distance_calculation.py +0 -0
  198. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/heatmap.py +0 -0
  199. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/object_counter.py +0 -0
  200. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/parking_management.py +0 -0
  201. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/queue_management.py +0 -0
  202. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/region_counter.py +0 -0
  203. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/security_alarm.py +0 -0
  204. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/solutions.py +0 -0
  205. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/speed_estimation.py +0 -0
  206. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/streamlit_inference.py +0 -0
  207. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/solutions/trackzone.py +0 -0
  208. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/__init__.py +0 -0
  209. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/basetrack.py +0 -0
  210. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/bot_sort.py +0 -0
  211. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/byte_tracker.py +0 -0
  212. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/track.py +0 -0
  213. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/utils/__init__.py +0 -0
  214. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/utils/kalman_filter.py +0 -0
  215. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/trackers/utils/matching.py +0 -0
  216. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/autobatch.py +0 -0
  217. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/benchmarks.py +0 -0
  218. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/__init__.py +0 -0
  219. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/base.py +0 -0
  220. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/clearml.py +0 -0
  221. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/comet.py +0 -0
  222. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/dvc.py +0 -0
  223. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/hub.py +0 -0
  224. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/mlflow.py +0 -0
  225. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/neptune.py +0 -0
  226. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/raytune.py +0 -0
  227. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/tensorboard.py +0 -0
  228. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/callbacks/wb.py +0 -0
  229. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/checks.py +0 -0
  230. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/dist.py +0 -0
  231. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/downloads.py +0 -0
  232. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/errors.py +0 -0
  233. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/files.py +0 -0
  234. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/instance.py +0 -0
  235. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/loss.py +0 -0
  236. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/ops.py +0 -0
  237. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/patches.py +0 -0
  238. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/tal.py +0 -0
  239. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/torch_utils.py +0 -0
  240. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/triton.py +0 -0
  241. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics/utils/tuner.py +0 -0
  242. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics.egg-info/SOURCES.txt +0 -0
  243. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics.egg-info/dependency_links.txt +0 -0
  244. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics.egg-info/entry_points.txt +0 -0
  245. {ultralytics-8.3.55 → ultralytics-8.3.57}/ultralytics.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: ultralytics
- Version: 8.3.55
+ Version: 8.3.57
  Summary: Ultralytics YOLO 🚀 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
  Author-email: Glenn Jocher <glenn.jocher@ultralytics.com>, Jing Qiu <jing.qiu@ultralytics.com>
  Maintainer-email: Ultralytics <hello@ultralytics.com>
@@ -57,7 +57,6 @@ Requires-Dist: coverage[toml]; extra == "dev"
  Requires-Dist: mkdocs>=1.6.0; extra == "dev"
  Requires-Dist: mkdocs-material>=9.5.9; extra == "dev"
  Requires-Dist: mkdocstrings[python]; extra == "dev"
- Requires-Dist: mkdocs-jupyter; extra == "dev"
  Requires-Dist: mkdocs-redirects; extra == "dev"
  Requires-Dist: mkdocs-ultralytics-plugin>=0.1.8; extra == "dev"
  Requires-Dist: mkdocs-macros-plugin>=1.0.5; extra == "dev"
@@ -90,7 +90,6 @@ dev = [
  "mkdocs>=1.6.0",
  "mkdocs-material>=9.5.9",
  "mkdocstrings[python]",
- "mkdocs-jupyter", # notebooks
  "mkdocs-redirects", # 301 redirects
  "mkdocs-ultralytics-plugin>=0.1.8", # for meta descriptions and images, dates and authors
  "mkdocs-macros-plugin>=1.0.5" # duplicating content (i.e. export tables) in multiple places
@@ -14,46 +14,53 @@ POSE_VIDEO = "solution_ci_pose_demo.mp4"

  @pytest.mark.slow
  def test_major_solutions():
- """Test the object counting, heatmap, speed estimation and queue management solution."""
+ """Test the object counting, heatmap, speed estimation, trackzone and queue management solution."""
  safe_download(url=f"{ASSETS_URL}/{DEMO_VIDEO}", dir=TMP)
  cap = cv2.VideoCapture(str(TMP / DEMO_VIDEO))
  assert cap.isOpened(), "Error reading video file"
  region_points = [(20, 400), (1080, 400), (1080, 360), (20, 360)]
  counter = solutions.ObjectCounter(region=region_points, model="yolo11n.pt", show=False) # Test object counter
  heatmap = solutions.Heatmap(colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False) # Test heatmaps
+ heatmap_count = solutions.Heatmap(
+ colormap=cv2.COLORMAP_PARULA, model="yolo11n.pt", show=False, region=region_points
+ ) # Test heatmaps with object counting
  speed = solutions.SpeedEstimator(region=region_points, model="yolo11n.pt", show=False) # Test queue manager
  queue = solutions.QueueManager(region=region_points, model="yolo11n.pt", show=False) # Test speed estimation
  line_analytics = solutions.Analytics(analytics_type="line", model="yolo11n.pt", show=False) # line analytics
  pie_analytics = solutions.Analytics(analytics_type="pie", model="yolo11n.pt", show=False) # line analytics
  bar_analytics = solutions.Analytics(analytics_type="bar", model="yolo11n.pt", show=False) # line analytics
  area_analytics = solutions.Analytics(analytics_type="area", model="yolo11n.pt", show=False) # line analytics
+ trackzone = solutions.TrackZone(region=region_points, model="yolo11n.pt", show=False) # Test trackzone
  frame_count = 0 # Required for analytics
  while cap.isOpened():
  success, im0 = cap.read()
  if not success:
  break
+ frame_count += 1
  original_im0 = im0.copy()
  _ = counter.count(original_im0.copy())
  _ = heatmap.generate_heatmap(original_im0.copy())
+ _ = heatmap_count.generate_heatmap(original_im0.copy())
  _ = speed.estimate_speed(original_im0.copy())
  _ = queue.process_queue(original_im0.copy())
  _ = line_analytics.process_data(original_im0.copy(), frame_count)
  _ = pie_analytics.process_data(original_im0.copy(), frame_count)
  _ = bar_analytics.process_data(original_im0.copy(), frame_count)
  _ = area_analytics.process_data(original_im0.copy(), frame_count)
+ _ = trackzone.trackzone(original_im0.copy())
  cap.release()

  # Test workouts monitoring
  safe_download(url=f"{ASSETS_URL}/{POSE_VIDEO}", dir=TMP)
- cap1 = cv2.VideoCapture(str(TMP / POSE_VIDEO))
- assert cap1.isOpened(), "Error reading video file"
- gym = solutions.AIGym(line_width=2, kpts=[5, 11, 13], show=False)
- while cap1.isOpened():
- success, im0 = cap1.read()
+ cap = cv2.VideoCapture(str(TMP / POSE_VIDEO))
+ assert cap.isOpened(), "Error reading video file"
+ gym = solutions.AIGym(kpts=[5, 11, 13], show=False)
+ while cap.isOpened():
+ success, im0 = cap.read()
  if not success:
  break
  _ = gym.monitor(im0)
- cap1.release()
+ cap.release()


  @pytest.mark.slow
@@ -1,6 +1,6 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license

- __version__ = "8.3.55"
+ __version__ = "8.3.57"

  import os

@@ -303,7 +303,7 @@ def get_cfg(cfg: Union[str, Path, Dict, SimpleNamespace] = DEFAULT_CFG_DICT, ove
  if k in cfg and isinstance(cfg[k], (int, float)):
  cfg[k] = str(cfg[k])
  if cfg.get("name") == "model": # assign model to 'name' arg
- cfg["name"] = cfg.get("model", "").split(".")[0]
+ cfg["name"] = str(cfg.get("model", "")).split(".")[0]
  LOGGER.warning(f"WARNING ⚠️ 'name=model' automatically updated to 'name={cfg['name']}'.")

  # Type and Value checks
@@ -642,7 +642,7 @@ class Mosaic(BaseMixTransform):
  c = s - w, s + h0 - h, s, s + h0

  padw, padh = c[:2]
- x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
+ x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coordinates

  img3[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :] # img3[ymin:ymax, xmin:xmax]
  # hp, wp = h, w # height, width previous for next iteration
@@ -771,7 +771,7 @@ class Mosaic(BaseMixTransform):
  c = s - w, s + h0 - hp - h, s, s + h0 - hp

  padw, padh = c[:2]
- x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coords
+ x1, y1, x2, y2 = (max(x, 0) for x in c) # allocate coordinates

  # Image
  img9[y1:y2, x1:x2] = img[y1 - padh :, x1 - padw :] # img9[ymin:ymax, xmin:xmax]
@@ -1283,7 +1283,7 @@ class RandomPerspective:
  eps (float): Small epsilon value to prevent division by zero.

  Returns:
- (numpy.ndarray): Boolean array of shape (n,) indicating which boxes are candidates.
+ (numpy.ndarray): Boolean array of shape (n) indicating which boxes are candidates.
  True values correspond to boxes that meet all criteria.

  Examples:
@@ -1320,7 +1320,7 @@ class RandomHSV:
  >>> augmenter = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
  >>> image = np.random.randint(0, 255, (100, 100, 3), dtype=np.uint8)
  >>> labels = {"img": image}
- >>> augmented_labels = augmenter(labels)
+ >>> augmenter(labels)
  >>> augmented_image = augmented_labels["img"]
  """

@@ -1337,7 +1337,7 @@ class RandomHSV:

  Examples:
  >>> hsv_aug = RandomHSV(hgain=0.5, sgain=0.5, vgain=0.5)
- >>> augmented_image = hsv_aug(image)
+ >>> hsv_aug(image)
  """
  self.hgain = hgain
  self.sgain = sgain
@@ -1419,7 +1419,7 @@ class RandomFlip:

  Examples:
  >>> flip = RandomFlip(p=0.5, direction="horizontal")
- >>> flip = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4])
+ >>> flip_with_idx = RandomFlip(p=0.7, direction="vertical", flip_idx=[1, 0, 3, 2, 5, 4])
  """
  assert direction in {"horizontal", "vertical"}, f"Support direction `horizontal` or `vertical`, got {direction}"
  assert 0 <= p <= 1.0, f"The probability should be in range [0, 1], but got {p}."
@@ -2022,7 +2022,7 @@ class Format:
  Returns:
  (Dict): A dictionary with formatted data, including:
  - 'img': Formatted image tensor.
- - 'cls': Class labels tensor.
+ - 'cls': Class label's tensor.
  - 'bboxes': Bounding boxes tensor in the specified format.
  - 'masks': Instance masks tensor (if return_mask is True).
  - 'keypoints': Keypoints tensor (if return_keypoint is True).
@@ -241,7 +241,7 @@ def convert_coco(
  ```python
  from ultralytics.data.converter import convert_coco

- convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=True)
+ convert_coco("../datasets/coco/annotations/", use_segments=True, use_keypoints=False, cls91to80=False)
  convert_coco("../datasets/lvis/annotations/", use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
  ```

@@ -266,7 +266,7 @@ def convert_coco(
  # since LVIS val set contains images from COCO 2017 train in addition to the COCO 2017 val split.
  (fn / "train2017").mkdir(parents=True, exist_ok=True)
  (fn / "val2017").mkdir(parents=True, exist_ok=True)
- with open(json_file) as f:
+ with open(json_file, encoding="utf-8") as f:
  data = json.load(f)

  # Create image dict
@@ -323,7 +323,8 @@ class GroundingDataset(YOLODataset):
  if box[2] <= 0 or box[3] <= 0:
  continue

- cat_name = " ".join([img["caption"][t[0] : t[1]] for t in ann["tokens_positive"]])
+ caption = img["caption"]
+ cat_name = " ".join([caption[t[0] : t[1]] for t in ann["tokens_positive"]])
  if cat_name not in cat2id:
  cat2id[cat_name] = len(cat2id)
  texts.append([cat_name])
@@ -67,7 +67,7 @@ def load_yolo_dota(data_root, split="train"):

  Args:
  data_root (str): Data root.
- split (str): The split data set, could be train or val.
+ split (str): The split data set, could be `train` or `val`.

  Notes:
  The directory structure assumed for the DOTA dataset:
@@ -167,6 +167,55 @@ def verify_image_label(args):
  return [None, None, None, None, None, nm, nf, ne, nc, msg]


+ def visualize_image_annotations(image_path, txt_path, label_map):
+ """
+ Visualizes YOLO annotations (bounding boxes and class labels) on an image.
+
+ This function reads an image and its corresponding annotation file in YOLO format, then
+ draws bounding boxes around detected objects and labels them with their respective class names.
+ The bounding box colors are assigned based on the class ID, and the text color is dynamically
+ adjusted for readability, depending on the background color's luminance.
+
+ Args:
+ image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL (e.g., .jpg, .png).
+ txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object with:
+ - class_id (int): The class index.
+ - x_center (float): The X center of the bounding box (relative to image width).
+ - y_center (float): The Y center of the bounding box (relative to image height).
+ - width (float): The width of the bounding box (relative to image width).
+ - height (float): The height of the bounding box (relative to image height).
+ label_map (dict): A dictionary that maps class IDs (integers) to class labels (strings).
+
+ Example:
+ >>> label_map = {0: "cat", 1: "dog", 2: "bird"} # It should include all annotated classes details
+ >>> visualize_image_annotations("path/to/image.jpg", "path/to/annotations.txt", label_map)
+ """
+ import matplotlib.pyplot as plt
+
+ from ultralytics.utils.plotting import colors
+
+ img = np.array(Image.open(image_path))
+ img_height, img_width = img.shape[:2]
+ annotations = []
+ with open(txt_path) as file:
+ for line in file:
+ class_id, x_center, y_center, width, height = map(float, line.split())
+ x = (x_center - width / 2) * img_width
+ y = (y_center - height / 2) * img_height
+ w = width * img_width
+ h = height * img_height
+ annotations.append((x, y, w, h, int(class_id)))
+ fig, ax = plt.subplots(1) # Plot the image and annotations
+ for x, y, w, h, label in annotations:
+ color = tuple(c / 255 for c in colors(label, True)) # Get and normalize the RGB color
+ rect = plt.Rectangle((x, y), w, h, linewidth=2, edgecolor=color, facecolor="none") # Create a rectangle
+ ax.add_patch(rect)
+ luminance = 0.2126 * color[0] + 0.7152 * color[1] + 0.0722 * color[2] # Formula for luminance
+ ax.text(x, y - 5, label_map[label], color="white" if luminance < 0.5 else "black", backgroundcolor=color)
+ ax.imshow(img)
+ plt.show()
+
+
  def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
  """
  Convert a list of polygons to a binary mask of the specified image size.
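Note: the `visualize_image_annotations` helper added above can be called roughly as follows (a minimal sketch; the paths and label map are placeholders for illustration, not taken from the release):

```python
from ultralytics.data.utils import visualize_image_annotations

# Hypothetical dataset files; substitute your own image, YOLO-format label file, and class map.
label_map = {0: "person", 1: "car"}  # must cover every class id used in the label file
visualize_image_annotations(
    "path/to/image.jpg",  # image readable by PIL
    "path/to/image.txt",  # one "class_id x_center y_center width height" line per object (normalized coords)
    label_map,
)
```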
@@ -250,7 +250,8 @@ class Exporter:
  self.device = select_device("cpu" if self.args.device is None else self.args.device)

  # Argument compatibility checks
- validate_args(fmt, self.args, fmts_dict["Arguments"][flags.index(True) + 1])
+ fmt_keys = fmts_dict["Arguments"][flags.index(True) + 1]
+ validate_args(fmt, self.args, fmt_keys)
  if imx and not self.args.int8:
  LOGGER.warning("WARNING ⚠️ IMX only supports int8 export, setting int8=True.")
  self.args.int8 = True
@@ -285,6 +286,7 @@ class Exporter:
  "(torchscript, onnx, openvino, engine, coreml) formats. "
  "See https://docs.ultralytics.com/models/yolo-world for details."
  )
+ model.clip_model = None # openvino int8 export error: https://github.com/ultralytics/ultralytics/pull/18445
  if self.args.int8 and not self.args.data:
  self.args.data = DEFAULT_CFG.data or TASK2DATA[getattr(model, "task", "detect")] # assign default data
  LOGGER.warning(
@@ -368,6 +370,7 @@ class Exporter:
  "batch": self.args.batch,
  "imgsz": self.imgsz,
  "names": model.names,
+ "args": {k: v for k, v in self.args if k in fmt_keys},
  } # model metadata
  if model.task == "pose":
  self.metadata["kpt_shape"] = model.model[-1].kpt_shape
@@ -602,7 +605,7 @@ class Exporter:
  @try_export
  def export_paddle(self, prefix=colorstr("PaddlePaddle:")):
  """YOLO Paddle export."""
- check_requirements(("paddlepaddle", "x2paddle"))
+ check_requirements(("paddlepaddle-gpu" if torch.cuda.is_available() else "paddlepaddle", "x2paddle"))
  import x2paddle # noqa
  from x2paddle.convert import pytorch2paddle # noqa

@@ -949,7 +952,7 @@ class Exporter:
  "sng4onnx>=1.0.1", # required by 'onnx2tf' package
  "onnx_graphsurgeon>=0.3.26", # required by 'onnx2tf' package
  "onnx>=1.12.0",
- "onnx2tf>1.17.5,<=1.22.3",
+ "onnx2tf>1.17.5,<=1.26.3",
  "onnxslim>=0.1.31",
  "tflite_support<=0.4.3" if IS_JETSON else "tflite_support", # fix ImportError 'GLIBCXX_3.4.29'
  "flatbuffers>=23.5.26,<100", # update old 'flatbuffers' included inside tensorflow package
@@ -1136,7 +1139,7 @@ class Exporter:
  if getattr(self.model, "end2end", False):
  raise ValueError("IMX export is not supported for end2end models.")
  if "C2f" not in self.model.__str__():
- raise ValueError("IMX export is only supported for YOLOv8 detection models")
+ raise ValueError("IMX export is only supported for YOLOv8n detection models")
  check_requirements(("model-compression-toolkit==2.1.1", "sony-custom-layers==0.2.0", "tensorflow==2.12.0"))
  check_requirements("imx500-converter[pt]==3.14.3") # Separate requirements for imx500-converter

@@ -76,7 +76,7 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:
  def generate_crop_boxes(
  im_size: Tuple[int, ...], n_layers: int, overlap_ratio: float
  ) -> Tuple[List[List[int]], List[int]]:
- """Generates crop boxes of varying sizes for multi-scale image processing, with layered overlapping regions."""
+ """Generates crop boxes of varying sizes for multiscale image processing, with layered overlapping regions."""
  crop_boxes, layer_idxs = [], []
  im_h, im_w = im_size
  short_side = min(im_h, im_w)
@@ -502,11 +502,11 @@ def do_pool(x: torch.Tensor, pool: nn.Module, norm: nn.Module = None) -> torch.T

  class MultiScaleAttention(nn.Module):
  """
- Implements multi-scale self-attention with optional query pooling for efficient feature extraction.
+ Implements multiscale self-attention with optional query pooling for efficient feature extraction.

- This class provides a flexible implementation of multi-scale attention, allowing for optional
+ This class provides a flexible implementation of multiscale attention, allowing for optional
  downsampling of query features through pooling. It's designed to enhance the model's ability to
- capture multi-scale information in visual tasks.
+ capture multiscale information in visual tasks.

  Attributes:
  dim (int): Input dimension of the feature map.
@@ -518,7 +518,7 @@ class MultiScaleAttention(nn.Module):
  proj (nn.Linear): Output projection.

  Methods:
- forward: Applies multi-scale attention to the input tensor.
+ forward: Applies multiscale attention to the input tensor.

  Examples:
  >>> import torch
@@ -537,7 +537,7 @@ class MultiScaleAttention(nn.Module):
  num_heads: int,
  q_pool: nn.Module = None,
  ):
- """Initializes multi-scale attention with optional query pooling for efficient feature extraction."""
+ """Initializes multiscale attention with optional query pooling for efficient feature extraction."""
  super().__init__()

  self.dim = dim
@@ -552,7 +552,7 @@ class MultiScaleAttention(nn.Module):
  self.proj = nn.Linear(dim_out, dim_out)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Applies multi-scale attention with optional query pooling to extract multi-scale features."""
+ """Applies multiscale attention with optional query pooling to extract multiscale features."""
  B, H, W, _ = x.shape
  # qkv with shape (B, H * W, 3, nHead, C)
  qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1)
@@ -582,9 +582,9 @@ class MultiScaleAttention(nn.Module):

  class MultiScaleBlock(nn.Module):
  """
- A multi-scale attention block with window partitioning and query pooling for efficient vision transformers.
+ A multiscale attention block with window partitioning and query pooling for efficient vision transformers.

- This class implements a multi-scale attention mechanism with optional window partitioning and downsampling,
+ This class implements a multiscale attention mechanism with optional window partitioning and downsampling,
  designed for use in vision transformer architectures.

  Attributes:
@@ -601,7 +601,7 @@ class MultiScaleBlock(nn.Module):
  proj (nn.Linear | None): Projection layer for dimension mismatch.

  Methods:
- forward: Processes input tensor through the multi-scale block.
+ forward: Processes input tensor through the multiscale block.

  Examples:
  >>> block = MultiScaleBlock(dim=256, dim_out=512, num_heads=8, window_size=7)
@@ -623,7 +623,7 @@ class MultiScaleBlock(nn.Module):
  act_layer: nn.Module = nn.GELU,
  window_size: int = 0,
  ):
- """Initializes a multi-scale attention block with window partitioning and optional query pooling."""
+ """Initializes a multiscale attention block with window partitioning and optional query pooling."""
  super().__init__()

  if isinstance(norm_layer, str):
@@ -660,7 +660,7 @@ class MultiScaleBlock(nn.Module):
  self.proj = nn.Linear(dim, dim_out)

  def forward(self, x: torch.Tensor) -> torch.Tensor:
- """Processes input through multi-scale attention and MLP, with optional windowing and downsampling."""
+ """Processes input through multiscale attention and MLP, with optional windowing and downsampling."""
  shortcut = x # B, H, W, C
  x = self.norm1(x)

@@ -425,7 +425,7 @@ class SAM2Model(torch.nn.Module):
  low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
  high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
  obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask.
- object_score_logits: Tensor of shape (B,) with object score logits.
+ object_score_logits: Tensor of shape (B) with object score logits.

  Where M is 3 if multimask_output=True, and 1 if multimask_output=False.

@@ -643,7 +643,7 @@ class SAM2Model(torch.nn.Module):
  if not is_init_cond_frame:
  # Retrieve the memories encoded with the maskmem backbone
  to_cat_memory, to_cat_memory_pos_embed = [], []
- # Add conditioning frames's output first (all cond frames have t_pos=0 for
+ # Add conditioning frame's output first (all cond frames have t_pos=0 for
  # when getting temporal positional embedding below)
  assert len(output_dict["cond_frame_outputs"]) > 0
  # Select a maximum number of temporally closest cond frames for cross attention
@@ -91,9 +91,9 @@ class Predictor(BasePredictor):
  _callbacks (Dict | None): Dictionary of callback functions to customize behavior.

  Examples:
- >>> predictor = Predictor(cfg=DEFAULT_CFG)
- >>> predictor = Predictor(overrides={"imgsz": 640})
- >>> predictor = Predictor(_callbacks={"on_predict_start": custom_callback})
+ >>> predictor_example = Predictor(cfg=DEFAULT_CFG)
+ >>> predictor_example_with_imgsz = Predictor(overrides={"imgsz": 640})
+ >>> predictor_example_with_callback = Predictor(_callbacks={"on_predict_start": custom_callback})
  """
  if overrides is None:
  overrides = {}
@@ -215,7 +215,7 @@ class Predictor(BasePredictor):
  im (torch.Tensor): Preprocessed input image tensor with shape (N, C, H, W).
  bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
  points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
+ labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
  masks (np.ndarray | None): Low-res masks from previous predictions with shape (N, H, W). For SAM, H=W=256.
  multimask_output (bool): Flag to return multiple masks for ambiguous prompts.

@@ -260,7 +260,7 @@ class Predictor(BasePredictor):
  dst_shape (tuple): The target shape (height, width) for the prompts.
  bboxes (np.ndarray | List | None): Bounding boxes in XYXY format with shape (N, 4).
  points (np.ndarray | List | None): Points indicating object locations with shape (N, 2) or (N, num_points, 2), in pixels.
- labels (np.ndarray | List | None): Point prompt labels with shape (N,) or (N, num_points). 1 for foreground, 0 for background.
+ labels (np.ndarray | List | None): Point prompt labels with shape (N) or (N, num_points). 1 for foreground, 0 for background.
  masks (List | np.ndarray, Optional): Masks for the objects, where each mask is a 2D array.

  Raises:
@@ -853,8 +853,8 @@ class SAM2VideoPredictor(SAM2Predictor):

  Examples:
  >>> predictor = SAM2VideoPredictor(cfg=DEFAULT_CFG)
- >>> predictor = SAM2VideoPredictor(overrides={"imgsz": 640})
- >>> predictor = SAM2VideoPredictor(_callbacks={"on_predict_start": custom_callback})
+ >>> predictor_example_with_imgsz = SAM2VideoPredictor(overrides={"imgsz": 640})
+ >>> predictor_example_with_callback = SAM2VideoPredictor(_callbacks={"on_predict_start": custom_callback})
  """
  super().__init__(cfg, overrides, _callbacks)
  self.inference_state = {}
@@ -1096,7 +1096,7 @@ class SAM2VideoPredictor(SAM2Predictor):
  # to `propagate_in_video_preflight`).
  consolidated_frame_inds = self.inference_state["consolidated_frame_inds"]
  for is_cond in {False, True}:
- # Separately consolidate conditioning and non-conditioning temp outptus
+ # Separately consolidate conditioning and non-conditioning temp outputs
  storage_key = "cond_frame_outputs" if is_cond else "non_cond_frame_outputs"
  # Find all the frames that contain temporary outputs for any objects
  # (these should be the frames that have just received clicks for mask inputs
@@ -1161,36 +1161,35 @@ class SAM2VideoPredictor(SAM2Predictor):
  assert predictor.dataset is not None
  assert predictor.dataset.mode == "video"

- inference_state = {}
- inference_state["num_frames"] = predictor.dataset.frames
- # inputs on each frame
- inference_state["point_inputs_per_obj"] = {}
- inference_state["mask_inputs_per_obj"] = {}
- # values that don't change across frames (so we only need to hold one copy of them)
- inference_state["constants"] = {}
- # mapping between client-side object id and model-side object index
- inference_state["obj_id_to_idx"] = OrderedDict()
- inference_state["obj_idx_to_id"] = OrderedDict()
- inference_state["obj_ids"] = []
- # A storage to hold the model's tracking results and states on each frame
- inference_state["output_dict"] = {
- "cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
- "non_cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
- }
- # Slice (view) of each object tracking results, sharing the same memory with "output_dict"
- inference_state["output_dict_per_obj"] = {}
- # A temporary storage to hold new outputs when user interact with a frame
- # to add clicks or mask (it's merged into "output_dict" before propagation starts)
- inference_state["temp_output_dict_per_obj"] = {}
- # Frames that already holds consolidated outputs from click or mask inputs
- # (we directly use their consolidated outputs during tracking)
- inference_state["consolidated_frame_inds"] = {
- "cond_frame_outputs": set(), # set containing frame indices
- "non_cond_frame_outputs": set(), # set containing frame indices
+ inference_state = {
+ "num_frames": predictor.dataset.frames,
+ "point_inputs_per_obj": {}, # inputs points on each frame
+ "mask_inputs_per_obj": {}, # inputs mask on each frame
+ "constants": {}, # values that don't change across frames (so we only need to hold one copy of them)
+ # mapping between client-side object id and model-side object index
+ "obj_id_to_idx": OrderedDict(),
+ "obj_idx_to_id": OrderedDict(),
+ "obj_ids": [],
+ # A storage to hold the model's tracking results and states on each frame
+ "output_dict": {
+ "cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
+ "non_cond_frame_outputs": {}, # dict containing {frame_idx: <out>}
+ },
+ # Slice (view) of each object tracking results, sharing the same memory with "output_dict"
+ "output_dict_per_obj": {},
+ # A temporary storage to hold new outputs when user interact with a frame
+ # to add clicks or mask (it's merged into "output_dict" before propagation starts)
+ "temp_output_dict_per_obj": {},
+ # Frames that already holds consolidated outputs from click or mask inputs
+ # (we directly use their consolidated outputs during tracking)
+ "consolidated_frame_inds": {
+ "cond_frame_outputs": set(), # set containing frame indices
+ "non_cond_frame_outputs": set(), # set containing frame indices
+ },
+ # metadata for each tracking frame (e.g. which direction it's tracked)
+ "tracking_has_started": False,
+ "frames_already_tracked": [],
  }
- # metadata for each tracking frame (e.g. which direction it's tracked)
- inference_state["tracking_has_started"] = False
- inference_state["frames_already_tracked"] = []
  predictor.inference_state = inference_state

  def get_im_features(self, im, batch=1):
@@ -133,7 +133,7 @@ class AutoBackend(nn.Module):

  # Set device
  cuda = torch.cuda.is_available() and device.type != "cpu" # use CUDA
- if cuda and not any([nn_module, pt, jit, engine, onnx]): # GPU dataloader formats
+ if cuda and not any([nn_module, pt, jit, engine, onnx, paddle]): # GPU dataloader formats
  device = torch.device("cpu")
  cuda = False

@@ -26,9 +26,9 @@ class GMC:
  Methods:
  __init__: Initializes a GMC object with the specified method and downscale factor.
  apply: Applies the chosen method to a raw frame and optionally uses provided detections.
- applyEcc: Applies the ECC algorithm to a raw frame.
- applyFeatures: Applies feature-based methods like ORB or SIFT to a raw frame.
- applySparseOptFlow: Applies the Sparse Optical Flow method to a raw frame.
+ apply_ecc: Applies the ECC algorithm to a raw frame.
+ apply_features: Applies feature-based methods like ORB or SIFT to a raw frame.
+ apply_sparseoptflow: Applies the Sparse Optical Flow method to a raw frame.
  reset_params: Resets the internal parameters of the GMC object.

  Examples:
@@ -108,15 +108,15 @@ class GMC:
  (480, 640, 3)
  """
  if self.method in {"orb", "sift"}:
- return self.applyFeatures(raw_frame, detections)
+ return self.apply_features(raw_frame, detections)
  elif self.method == "ecc":
- return self.applyEcc(raw_frame)
+ return self.apply_ecc(raw_frame)
  elif self.method == "sparseOptFlow":
- return self.applySparseOptFlow(raw_frame)
+ return self.apply_sparseoptflow(raw_frame)
  else:
  return np.eye(2, 3)

- def applyEcc(self, raw_frame: np.array) -> np.array:
+ def apply_ecc(self, raw_frame: np.array) -> np.array:
  """
  Apply the ECC (Enhanced Correlation Coefficient) algorithm to a raw frame for motion compensation.

@@ -128,7 +128,7 @@ class GMC:

  Examples:
  >>> gmc = GMC(method="ecc")
- >>> processed_frame = gmc.applyEcc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
+ >>> processed_frame = gmc.apply_ecc(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
  >>> print(processed_frame)
  [[1. 0. 0.]
  [0. 1. 0.]]
@@ -161,7 +161,7 @@ class GMC:

  return H

- def applyFeatures(self, raw_frame: np.array, detections: list = None) -> np.array:
+ def apply_features(self, raw_frame: np.array, detections: list = None) -> np.array:
  """
  Apply feature-based methods like ORB or SIFT to a raw frame.

@@ -175,7 +175,7 @@ class GMC:
  Examples:
  >>> gmc = GMC(method="orb")
  >>> raw_frame = np.random.randint(0, 255, (480, 640, 3), dtype=np.uint8)
- >>> processed_frame = gmc.applyFeatures(raw_frame)
+ >>> processed_frame = gmc.apply_features(raw_frame)
  >>> print(processed_frame.shape)
  (2, 3)
  """
@@ -304,7 +304,7 @@ class GMC:

  return H

- def applySparseOptFlow(self, raw_frame: np.array) -> np.array:
+ def apply_sparseoptflow(self, raw_frame: np.array) -> np.array:
  """
  Apply Sparse Optical Flow method to a raw frame.

@@ -316,7 +316,7 @@ class GMC:

  Examples:
  >>> gmc = GMC()
- >>> result = gmc.applySparseOptFlow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
+ >>> result = gmc.apply_sparseoptflow(np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]]))
  >>> print(result)
  [[1. 0. 0.]
  [0. 1. 0.]]