ultralytics 8.1.29__py3-none-any.whl → 8.3.63__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- tests/__init__.py +22 -0
- tests/conftest.py +83 -0
- tests/test_cli.py +122 -0
- tests/test_cuda.py +155 -0
- tests/test_engine.py +131 -0
- tests/test_exports.py +216 -0
- tests/test_integrations.py +150 -0
- tests/test_python.py +615 -0
- tests/test_solutions.py +94 -0
- ultralytics/__init__.py +11 -8
- ultralytics/cfg/__init__.py +569 -131
- ultralytics/cfg/datasets/Argoverse.yaml +2 -1
- ultralytics/cfg/datasets/DOTAv1.5.yaml +3 -2
- ultralytics/cfg/datasets/DOTAv1.yaml +3 -2
- ultralytics/cfg/datasets/GlobalWheat2020.yaml +3 -2
- ultralytics/cfg/datasets/ImageNet.yaml +2 -1
- ultralytics/cfg/datasets/Objects365.yaml +5 -4
- ultralytics/cfg/datasets/SKU-110K.yaml +2 -1
- ultralytics/cfg/datasets/VOC.yaml +3 -2
- ultralytics/cfg/datasets/VisDrone.yaml +6 -5
- ultralytics/cfg/datasets/african-wildlife.yaml +25 -0
- ultralytics/cfg/datasets/brain-tumor.yaml +23 -0
- ultralytics/cfg/datasets/carparts-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco-pose.yaml +7 -6
- ultralytics/cfg/datasets/coco.yaml +3 -2
- ultralytics/cfg/datasets/coco128-seg.yaml +4 -3
- ultralytics/cfg/datasets/coco128.yaml +4 -3
- ultralytics/cfg/datasets/coco8-pose.yaml +3 -2
- ultralytics/cfg/datasets/coco8-seg.yaml +3 -2
- ultralytics/cfg/datasets/coco8.yaml +3 -2
- ultralytics/cfg/datasets/crack-seg.yaml +3 -2
- ultralytics/cfg/datasets/dog-pose.yaml +24 -0
- ultralytics/cfg/datasets/dota8.yaml +3 -2
- ultralytics/cfg/datasets/hand-keypoints.yaml +26 -0
- ultralytics/cfg/datasets/lvis.yaml +1236 -0
- ultralytics/cfg/datasets/medical-pills.yaml +22 -0
- ultralytics/cfg/datasets/open-images-v7.yaml +2 -1
- ultralytics/cfg/datasets/package-seg.yaml +5 -4
- ultralytics/cfg/datasets/signature.yaml +21 -0
- ultralytics/cfg/datasets/tiger-pose.yaml +3 -2
- ultralytics/cfg/datasets/xView.yaml +2 -1
- ultralytics/cfg/default.yaml +14 -11
- ultralytics/cfg/models/11/yolo11-cls-resnet18.yaml +24 -0
- ultralytics/cfg/models/11/yolo11-cls.yaml +33 -0
- ultralytics/cfg/models/11/yolo11-obb.yaml +50 -0
- ultralytics/cfg/models/11/yolo11-pose.yaml +51 -0
- ultralytics/cfg/models/11/yolo11-seg.yaml +50 -0
- ultralytics/cfg/models/11/yolo11.yaml +50 -0
- ultralytics/cfg/models/rt-detr/rtdetr-l.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet101.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-resnet50.yaml +5 -2
- ultralytics/cfg/models/rt-detr/rtdetr-x.yaml +5 -2
- ultralytics/cfg/models/v10/yolov10b.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10l.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10m.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10n.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10s.yaml +45 -0
- ultralytics/cfg/models/v10/yolov10x.yaml +45 -0
- ultralytics/cfg/models/v3/yolov3-spp.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3-tiny.yaml +5 -2
- ultralytics/cfg/models/v3/yolov3.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5-p6.yaml +5 -2
- ultralytics/cfg/models/v5/yolov5.yaml +5 -2
- ultralytics/cfg/models/v6/yolov6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet101.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls-resnet50.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-cls.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +6 -2
- ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-p6.yaml +10 -7
- ultralytics/cfg/models/v8/yolov8-pose-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-pose.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg-p6.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-seg.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-world.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -2
- ultralytics/cfg/models/v8/yolov8.yaml +5 -2
- ultralytics/cfg/models/v9/yolov9c-seg.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9c.yaml +30 -25
- ultralytics/cfg/models/v9/yolov9e-seg.yaml +64 -0
- ultralytics/cfg/models/v9/yolov9e.yaml +46 -42
- ultralytics/cfg/models/v9/yolov9m.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9s.yaml +41 -0
- ultralytics/cfg/models/v9/yolov9t.yaml +41 -0
- ultralytics/cfg/solutions/default.yaml +24 -0
- ultralytics/cfg/trackers/botsort.yaml +8 -5
- ultralytics/cfg/trackers/bytetrack.yaml +8 -5
- ultralytics/data/__init__.py +14 -3
- ultralytics/data/annotator.py +37 -15
- ultralytics/data/augment.py +1783 -289
- ultralytics/data/base.py +62 -27
- ultralytics/data/build.py +37 -8
- ultralytics/data/converter.py +196 -36
- ultralytics/data/dataset.py +233 -94
- ultralytics/data/loaders.py +199 -96
- ultralytics/data/split_dota.py +39 -29
- ultralytics/data/utils.py +111 -41
- ultralytics/engine/__init__.py +1 -1
- ultralytics/engine/exporter.py +579 -244
- ultralytics/engine/model.py +604 -252
- ultralytics/engine/predictor.py +22 -11
- ultralytics/engine/results.py +1228 -218
- ultralytics/engine/trainer.py +191 -129
- ultralytics/engine/tuner.py +18 -18
- ultralytics/engine/validator.py +18 -15
- ultralytics/hub/__init__.py +31 -13
- ultralytics/hub/auth.py +11 -7
- ultralytics/hub/google/__init__.py +159 -0
- ultralytics/hub/session.py +128 -94
- ultralytics/hub/utils.py +20 -21
- ultralytics/models/__init__.py +4 -2
- ultralytics/models/fastsam/__init__.py +2 -3
- ultralytics/models/fastsam/model.py +26 -4
- ultralytics/models/fastsam/predict.py +127 -63
- ultralytics/models/fastsam/utils.py +1 -44
- ultralytics/models/fastsam/val.py +1 -1
- ultralytics/models/nas/__init__.py +1 -1
- ultralytics/models/nas/model.py +21 -10
- ultralytics/models/nas/predict.py +3 -6
- ultralytics/models/nas/val.py +4 -4
- ultralytics/models/rtdetr/__init__.py +1 -1
- ultralytics/models/rtdetr/model.py +1 -1
- ultralytics/models/rtdetr/predict.py +6 -8
- ultralytics/models/rtdetr/train.py +6 -2
- ultralytics/models/rtdetr/val.py +3 -3
- ultralytics/models/sam/__init__.py +3 -3
- ultralytics/models/sam/amg.py +29 -23
- ultralytics/models/sam/build.py +211 -13
- ultralytics/models/sam/model.py +91 -30
- ultralytics/models/sam/modules/__init__.py +1 -1
- ultralytics/models/sam/modules/blocks.py +1129 -0
- ultralytics/models/sam/modules/decoders.py +381 -53
- ultralytics/models/sam/modules/encoders.py +515 -324
- ultralytics/models/sam/modules/memory_attention.py +237 -0
- ultralytics/models/sam/modules/sam.py +969 -21
- ultralytics/models/sam/modules/tiny_encoder.py +425 -154
- ultralytics/models/sam/modules/transformer.py +159 -60
- ultralytics/models/sam/modules/utils.py +293 -0
- ultralytics/models/sam/predict.py +1263 -132
- ultralytics/models/utils/__init__.py +1 -1
- ultralytics/models/utils/loss.py +36 -24
- ultralytics/models/utils/ops.py +3 -7
- ultralytics/models/yolo/__init__.py +3 -3
- ultralytics/models/yolo/classify/__init__.py +1 -1
- ultralytics/models/yolo/classify/predict.py +7 -8
- ultralytics/models/yolo/classify/train.py +17 -22
- ultralytics/models/yolo/classify/val.py +8 -4
- ultralytics/models/yolo/detect/__init__.py +1 -1
- ultralytics/models/yolo/detect/predict.py +3 -5
- ultralytics/models/yolo/detect/train.py +11 -4
- ultralytics/models/yolo/detect/val.py +90 -52
- ultralytics/models/yolo/model.py +14 -9
- ultralytics/models/yolo/obb/__init__.py +1 -1
- ultralytics/models/yolo/obb/predict.py +2 -2
- ultralytics/models/yolo/obb/train.py +5 -3
- ultralytics/models/yolo/obb/val.py +41 -23
- ultralytics/models/yolo/pose/__init__.py +1 -1
- ultralytics/models/yolo/pose/predict.py +3 -5
- ultralytics/models/yolo/pose/train.py +2 -2
- ultralytics/models/yolo/pose/val.py +51 -17
- ultralytics/models/yolo/segment/__init__.py +1 -1
- ultralytics/models/yolo/segment/predict.py +3 -5
- ultralytics/models/yolo/segment/train.py +2 -2
- ultralytics/models/yolo/segment/val.py +60 -19
- ultralytics/models/yolo/world/__init__.py +5 -0
- ultralytics/models/yolo/world/train.py +92 -0
- ultralytics/models/yolo/world/train_world.py +109 -0
- ultralytics/nn/__init__.py +1 -1
- ultralytics/nn/autobackend.py +228 -93
- ultralytics/nn/modules/__init__.py +39 -14
- ultralytics/nn/modules/activation.py +21 -0
- ultralytics/nn/modules/block.py +526 -66
- ultralytics/nn/modules/conv.py +24 -7
- ultralytics/nn/modules/head.py +177 -34
- ultralytics/nn/modules/transformer.py +6 -5
- ultralytics/nn/modules/utils.py +1 -2
- ultralytics/nn/tasks.py +226 -82
- ultralytics/solutions/__init__.py +30 -1
- ultralytics/solutions/ai_gym.py +96 -143
- ultralytics/solutions/analytics.py +247 -0
- ultralytics/solutions/distance_calculation.py +78 -135
- ultralytics/solutions/heatmap.py +93 -247
- ultralytics/solutions/object_counter.py +184 -259
- ultralytics/solutions/parking_management.py +246 -0
- ultralytics/solutions/queue_management.py +112 -0
- ultralytics/solutions/region_counter.py +116 -0
- ultralytics/solutions/security_alarm.py +144 -0
- ultralytics/solutions/solutions.py +178 -0
- ultralytics/solutions/speed_estimation.py +86 -174
- ultralytics/solutions/streamlit_inference.py +190 -0
- ultralytics/solutions/trackzone.py +68 -0
- ultralytics/trackers/__init__.py +1 -1
- ultralytics/trackers/basetrack.py +32 -13
- ultralytics/trackers/bot_sort.py +61 -28
- ultralytics/trackers/byte_tracker.py +83 -51
- ultralytics/trackers/track.py +21 -6
- ultralytics/trackers/utils/__init__.py +1 -1
- ultralytics/trackers/utils/gmc.py +62 -48
- ultralytics/trackers/utils/kalman_filter.py +166 -35
- ultralytics/trackers/utils/matching.py +40 -21
- ultralytics/utils/__init__.py +511 -239
- ultralytics/utils/autobatch.py +40 -22
- ultralytics/utils/benchmarks.py +266 -85
- ultralytics/utils/callbacks/__init__.py +1 -1
- ultralytics/utils/callbacks/base.py +1 -3
- ultralytics/utils/callbacks/clearml.py +7 -6
- ultralytics/utils/callbacks/comet.py +39 -17
- ultralytics/utils/callbacks/dvc.py +1 -1
- ultralytics/utils/callbacks/hub.py +16 -16
- ultralytics/utils/callbacks/mlflow.py +28 -24
- ultralytics/utils/callbacks/neptune.py +6 -2
- ultralytics/utils/callbacks/raytune.py +3 -4
- ultralytics/utils/callbacks/tensorboard.py +18 -18
- ultralytics/utils/callbacks/wb.py +27 -20
- ultralytics/utils/checks.py +172 -100
- ultralytics/utils/dist.py +2 -1
- ultralytics/utils/downloads.py +40 -34
- ultralytics/utils/errors.py +1 -1
- ultralytics/utils/files.py +72 -38
- ultralytics/utils/instance.py +41 -19
- ultralytics/utils/loss.py +83 -55
- ultralytics/utils/metrics.py +61 -56
- ultralytics/utils/ops.py +94 -89
- ultralytics/utils/patches.py +30 -14
- ultralytics/utils/plotting.py +600 -269
- ultralytics/utils/tal.py +67 -26
- ultralytics/utils/torch_utils.py +305 -112
- ultralytics/utils/triton.py +2 -1
- ultralytics/utils/tuner.py +21 -12
- ultralytics-8.3.63.dist-info/METADATA +370 -0
- ultralytics-8.3.63.dist-info/RECORD +241 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/WHEEL +1 -1
- ultralytics/data/explorer/__init__.py +0 -5
- ultralytics/data/explorer/explorer.py +0 -472
- ultralytics/data/explorer/gui/__init__.py +0 -1
- ultralytics/data/explorer/gui/dash.py +0 -268
- ultralytics/data/explorer/utils.py +0 -166
- ultralytics/models/fastsam/prompt.py +0 -357
- ultralytics-8.1.29.dist-info/METADATA +0 -373
- ultralytics-8.1.29.dist-info/RECORD +0 -197
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/LICENSE +0 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.1.29.dist-info → ultralytics-8.3.63.dist-info}/top_level.txt +0 -0
ultralytics/models/fastsam/predict.py
CHANGED
@@ -1,86 +1,150 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import torch
+from PIL import Image

-from ultralytics.
-from ultralytics.
-from ultralytics.
-from ultralytics.utils import
+from ultralytics.models.yolo.segment import SegmentationPredictor
+from ultralytics.utils import DEFAULT_CFG, checks
+from ultralytics.utils.metrics import box_iou
+from ultralytics.utils.ops import scale_masks

+from .utils import adjust_bboxes_to_image_border

-
+
+class FastSAMPredictor(SegmentationPredictor):
     """
     FastSAMPredictor is specialized for fast SAM (Segment Anything Model) segmentation prediction tasks in Ultralytics
     YOLO framework.

-    This class extends the
-
-
-
-    Attributes:
-        cfg (dict): Configuration parameters for prediction.
-        overrides (dict, optional): Optional parameter overrides for custom behavior.
-        _callbacks (dict, optional): Optional list of callback functions to be invoked during prediction.
+    This class extends the SegmentationPredictor, customizing the prediction pipeline specifically for fast SAM. It
+    adjusts post-processing steps to incorporate mask prediction and non-max suppression while optimizing for single-
+    class segmentation.
     """

     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initializes a FastSAMPredictor for fast SAM segmentation tasks in Ultralytics YOLO framework."""
+        super().__init__(cfg, overrides, _callbacks)
+        self.prompts = {}
+
+    def postprocess(self, preds, img, orig_imgs):
+        """Applies box postprocess for FastSAM predictions."""
+        bboxes = self.prompts.pop("bboxes", None)
+        points = self.prompts.pop("points", None)
+        labels = self.prompts.pop("labels", None)
+        texts = self.prompts.pop("texts", None)
+        results = super().postprocess(preds, img, orig_imgs)
+        for result in results:
+            full_box = torch.tensor(
+                [0, 0, result.orig_shape[1], result.orig_shape[0]], device=preds[0].device, dtype=torch.float32
+            )
+            boxes = adjust_bboxes_to_image_border(result.boxes.xyxy, result.orig_shape)
+            idx = torch.nonzero(box_iou(full_box[None], boxes) > 0.9).flatten()
+            if idx.numel() != 0:
+                result.boxes.xyxy[idx] = full_box
+
+        return self.prompt(results, bboxes=bboxes, points=points, labels=labels, texts=texts)
+
+    def prompt(self, results, bboxes=None, points=None, labels=None, texts=None):
         """
-
+        Internal function for image segmentation inference based on cues like bounding boxes, points, and masks.
+        Leverages SAM's specialized architecture for prompt-based, real-time segmentation.

         Args:
-
-
-
+            results (Results | List[Results]): The original inference results from FastSAM models without any prompts.
+            bboxes (np.ndarray | List, optional): Bounding boxes with shape (N, 4), in XYXY format.
+            points (np.ndarray | List, optional): Points indicating object locations with shape (N, 2), in pixels.
+            labels (np.ndarray | List, optional): Labels for point prompts, shape (N, ). 1 = foreground, 0 = background.
+            texts (str | List[str], optional): Textual prompts, a list contains string objects.
+
+        Returns:
+            (List[Results]): The output results determined by prompts.
         """
-
-
+        if bboxes is None and points is None and texts is None:
+            return results
+        prompt_results = []
+        if not isinstance(results, list):
+            results = [results]
+        for result in results:
+            if len(result) == 0:
+                prompt_results.append(result)
+                continue
+            masks = result.masks.data
+            if masks.shape[1:] != result.orig_shape:
+                masks = scale_masks(masks[None], result.orig_shape)[0]
+            # bboxes prompt
+            idx = torch.zeros(len(result), dtype=torch.bool, device=self.device)
+            if bboxes is not None:
+                bboxes = torch.as_tensor(bboxes, dtype=torch.int32, device=self.device)
+                bboxes = bboxes[None] if bboxes.ndim == 1 else bboxes
+                bbox_areas = (bboxes[:, 3] - bboxes[:, 1]) * (bboxes[:, 2] - bboxes[:, 0])
+                mask_areas = torch.stack([masks[:, b[1] : b[3], b[0] : b[2]].sum(dim=(1, 2)) for b in bboxes])
+                full_mask_areas = torch.sum(masks, dim=(1, 2))

-
+                union = bbox_areas[:, None] + full_mask_areas - mask_areas
+                idx[torch.argmax(mask_areas / union, dim=1)] = True
+            if points is not None:
+                points = torch.as_tensor(points, dtype=torch.int32, device=self.device)
+                points = points[None] if points.ndim == 1 else points
+                if labels is None:
+                    labels = torch.ones(points.shape[0])
+                labels = torch.as_tensor(labels, dtype=torch.int32, device=self.device)
+                assert len(labels) == len(points), (
+                    f"Excepted `labels` got same size as `point`, but got {len(labels)} and {len(points)}"
+                )
+                point_idx = (
+                    torch.ones(len(result), dtype=torch.bool, device=self.device)
+                    if labels.sum() == 0  # all negative points
+                    else torch.zeros(len(result), dtype=torch.bool, device=self.device)
+                )
+                for point, label in zip(points, labels):
+                    point_idx[torch.nonzero(masks[:, point[1], point[0]], as_tuple=True)[0]] = bool(label)
+                idx |= point_idx
+            if texts is not None:
+                if isinstance(texts, str):
+                    texts = [texts]
+                crop_ims, filter_idx = [], []
+                for i, b in enumerate(result.boxes.xyxy.tolist()):
+                    x1, y1, x2, y2 = (int(x) for x in b)
+                    if masks[i].sum() <= 100:
+                        filter_idx.append(i)
+                        continue
+                    crop_ims.append(Image.fromarray(result.orig_img[y1:y2, x1:x2, ::-1]))
+                similarity = self._clip_inference(crop_ims, texts)
+                text_idx = torch.argmax(similarity, dim=-1)  # (M, )
+                if len(filter_idx):
+                    text_idx += (torch.tensor(filter_idx, device=self.device)[None] <= int(text_idx)).sum(0)
+                idx[text_idx] = True
+
+            prompt_results.append(result[idx])
+
+        return prompt_results
+
+    def _clip_inference(self, images, texts):
         """
-
-        size, and returns the final results.
+        CLIP Inference process.

         Args:
-
-
-            orig_imgs (list | torch.Tensor): The original image or list of images.
+            images (List[PIL.Image]): A list of source images and each of them should be PIL.Image type with RGB channel order.
+            texts (List[str]): A list of prompt texts and each of them should be string object.

         Returns:
-            (
+            (torch.Tensor): The similarity between given images and texts.
         """
-
-
-
-
-
-
-
-
-        )
-
-
-
-
-
-
-
-
-
-        if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
-            orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
-
-        results = []
-        proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
-        for i, pred in enumerate(p):
-            orig_img = orig_imgs[i]
-            img_path = self.batch[0][i]
-            if not len(pred):  # save empty boxes
-                masks = None
-            elif self.args.retina_masks:
-                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
-                masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
-            else:
-                masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
-                pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
-            results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
-        return results
+        try:
+            import clip
+        except ImportError:
+            checks.check_requirements("git+https://github.com/ultralytics/CLIP.git")
+            import clip
+        if (not hasattr(self, "clip_model")) or (not hasattr(self, "clip_preprocess")):
+            self.clip_model, self.clip_preprocess = clip.load("ViT-B/32", device=self.device)
+        images = torch.stack([self.clip_preprocess(image).to(self.device) for image in images])
+        tokenized_text = clip.tokenize(texts).to(self.device)
+        image_features = self.clip_model.encode_image(images)
+        text_features = self.clip_model.encode_text(tokenized_text)
+        image_features /= image_features.norm(dim=-1, keepdim=True)  # (N, 512)
+        text_features /= text_features.norm(dim=-1, keepdim=True)  # (M, 512)
+        return (image_features * text_features[:, None]).sum(-1)  # (M, N)
+
+    def set_prompts(self, prompts):
+        """Set prompts in advance."""
+        self.prompts = prompts
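The rewrite above turns FastSAM's predictor into a prompt-driven flow: `set_prompts()` stashes prompts, `postprocess()` pops and applies them, and `prompt()` filters the everything-mode masks by box overlap, point hits, or CLIP text similarity. A minimal usage sketch of the public entry point (the `FastSAM-s.pt` weight name and asset path are illustrative, not taken from this diff):

```python
from ultralytics import FastSAM

model = FastSAM("FastSAM-s.pt")  # assumed downloadable weight file

# Everything-mode segmentation, then prompt-based filtering of the results
results = model("ultralytics/assets/bus.jpg", bboxes=[439, 437, 524, 709])  # box prompt (XYXY)
results = model("ultralytics/assets/bus.jpg", points=[[200, 200]], labels=[1])  # foreground point
results = model("ultralytics/assets/bus.jpg", texts="a photo of a bus")  # CLIP-scored text prompt
```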
ultralytics/models/fastsam/utils.py
CHANGED
@@ -1,6 +1,4 @@
-# Ultralytics
-
-import torch
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license


 def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
@@ -15,7 +13,6 @@ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
     Returns:
         adjusted_boxes (torch.Tensor): adjusted bounding boxes
     """
-
     # Image dimensions
     h, w = image_shape

@@ -25,43 +22,3 @@ def adjust_bboxes_to_image_border(boxes, image_shape, threshold=20):
     boxes[boxes[:, 2] > w - threshold, 2] = w  # x2
     boxes[boxes[:, 3] > h - threshold, 3] = h  # y2
     return boxes
-
-
-def bbox_iou(box1, boxes, iou_thres=0.9, image_shape=(640, 640), raw_output=False):
-    """
-    Compute the Intersection-Over-Union of a bounding box with respect to an array of other bounding boxes.
-
-    Args:
-        box1 (torch.Tensor): (4, )
-        boxes (torch.Tensor): (n, 4)
-        iou_thres (float): IoU threshold
-        image_shape (tuple): (height, width)
-        raw_output (bool): If True, return the raw IoU values instead of the indices
-
-    Returns:
-        high_iou_indices (torch.Tensor): Indices of boxes with IoU > thres
-    """
-    boxes = adjust_bboxes_to_image_border(boxes, image_shape)
-    # Obtain coordinates for intersections
-    x1 = torch.max(box1[0], boxes[:, 0])
-    y1 = torch.max(box1[1], boxes[:, 1])
-    x2 = torch.min(box1[2], boxes[:, 2])
-    y2 = torch.min(box1[3], boxes[:, 3])
-
-    # Compute the area of intersection
-    intersection = (x2 - x1).clamp(0) * (y2 - y1).clamp(0)
-
-    # Compute the area of both individual boxes
-    box1_area = (box1[2] - box1[0]) * (box1[3] - box1[1])
-    box2_area = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
-
-    # Compute the area of union
-    union = box1_area + box2_area - intersection
-
-    # Compute the IoU
-    iou = intersection / union  # Should be shape (n, )
-    if raw_output:
-        return 0 if iou.numel() == 0 else iou
-
-    # return indices of boxes with IoU > thres
-    return torch.nonzero(iou > iou_thres).flatten()
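With `bbox_iou` removed (its role is now covered by `ultralytics.utils.metrics.box_iou` in the predictor), `adjust_bboxes_to_image_border` is the only helper left in this module. A quick sketch of what it does: any box edge within `threshold` pixels of the image border is snapped flush onto that border.

```python
import torch

from ultralytics.models.fastsam.utils import adjust_bboxes_to_image_border

# Every edge of this box lies within the default 20 px threshold of a 640x640 border.
boxes = torch.tensor([[15.0, 10.0, 625.0, 630.0]])
print(adjust_bboxes_to_image_border(boxes, image_shape=(640, 640)))
# tensor([[  0.,   0., 640., 640.]]) -> snapped to the full image extent
```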
ultralytics/models/nas/model.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """
 YOLO-NAS model interface.

@@ -6,8 +6,8 @@ Example:
     ```python
     from ultralytics import NAS

-    model = NAS(
-    results = model.predict(
+    model = NAS("yolo_nas_s")
+    results = model.predict("ultralytics/assets/bus.jpg")
     ```
 """

@@ -16,7 +16,9 @@ from pathlib import Path
 import torch

 from ultralytics.engine.model import Model
-from ultralytics.utils.
+from ultralytics.utils.downloads import attempt_download_asset
+from ultralytics.utils.torch_utils import model_info
+
 from .predict import NASPredictor
 from .val import NASValidator

@@ -32,8 +34,8 @@ class NAS(Model):
         ```python
         from ultralytics import NAS

-        model = NAS(
-        results = model.predict(
+        model = NAS("yolo_nas_s")
+        results = model.predict("ultralytics/assets/bus.jpg")
         ```

     Attributes:
@@ -45,19 +47,28 @@ class NAS(Model):

     def __init__(self, model="yolo_nas_s.pt") -> None:
         """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""
-        assert Path(model).suffix not in
+        assert Path(model).suffix not in {".yaml", ".yml"}, "YOLO-NAS models only support pre-trained models."
         super().__init__(model, task="detect")

-
-    def _load(self, weights: str, task: str):
+    def _load(self, weights: str, task=None) -> None:
         """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided."""
         import super_gradients

         suffix = Path(weights).suffix
         if suffix == ".pt":
-            self.model = torch.load(weights)
+            self.model = torch.load(attempt_download_asset(weights))
+
         elif suffix == "":
             self.model = super_gradients.training.models.get(weights, pretrained_weights="coco")
+
+            # Override the forward method to ignore additional arguments
+            def new_forward(x, *args, **kwargs):
+                """Ignore additional __call__ arguments."""
+                return self.model._original_forward(x)
+
+            self.model._original_forward = self.model.forward
+            self.model.forward = new_forward
+
         # Standardize model
         self.model.fuse = lambda verbose=True: self.model
         self.model.stride = torch.tensor([32])
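The new `_load` logic wraps the super-gradients model's `forward` so extra keyword arguments passed along by Ultralytics internals (e.g. `augment`, `visualize`) are silently dropped instead of raising a `TypeError`. A standalone sketch of the same monkey-patch pattern on a toy module (class and argument names here are illustrative):

```python
import torch
import torch.nn as nn


class Strict(nn.Module):
    """Toy stand-in for a third-party model whose forward() accepts only x."""

    def forward(self, x):
        return x * 2


model = Strict()
model._original_forward = model.forward  # keep a handle on the original bound method
model.forward = lambda x, *args, **kwargs: model._original_forward(x)  # shadow it per-instance

print(model(torch.ones(1), augment=True, visualize=False))  # extra kwargs ignored -> tensor([2.])
```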
ultralytics/models/nas/predict.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import torch

@@ -22,7 +22,7 @@ class NASPredictor(BasePredictor):
         ```python
         from ultralytics import NAS

-        model = NAS(
+        model = NAS("yolo_nas_s")
         predictor = model.predictor
         # Assumes that raw_preds, img, orig_imgs are available
         results = predictor.postprocess(raw_preds, img, orig_imgs)
@@ -34,7 +34,6 @@ class NASPredictor(BasePredictor):

     def postprocess(self, preds_in, img, orig_imgs):
         """Postprocess predictions and returns a list of Results objects."""
-
         # Cat boxes and class scores
         boxes = ops.xyxy2xywh(preds_in[0][0])
         preds = torch.cat((boxes, preds_in[0][1]), -1).permute(0, 2, 1)
@@ -52,9 +51,7 @@ class NASPredictor(BasePredictor):
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

         results = []
-        for
-            orig_img = orig_imgs[i]
+        for pred, orig_img, img_path in zip(preds, orig_imgs, self.batch[0]):
             pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
-            img_path = self.batch[0][i]
             results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
         return results
ultralytics/models/nas/val.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import torch

@@ -17,14 +17,14 @@ class NASValidator(DetectionValidator):
     ultimately producing the final detections.

     Attributes:
-        args (Namespace): Namespace containing various configurations for post-processing, such as confidence and IoU
+        args (Namespace): Namespace containing various configurations for post-processing, such as confidence and IoU.
         lb (torch.Tensor): Optional tensor for multilabel NMS.

     Example:
         ```python
         from ultralytics import NAS

-        model = NAS(
+        model = NAS("yolo_nas_s")
         validator = model.validator
         # Assumes that raw_preds are available
         final_preds = validator.postprocess(raw_preds)
@@ -44,7 +44,7 @@ class NASValidator(DetectionValidator):
             self.args.iou,
             labels=self.lb,
             multi_label=False,
-            agnostic=self.args.single_cls,
+            agnostic=self.args.single_cls or self.args.agnostic_nms,
             max_det=self.args.max_det,
             max_time_img=0.5,
         )
ultralytics/models/rtdetr/model.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """
 Interface for Baidu's RT-DETR, a Vision Transformer-based real-time object detector. RT-DETR offers real-time
 performance and high accuracy, excelling in accelerated backends like CUDA with TensorRT. It features an efficient
ultralytics/models/rtdetr/predict.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import torch

@@ -21,7 +21,7 @@ class RTDETRPredictor(BasePredictor):
         from ultralytics.utils import ASSETS
         from ultralytics.models.rtdetr import RTDETRPredictor

-        args = dict(model=
+        args = dict(model="rtdetr-l.pt", source=ASSETS)
         predictor = RTDETRPredictor(overrides=args)
         predictor.predict_cli()
         ```
@@ -56,18 +56,16 @@ class RTDETRPredictor(BasePredictor):
             orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)

         results = []
-        for
+        for bbox, score, orig_img, img_path in zip(bboxes, scores, orig_imgs, self.batch[0]):  # (300, 4)
             bbox = ops.xywh2xyxy(bbox)
-
-            idx =
+            max_score, cls = score.max(-1, keepdim=True)  # (300, 1)
+            idx = max_score.squeeze(-1) > self.args.conf  # (300, )
             if self.args.classes is not None:
                 idx = (cls == torch.tensor(self.args.classes, device=cls.device)).any(1) & idx
-            pred = torch.cat([bbox,
-            orig_img = orig_imgs[i]
+            pred = torch.cat([bbox, max_score, cls], dim=-1)[idx]  # filter
             oh, ow = orig_img.shape[:2]
             pred[..., [0, 2]] *= ow
             pred[..., [1, 3]] *= oh
-            img_path = self.batch[0][i]
             results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred))
         return results

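The reworked loop above filters RT-DETR's fixed set of decoder queries by confidence before scaling, with no NMS step. A self-contained sketch of the same tensor logic on dummy data (shapes and the 0.25 threshold are illustrative):

```python
import torch

num_queries, num_classes, conf = 300, 80, 0.25
bbox = torch.rand(num_queries, 4)  # normalized (x, y, w, h) per query
score = torch.rand(num_queries, num_classes)  # per-class scores per query

max_score, cls = score.max(-1, keepdim=True)  # (300, 1) best score and class id per query
idx = max_score.squeeze(-1) > conf  # (300,) boolean keep-mask
pred = torch.cat([bbox, max_score, cls], dim=-1)[idx]  # (n, 6) filtered detections
print(pred.shape)
```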
ultralytics/models/rtdetr/train.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 from copy import copy

@@ -7,6 +7,7 @@ import torch
 from ultralytics.models.yolo.detect import DetectionTrainer
 from ultralytics.nn.tasks import RTDETRDetectionModel
 from ultralytics.utils import RANK, colorstr
+
 from .val import RTDETRDataset, RTDETRValidator


@@ -24,7 +25,7 @@ class RTDETRTrainer(DetectionTrainer):
         ```python
         from ultralytics.models.rtdetr.train import RTDETRTrainer

-        args = dict(model=
+        args = dict(model="rtdetr-l.yaml", data="coco8.yaml", imgsz=640, epochs=3)
         trainer = RTDETRTrainer(overrides=args)
         trainer.train()
         ```
@@ -67,8 +68,11 @@ class RTDETRTrainer(DetectionTrainer):
             hyp=self.args,
             rect=False,
             cache=self.args.cache or None,
+            single_cls=self.args.single_cls or False,
             prefix=colorstr(f"{mode}: "),
+            classes=self.args.classes,
             data=self.data,
+            fraction=self.args.fraction if mode == "train" else 1.0,
         )

     def get_validator(self):
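`build_dataset` now forwards `single_cls`, `classes`, and `fraction` to the RT-DETR dataset, so the standard training arguments behave the same as for YOLO models (note `fraction` applies to the train split only; validation always uses 1.0). A hedged sketch of driving these through the public API (the weight and dataset names are the usual downloadable defaults, not pinned by this diff):

```python
from ultralytics import RTDETR

model = RTDETR("rtdetr-l.pt")
model.train(
    data="coco8.yaml",
    epochs=1,
    imgsz=640,
    fraction=0.5,  # subsample half of the train split
    classes=[0],  # keep only class-0 labels
    single_cls=False,
)
```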
ultralytics/models/rtdetr/val.py
CHANGED
@@ -1,4 +1,4 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 import torch

@@ -62,7 +62,7 @@ class RTDETRValidator(DetectionValidator):
         ```python
         from ultralytics.models.rtdetr import RTDETRValidator

-        args = dict(model=
+        args = dict(model="rtdetr-l.pt", data="coco8.yaml")
         validator = RTDETRValidator(args=args)
         validator()
         ```
@@ -125,7 +125,7 @@ class RTDETRValidator(DetectionValidator):
         bbox = ops.xywh2xyxy(bbox)  # target boxes
         bbox[..., [0, 2]] *= ori_shape[1]  # native-space pred
         bbox[..., [1, 3]] *= ori_shape[0]  # native-space pred
-        return
+        return {"cls": cls, "bbox": bbox, "ori_shape": ori_shape, "imgsz": imgsz, "ratio_pad": ratio_pad}

     def _prepare_pred(self, pred, pbatch):
         """Prepares and returns a batch with transformed bounding boxes and class labels."""
ultralytics/models/sam/__init__.py
CHANGED
@@ -1,6 +1,6 @@
-# Ultralytics
+# Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

 from .model import SAM
-from .predict import Predictor
+from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor

-__all__ = "SAM", "Predictor"  # tuple or list
+__all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor"  # tuple or list