ultralytics 8.3.98__py3-none-any.whl → 8.3.99__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. tests/test_python.py +56 -0
  2. ultralytics/__init__.py +3 -2
  3. ultralytics/cfg/models/11/yoloe-11-seg.yaml +48 -0
  4. ultralytics/cfg/models/11/yoloe-11.yaml +48 -0
  5. ultralytics/cfg/models/v8/yoloe-v8-seg.yaml +45 -0
  6. ultralytics/cfg/models/v8/yoloe-v8.yaml +45 -0
  7. ultralytics/data/augment.py +101 -5
  8. ultralytics/data/dataset.py +165 -12
  9. ultralytics/engine/exporter.py +4 -3
  10. ultralytics/engine/trainer.py +16 -7
  11. ultralytics/models/__init__.py +2 -2
  12. ultralytics/models/yolo/__init__.py +3 -3
  13. ultralytics/models/yolo/detect/val.py +6 -1
  14. ultralytics/models/yolo/model.py +182 -3
  15. ultralytics/models/yolo/segment/val.py +43 -16
  16. ultralytics/models/yolo/yoloe/__init__.py +21 -0
  17. ultralytics/models/yolo/yoloe/predict.py +170 -0
  18. ultralytics/models/yolo/yoloe/train.py +355 -0
  19. ultralytics/models/yolo/yoloe/train_seg.py +141 -0
  20. ultralytics/models/yolo/yoloe/val.py +187 -0
  21. ultralytics/nn/autobackend.py +3 -2
  22. ultralytics/nn/modules/__init__.py +18 -1
  23. ultralytics/nn/modules/block.py +17 -1
  24. ultralytics/nn/modules/head.py +359 -22
  25. ultralytics/nn/tasks.py +276 -10
  26. ultralytics/nn/text_model.py +193 -0
  27. ultralytics/utils/callbacks/comet.py +3 -6
  28. ultralytics/utils/downloads.py +6 -2
  29. ultralytics/utils/loss.py +67 -6
  30. ultralytics/utils/plotting.py +1 -1
  31. ultralytics/utils/tal.py +1 -1
  32. {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/METADATA +10 -10
  33. {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/RECORD +37 -27
  34. {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/WHEEL +0 -0
  35. {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/entry_points.txt +0 -0
  36. {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/licenses/LICENSE +0 -0
  37. {ultralytics-8.3.98.dist-info → ultralytics-8.3.99.dist-info}/top_level.txt +0 -0
ultralytics/data/dataset.py
@@ -13,7 +13,7 @@ from PIL import Image
 from torch.utils.data import ConcatDataset

 from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr
-from ultralytics.utils.ops import resample_segments
+from ultralytics.utils.ops import resample_segments, segments2boxes
 from ultralytics.utils.torch_utils import TORCHVISION_0_18

 from .augment import (
@@ -27,6 +27,7 @@ from .augment import (
     v8_transforms,
 )
 from .base import BaseDataset
+from .converter import merge_multi_segment
 from .utils import (
     HELP_URL,
     LOGGER,
@@ -289,12 +290,15 @@ class YOLODataset(BaseDataset):
             (dict): Collated batch with stacked tensors.
         """
         new_batch = {}
+        batch = [dict(sorted(b.items())) for b in batch]  # make sure the keys are in the same order
         keys = batch[0].keys()
         values = list(zip(*[list(b.values()) for b in batch]))
         for i, k in enumerate(keys):
             value = values[i]
-            if k == "img":
+            if k == "img" or k == "text_feats":
                 value = torch.stack(value, 0)
+            elif k == "visuals":
+                value = torch.nn.utils.rnn.pad_sequence(value, batch_first=True)
             if k in {"masks", "keypoints", "bboxes", "cls", "segments", "obb"}:
                 value = torch.cat(value, 0)
             new_batch[k] = value
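To make the new collate path concrete, here is a minimal standalone sketch (keys and shapes are illustrative, not the trainer's real batch layout): same-shape entries such as `img` and the new `text_feats` are stacked, while ragged `visuals` are zero-padded to the longest sample in the batch. Sorting keys first guards against samples emitting dict keys in different orders, e.g. across concatenated datasets.

import torch
from torch.nn.utils.rnn import pad_sequence

# Two samples with equal-shape images but different numbers of visual prompts
batch = [
    {"img": torch.zeros(3, 640, 640), "visuals": torch.zeros(2, 4)},
    {"img": torch.zeros(3, 640, 640), "visuals": torch.zeros(5, 4)},
]

imgs = torch.stack([b["img"] for b in batch], 0)  # same shape -> stack: (2, 3, 640, 640)
visuals = pad_sequence([b["visuals"] for b in batch], batch_first=True)  # (2, 5, 4), shorter sample zero-padded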
@@ -346,7 +350,9 @@ class YOLOMultiModalDataset(YOLODataset):
         """
         labels = super().update_labels_info(label)
         # NOTE: some categories are concatenated with its synonyms by `/`.
+        # NOTE: and `RandomLoadText` would randomly select one of them if there are multiple words.
         labels["texts"] = [v.split("/") for _, v in self.data["names"].items()]
+
         return labels

     def build_transforms(self, hyp=None):
@@ -362,9 +368,46 @@ class YOLOMultiModalDataset(YOLODataset):
         transforms = super().build_transforms(hyp)
         if self.augment:
             # NOTE: hard-coded the args for now.
-            transforms.insert(-1, RandomLoadText(max_samples=min(self.data["nc"], 80), padding=True))
+            # NOTE: this implementation is different from official yoloe,
+            # the strategy of selecting negative is restricted in one dataset,
+            # while official pre-saved neg embeddings from all datasets at once.
+            transform = RandomLoadText(
+                max_samples=min(self.data["nc"], 80),
+                padding=True,
+                padding_value=self._get_neg_texts(self.category_freq),
+            )
+            transforms.insert(-1, transform)
         return transforms

+    @property
+    def category_names(self):
+        """
+        Return category names for the dataset.
+
+        Returns:
+            (Tuple[str]): List of class names.
+        """
+        names = self.data["names"].values()
+        return {n.strip() for name in names for n in name.split("/")}  # category names
+
+    @property
+    def category_freq(self):
+        """Return frequency of each category in the dataset."""
+        texts = [v.split("/") for v in self.data["names"].values()]
+        category_freq = defaultdict(int)
+        for label in self.labels:
+            for c in label["cls"]:  # to check
+                text = texts[int(c)]
+                for t in text:
+                    t = t.strip()
+                    category_freq[t] += 1
+        return category_freq
+
+    @staticmethod
+    def _get_neg_texts(category_freq, threshold=100):
+        """Get negative text samples based on frequency threshold."""
+        return [k for k, v in category_freq.items() if v >= threshold]
+

 class GroundingDataset(YOLODataset):
     """
@@ -386,17 +429,17 @@ class GroundingDataset(YOLODataset):
         >>> len(dataset)  # Number of valid images with annotations
     """

-    def __init__(self, *args, task="detect", json_file, **kwargs):
+    def __init__(self, *args, task="detect", json_file="", **kwargs):
         """
         Initialize a GroundingDataset for object detection.

         Args:
             json_file (str): Path to the JSON file containing annotations.
-            task (str): Must be 'detect' for GroundingDataset.
+            task (str): Must be 'detect' or 'segment' for GroundingDataset.
             *args (Any): Additional positional arguments for the parent class.
             **kwargs (Any): Additional keyword arguments for the parent class.
         """
-        assert task == "detect", "`GroundingDataset` only support `detect` task for now!"
+        assert task in {"detect", "segment"}, "GroundingDataset currently only supports `detect` and `segment` tasks"
         self.json_file = json_file
         super().__init__(*args, task=task, data={}, **kwargs)
@@ -412,14 +455,31 @@ class GroundingDataset(YOLODataset):
         """
         return []

-    def get_labels(self):
+    def verify_labels(self, labels):
+        """Verify the number of instances in the dataset matches expected counts."""
+        instance_count = sum(label["bboxes"].shape[0] for label in labels)
+        if "final_mixed_train_no_coco_segm" in self.json_file:
+            assert instance_count == 3662344
+        elif "final_mixed_train_no_coco" in self.json_file:
+            assert instance_count == 3681235
+        elif "final_flickr_separateGT_train_segm" in self.json_file:
+            assert instance_count == 638214
+        elif "final_flickr_separateGT_train" in self.json_file:
+            assert instance_count == 640704
+        else:
+            assert False
+
+    def cache_labels(self, path=Path("./labels.cache")):
         """
         Loads annotations from a JSON file, filters, and normalizes bounding boxes for each image.

+        Args:
+            path (Path): Path where to save the cache file.
+
         Returns:
-            (List[dict]): List of label dictionaries, each containing information about an image and its annotations.
+            (dict): Dictionary containing cached labels and related information.
         """
-        labels = []
+        x = {"labels": []}
         LOGGER.info("Loading annotation file...")
         with open(self.json_file) as f:
             annotations = json.load(f)
@@ -435,6 +495,7 @@ class GroundingDataset(YOLODataset):
                 continue
             self.im_files.append(str(im_file))
             bboxes = []
+            segments = []
             cat2id = {}
             texts = []
             for ann in anns:
@@ -448,7 +509,10 @@ class GroundingDataset(YOLODataset):
                     continue

                 caption = img["caption"]
-                cat_name = " ".join([caption[t[0] : t[1]] for t in ann["tokens_positive"]])
+                cat_name = " ".join([caption[t[0] : t[1]] for t in ann["tokens_positive"]]).lower().strip()
+                if not cat_name:
+                    continue
+
                 if cat_name not in cat2id:
                     cat2id[cat_name] = len(cat2id)
                     texts.append([cat_name])
@@ -456,18 +520,66 @@ class GroundingDataset(YOLODataset):
                 box = [cls] + box.tolist()
                 if box not in bboxes:
                     bboxes.append(box)
+                    if ann.get("segmentation") is not None:
+                        if len(ann["segmentation"]) == 0:
+                            segments.append(box)
+                            continue
+                        elif len(ann["segmentation"]) > 1:
+                            s = merge_multi_segment(ann["segmentation"])
+                            s = (np.concatenate(s, axis=0) / np.array([w, h], dtype=np.float32)).reshape(-1).tolist()
+                        else:
+                            s = [j for i in ann["segmentation"] for j in i]  # all segments concatenated
+                            s = (
+                                (np.array(s, dtype=np.float32).reshape(-1, 2) / np.array([w, h], dtype=np.float32))
+                                .reshape(-1)
+                                .tolist()
+                            )
+                        s = [cls] + s
+                        segments.append(s)
             lb = np.array(bboxes, dtype=np.float32) if len(bboxes) else np.zeros((0, 5), dtype=np.float32)
-            labels.append(
+
+            if segments:
+                classes = np.array([x[0] for x in segments], dtype=np.float32)
+                segments = [np.array(x[1:], dtype=np.float32).reshape(-1, 2) for x in segments]  # (cls, xy1...)
+                lb = np.concatenate((classes.reshape(-1, 1), segments2boxes(segments)), 1)  # (cls, xywh)
+            lb = np.array(lb, dtype=np.float32)
+
+            x["labels"].append(
                 {
                     "im_file": im_file,
                     "shape": (h, w),
                     "cls": lb[:, 0:1],  # n, 1
                     "bboxes": lb[:, 1:],  # n, 4
+                    "segments": segments,
                     "normalized": True,
                     "bbox_format": "xywh",
                     "texts": texts,
                 }
             )
+        x["hash"] = get_hash(self.json_file)
+        save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
+        return x
+
+    def get_labels(self):
+        """
+        Load labels from cache or generate them from JSON file.
+
+        Returns:
+            (List[dict]): List of label dictionaries, each containing information about an image and its annotations.
+        """
+        cache_path = Path(self.json_file).with_suffix(".cache")
+        try:
+            cache, _ = load_dataset_cache_file(cache_path), True  # attempt to load a *.cache file
+            assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
+            assert cache["hash"] == get_hash(self.json_file)  # identical hash
+        except (FileNotFoundError, AssertionError, AttributeError):
+            cache, _ = self.cache_labels(cache_path), False  # run cache ops
+        [cache.pop(k) for k in ("hash", "version")]  # remove items
+        labels = cache["labels"]
+        # self.verify_labels(labels)
+        self.im_files = [str(label["im_file"]) for label in labels]
+        if LOCAL_RANK in {-1, 0}:
+            LOGGER.info(f"Load {self.json_file} from cache file {cache_path}")
         return labels

     def build_transforms(self, hyp=None):
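For reference, the box rederivation that `segments2boxes` performs on the normalized polygons above amounts to a min/max bound followed by an xyxy-to-xywh conversion; a simplified single-polygon sketch (not the library function itself):

import numpy as np

def polygon_to_xywh(points: np.ndarray) -> np.ndarray:
    """Reduce an (n, 2) array of normalized xy points to one xywh box (simplified segments2boxes)."""
    x, y = points[:, 0], points[:, 1]
    x1, y1, x2, y2 = x.min(), y.min(), x.max(), y.max()
    return np.array([(x1 + x2) / 2, (y1 + y2) / 2, x2 - x1, y2 - y1], dtype=np.float32)

w, h = 640, 480  # image size used for normalization, as in cache_labels
poly = np.array([[100, 50], [200, 50], [200, 150], [100, 150]], dtype=np.float32)
box = polygon_to_xywh(poly / np.array([w, h], dtype=np.float32))  # normalized (cx, cy, bw, bh)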
@@ -483,9 +595,38 @@ class GroundingDataset(YOLODataset):
         transforms = super().build_transforms(hyp)
         if self.augment:
             # NOTE: hard-coded the args for now.
-            transforms.insert(-1, RandomLoadText(max_samples=80, padding=True))
+            # NOTE: this implementation is different from official yoloe,
+            # the strategy of selecting negative is restricted in one dataset,
+            # while official pre-saved neg embeddings from all datasets at once.
+            transform = RandomLoadText(
+                max_samples=80,
+                padding=True,
+                padding_value=self._get_neg_texts(self.category_freq),
+            )
+            transforms.insert(-1, transform)
         return transforms

+    @property
+    def category_names(self):
+        """Return unique category names from the dataset."""
+        return {t.strip() for label in self.labels for text in label["texts"] for t in text}
+
+    @property
+    def category_freq(self):
+        """Return frequency of each category in the dataset."""
+        category_freq = defaultdict(int)
+        for label in self.labels:
+            for text in label["texts"]:
+                for t in text:
+                    t = t.strip()
+                    category_freq[t] += 1
+        return category_freq
+
+    @staticmethod
+    def _get_neg_texts(category_freq, threshold=100):
+        """Get negative text samples based on frequency threshold."""
+        return [k for k, v in category_freq.items() if v >= threshold]
+

 class YOLOConcatDataset(ConcatDataset):
     """
@@ -516,6 +657,18 @@ class YOLOConcatDataset(ConcatDataset):
         """
         return YOLODataset.collate_fn(batch)

+    def close_mosaic(self, hyp):
+        """
+        Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
+
+        Args:
+            hyp (dict): Hyperparameters for transforms.
+        """
+        for dataset in self.datasets:
+            if not hasattr(dataset, "close_mosaic"):
+                continue
+            dataset.close_mosaic(hyp)
+

 # TODO: support semantic segmentation
 class SemanticDataset(BaseDataset):

ultralytics/engine/exporter.py
@@ -327,6 +327,7 @@ class Exporter:
                 "See https://docs.ultralytics.com/models/yolo-world for details."
             )
             model.clip_model = None  # openvino int8 export error: https://github.com/ultralytics/ultralytics/pull/18445
+
         if self.args.int8 and not self.args.data:
             self.args.data = DEFAULT_CFG.data or TASK2DATA[getattr(model, "task", "detect")]  # assign default data
             LOGGER.warning(
@@ -635,7 +636,7 @@ class Exporter:
         # Generate calibration data for integer quantization
         ignored_scope = None
         if isinstance(self.model.model[-1], Detect):
-            # Includes all Detect subclasses like Segment, Pose, OBB, WorldDetect
+            # Includes all Detect subclasses like Segment, Pose, OBB, WorldDetect, YOLOEDetect
             head_module_name = ".".join(list(self.model.named_modules())[-1][0].split(".")[:2])
             ignored_scope = nncf.IgnoredScope(  # ignore operations
                 patterns=[
@@ -797,12 +798,12 @@ class Exporter:
             LOGGER.warning(f"{prefix} WARNING ⚠️ 'nms=True' is only available for Detect models like 'yolo11n.pt'.")
         # TODO CoreML Segment and Pose model pipelining
         model = self.model
-
         ts = torch.jit.trace(model.eval(), self.im, strict=False)  # TorchScript model
         ct_model = ct.convert(
             ts,
-            inputs=[ct.ImageType("image", shape=self.im.shape, scale=scale, bias=bias)],
+            inputs=[ct.ImageType("image", shape=self.im.shape, scale=scale, bias=bias)],  # expects ct.TensorType
             classifier_config=classifier_config,
+            minimum_deployment_target=ct.target.iOS16,
             convert_to="neuralnetwork" if mlmodel else "mlprogram",
         )
         bits, mode = (8, "kmeans") if self.args.int8 else (16, "linear") if self.args.half else (32, None)

ultralytics/engine/trainer.py
@@ -249,6 +249,7 @@ class BaseTrainer:
             )
         always_freeze_names = [".dfl"]  # always freeze these layers
         freeze_layer_names = [f"model.{x}." for x in freeze_list] + always_freeze_names
+        self.freeze_layer_names = freeze_layer_names
         for k, v in self.model.named_parameters():
             # v.register_hook(lambda x: torch.nan_to_num(x))  # NaN to 0 (commented for erratic training results)
             if any(x in k for x in freeze_layer_names):
@@ -350,7 +351,7 @@ class BaseTrainer:
                 warnings.simplefilter("ignore")  # suppress 'Detected lr_scheduler.step() before optimizer.step()'
                 self.scheduler.step()

-            self.model.train()
+            self._model_train()
             if RANK != -1:
                 self.train_loader.sampler.set_epoch(epoch)
             pbar = enumerate(self.train_loader)
@@ -381,7 +382,8 @@ class BaseTrainer:
                 # Forward
                 with autocast(self.amp):
                     batch = self.preprocess_batch(batch)
-                    self.loss, self.loss_items = self.model(batch)
+                    loss, self.loss_items = self.model(batch)
+                    self.loss = loss.sum()
                     if RANK != -1:
                         self.loss *= world_size
                     self.tloss = (
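The `.sum()` above lets the trainer accept either a 0-dim loss (existing models) or a per-component loss vector (as the new YOLOE trainers return) through one code path; a quick sketch of why both reduce cleanly:

import torch

scalar_loss = torch.tensor(2.6, requires_grad=True)  # old behavior: already a scalar
vector_loss = torch.tensor([1.5, 0.8, 0.3], requires_grad=True)  # new: per-component losses

assert scalar_loss.sum().shape == vector_loss.sum().shape == torch.Size([])
vector_loss.sum().backward()  # gradients still flow to every component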
@@ -496,9 +498,7 @@ class BaseTrainer:
             memory = torch.mps.driver_allocated_memory()
             if fraction:
                 return __import__("psutil").virtual_memory().percent / 100
-        elif self.device.type == "cpu":
-            pass
-        else:
+        elif self.device.type != "cpu":
             memory = torch.cuda.memory_reserved()
             if fraction:
                 total = torch.cuda.get_device_properties(self.device).total_memory
@@ -520,6 +520,14 @@ class BaseTrainer:

         return pd.read_csv(self.csv).to_dict(orient="list")

+    def _model_train(self):
+        """Set model in training mode."""
+        self.model.train()
+        # Freeze BN stat
+        for n, m in self.model.named_modules():
+            if any(filter(lambda f: f in n, self.freeze_layer_names)) and isinstance(m, nn.BatchNorm2d):
+                m.eval()
+
     def save_model(self):
         """Save model training checkpoints with additional metadata."""
         import io
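Why `_model_train` exists: `model.train()` recursively puts every submodule back in training mode, so frozen BatchNorm layers would silently keep updating their running statistics. A self-contained sketch with a made-up frozen-layer name:

import torch.nn as nn

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8), nn.ReLU())
freeze_layer_names = ["1"]  # pretend submodule "1" (the BN layer) is frozen

model.train()  # flips ALL submodules back to training mode
for n, m in model.named_modules():
    if any(f in n for f in freeze_layer_names) and isinstance(m, nn.BatchNorm2d):
        m.eval()  # keep running_mean/running_var fixed for the frozen layer

assert model[1].training is False  # BN stats stay frozen while the rest trains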
@@ -720,7 +728,7 @@ class BaseTrainer:

         # Check that resume data YAML exists, otherwise strip to force re-download of dataset
         ckpt_args = attempt_load_weights(last).args
-        if not Path(ckpt_args["data"]).exists():
+        if not isinstance(ckpt_args["data"], dict) and not Path(ckpt_args["data"]).exists():
             ckpt_args["data"] = self.args.data

         resume = True
@@ -812,7 +820,8 @@ class BaseTrainer:
                 fullname = f"{module_name}.{param_name}" if module_name else param_name
                 if "bias" in fullname:  # bias (no decay)
                     g[2].append(param)
-                elif isinstance(module, bn):  # weight (no decay)
+                elif isinstance(module, bn) or "logit_scale" in fullname:  # weight (no decay)
+                    # ContrastiveHead and BNContrastiveHead included here with 'logit_scale'
                     g[1].append(param)
                 else:  # weight (with decay)
                     g[0].append(param)
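For context, the three-bucket grouping this change extends, condensed into a runnable sketch (the `logit_scale` test is the new part; it exempts the learnable temperature of ContrastiveHead/BNContrastiveHead from weight decay):

import torch.nn as nn

bn = tuple(v for k, v in vars(nn).items() if "Norm" in k)  # all normalization layer classes
g = [], [], []  # decayed weights, no-decay weights, biases

model = nn.Sequential(nn.Conv2d(3, 8, 3), nn.BatchNorm2d(8))
for module_name, module in model.named_modules():
    for param_name, param in module.named_parameters(recurse=False):
        fullname = f"{module_name}.{param_name}" if module_name else param_name
        if "bias" in fullname:
            g[2].append(param)  # biases: never decayed
        elif isinstance(module, bn) or "logit_scale" in fullname:
            g[1].append(param)  # norm weights and contrastive temperatures: no decay
        else:
            g[0].append(param)  # ordinary weights: decayed

assert (len(g[0]), len(g[1]), len(g[2])) == (1, 1, 2)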

ultralytics/models/__init__.py
@@ -4,6 +4,6 @@ from .fastsam import FastSAM
 from .nas import NAS
 from .rtdetr import RTDETR
 from .sam import SAM
-from .yolo import YOLO, YOLOWorld
+from .yolo import YOLO, YOLOE, YOLOWorld

-__all__ = "YOLO", "RTDETR", "SAM", "FastSAM", "NAS", "YOLOWorld"  # allow simpler import
+__all__ = "YOLO", "RTDETR", "SAM", "FastSAM", "NAS", "YOLOWorld", "YOLOE"  # allow simpler import

ultralytics/models/yolo/__init__.py
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license

-from ultralytics.models.yolo import classify, detect, obb, pose, segment, world
+from ultralytics.models.yolo import classify, detect, obb, pose, segment, world, yoloe

-from .model import YOLO, YOLOWorld
+from .model import YOLO, YOLOE, YOLOWorld

-__all__ = "classify", "segment", "detect", "pose", "obb", "world", "YOLO", "YOLOWorld"
+__all__ = "classify", "segment", "detect", "pose", "obb", "world", "yoloe", "YOLO", "YOLOWorld", "YOLOE"

ultralytics/models/yolo/detect/val.py
@@ -455,8 +455,13 @@ class DetectionValidator(BaseValidator):
             val.print_results()  # explicitly call print_results
             # update mAP50-95 and mAP50
             stats[self.metrics.keys[-1]], stats[self.metrics.keys[-2]] = (
-                val.stats[:2] if self.is_coco else [val.results["AP50"], val.results["AP"]]
+                val.stats[:2] if self.is_coco else [val.results["AP"], val.results["AP50"]]
             )
+            if self.is_lvis:
+                stats["metrics/APr(B)"] = val.results["APr"]
+                stats["metrics/APc(B)"] = val.results["APc"]
+                stats["metrics/APf(B)"] = val.results["APf"]
+                stats["fitness"] = val.results["AP"]
         except Exception as e:
             LOGGER.warning(f"{pkg} unable to run: {e}")
         return stats

ultralytics/models/yolo/model.py
@@ -4,7 +4,16 @@ from pathlib import Path

 from ultralytics.engine.model import Model
 from ultralytics.models import yolo
-from ultralytics.nn.tasks import ClassificationModel, DetectionModel, OBBModel, PoseModel, SegmentationModel, WorldModel
+from ultralytics.nn.tasks import (
+    ClassificationModel,
+    DetectionModel,
+    OBBModel,
+    PoseModel,
+    SegmentationModel,
+    WorldModel,
+    YOLOEModel,
+    YOLOESegModel,
+)
 from ultralytics.utils import ROOT, yaml_load

@@ -12,12 +21,16 @@ class YOLO(Model):
     """YOLO (You Only Look Once) object detection model."""

     def __init__(self, model="yolo11n.pt", task=None, verbose=False):
-        """Initialize YOLO model, switching to YOLOWorld if model filename contains '-world'."""
+        """Initialize YOLO model, switching to YOLOWorld/YOLOE if model filename contains '-world'/'yoloe'."""
         path = Path(model)
         if "-world" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOWorld PyTorch model
             new_instance = YOLOWorld(path, verbose=verbose)
             self.__class__ = type(new_instance)
             self.__dict__ = new_instance.__dict__
+        elif "yoloe" in path.stem and path.suffix in {".pt", ".yaml", ".yml"}:  # if YOLOE PyTorch model
+            new_instance = YOLOE(path, task=task, verbose=verbose)
+            self.__class__ = type(new_instance)
+            self.__dict__ = new_instance.__dict__
         else:
             # Continue with default YOLO initialization
             super().__init__(model=model, task=task, verbose=verbose)
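In effect, constructing `YOLO` with a filename whose stem contains "yoloe" now transparently rebinds the instance; a short usage sketch (the checkpoint name follows the release's naming convention and is fetched on first use):

from ultralytics import YOLO, YOLOE

model = YOLO("yoloe-v8s-seg.pt")  # stem contains "yoloe", so __init__ swaps the class
assert isinstance(model, YOLOE)  # the returned object is a YOLOE, not a plain YOLO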
@@ -96,7 +109,7 @@ class YOLOWorld(Model):
         Set the model's class names for detection.

         Args:
-            classes (List(str)): A list of categories i.e. ["person"].
+            classes (list[str]): A list of categories i.e. ["person"].
         """
         self.model.set_classes(classes)
         # Remove background if it's given
@@ -108,3 +121,169 @@ class YOLOWorld(Model):
         # Reset method class names
         if self.predictor:
             self.predictor.model.names = classes
+
+
+class YOLOE(Model):
+    """YOLOE object detection and segmentation model."""
+
+    def __init__(self, model="yoloe-v8s-seg.pt", task=None, verbose=False) -> None:
+        """
+        Initialize YOLOE model with a pre-trained model file.
+
+        Args:
+            model (str | Path): Path to the pre-trained model file. Supports *.pt and *.yaml formats.
+            task (str, optional): Task type for the model. Auto-detected if None.
+            verbose (bool): If True, prints additional information during initialization.
+        """
+        super().__init__(model=model, task=task, verbose=verbose)
+
+        # Assign default COCO class names when there are no custom names
+        if not hasattr(self.model, "names"):
+            self.model.names = yaml_load(ROOT / "cfg/datasets/coco8.yaml").get("names")
+
+    @property
+    def task_map(self):
+        """Map head to model, validator, and predictor classes."""
+        return {
+            "detect": {
+                "model": YOLOEModel,
+                "validator": yolo.yoloe.YOLOEDetectValidator,
+                "predictor": yolo.detect.DetectionPredictor,
+                "trainer": yolo.yoloe.YOLOETrainer,
+            },
+            "segment": {
+                "model": YOLOESegModel,
+                "validator": yolo.yoloe.YOLOESegValidator,
+                "predictor": yolo.segment.SegmentationPredictor,
+                "trainer": yolo.yoloe.YOLOESegTrainer,
+            },
+        }
+
+    def get_text_pe(self, texts):
+        """Get text positional embeddings for the given texts."""
+        assert isinstance(self.model, YOLOEModel)
+        return self.model.get_text_pe(texts)
+
+    def get_visual_pe(self, img, visual):
+        """Get visual positional embeddings for the given image and visual features."""
+        assert isinstance(self.model, YOLOEModel)
+        return self.model.get_visual_pe(img, visual)
+
+    def set_vocab(self, vocab, names):
+        """Set vocabulary and class names for the model."""
+        assert isinstance(self.model, YOLOEModel)
+        self.model.set_vocab(vocab, names=names)
+
+    def get_vocab(self, names):
+        """Get vocabulary for the given class names."""
+        assert isinstance(self.model, YOLOEModel)
+        return self.model.get_vocab(names)
+
+    def set_classes(self, classes, embeddings):
+        """
+        Set the model's class names and embeddings for detection.
+
+        Args:
+            classes (list[str]): A list of categories i.e. ["person"].
+            embeddings (torch.Tensor): Embeddings corresponding to the classes.
+        """
+        assert isinstance(self.model, YOLOEModel)
+        self.model.set_classes(classes, embeddings)
+        # Verify no background class is present
+        assert " " not in classes
+        self.model.names = classes
+
+        # Reset method class names
+        if self.predictor:
+            self.predictor.model.names = classes
+
+    def val(
+        self,
+        validator=None,
+        load_vp=False,
+        refer_data=None,
+        **kwargs,
+    ):
+        """
+        Validate the model using text or visual prompts.
+
+        Args:
+            validator (callable, optional): A callable validator function. If None, a default validator is loaded.
+            load_vp (bool): Whether to load visual prompts. If False, text prompts are used.
+            refer_data (str, optional): Path to the reference data for visual prompts.
+            **kwargs (Any): Additional keyword arguments to override default settings.
+
+        Returns:
+            (dict): Validation statistics containing metrics computed during validation.
+        """
+        custom = {"rect": not load_vp}  # method defaults
+        args = {**self.overrides, **custom, **kwargs, "mode": "val"}  # highest priority args on the right
+
+        validator = (validator or self._smart_load("validator"))(args=args, _callbacks=self.callbacks)
+        validator(model=self.model, load_vp=load_vp, refer_data=refer_data)
+        self.metrics = validator.metrics
+        return validator.metrics
+
+    def predict(
+        self,
+        source=None,
+        stream: bool = False,
+        visual_prompts: dict = {},
+        refer_image=None,
+        predictor=None,
+        **kwargs,
+    ):
+        """
+        Run prediction on images, videos, directories, streams, etc.
+
+        Args:
+            source (str | int | PIL.Image | np.ndarray, optional): Source for prediction. Accepts image paths,
+                directory paths, URL/YouTube streams, PIL images, numpy arrays, or webcam indices.
+            stream (bool): Whether to stream the prediction results. If True, results are yielded as a
+                generator as they are computed.
+            visual_prompts (dict): Dictionary containing visual prompts for the model. Must include 'bboxes' and
+                'cls' keys when non-empty.
+            refer_image (str | PIL.Image | np.ndarray, optional): Reference image for visual prompts.
+            predictor (callable, optional): Custom predictor function. If None, a predictor is automatically
+                loaded based on the task.
+            **kwargs (Any): Additional keyword arguments passed to the predictor.
+
+        Returns:
+            (List | generator): List of Results objects or generator of Results objects if stream=True.
+
+        Examples:
+            >>> model = YOLOE("yoloe-v8s-seg.pt")
+            >>> results = model.predict("path/to/image.jpg")
+            >>> # With visual prompts
+            >>> prompts = {"bboxes": [[10, 20, 100, 200]], "cls": ["person"]}
+            >>> results = model.predict("path/to/image.jpg", visual_prompts=prompts)
+        """
+        if len(visual_prompts):
+            assert "bboxes" in visual_prompts and "cls" in visual_prompts, (
+                f"Expected 'bboxes' and 'cls' in visual prompts, but got {visual_prompts.keys()}"
+            )
+            assert len(visual_prompts["bboxes"]) == len(visual_prompts["cls"]), (
+                f"Expected equal number of bounding boxes and classes, but got {len(visual_prompts['bboxes'])} and "
+                f"{len(visual_prompts['cls'])} respectively"
+            )
+        self.predictor = (predictor or self._smart_load("predictor"))(
+            overrides={"task": "segment", "mode": "predict", "save": False, "verbose": False}, _callbacks=self.callbacks
+        )
+
+        if len(visual_prompts):
+            num_cls = (
+                max(len(set(c)) for c in visual_prompts["cls"])
+                if isinstance(source, list)  # means multiple images
+                else len(set(visual_prompts["cls"]))
+            )
+            self.model.model[-1].nc = num_cls
+            self.model.names = [f"object{i}" for i in range(num_cls)]
+            self.predictor.set_prompts(visual_prompts)
+
+        self.predictor.setup_model(model=self.model)
+        if refer_image is not None and len(visual_prompts):
+            vpe = self.predictor.get_vpe(refer_image)
+            self.model.set_classes(self.model.names, vpe)
+            self.predictor = None  # reset predictor
+
+        return super().predict(source, stream, **kwargs)
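Putting the new class together, a hedged end-to-end usage sketch based only on the methods defined above (the image path is a placeholder; the visual-prompt format mirrors the docstring example):

from ultralytics import YOLOE

model = YOLOE("yoloe-v8s-seg.pt")

# Text prompts: embed class names, then bind names + embeddings to the head
names = ["person", "bus"]
model.set_classes(names, model.get_text_pe(names))
results = model.predict("path/to/image.jpg")

# Visual prompts: boxes and classes, validated by predict() before inference
prompts = {"bboxes": [[10, 20, 100, 200]], "cls": ["person"]}
results = model.predict("path/to/image.jpg", visual_prompts=prompts)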