ultralytics 8.1.37__py3-none-any.whl → 8.1.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (39)
  1. ultralytics/__init__.py +1 -1
  2. ultralytics/cfg/__init__.py +1 -2
  3. ultralytics/cfg/datasets/lvis.yaml +1239 -0
  4. ultralytics/cfg/default.yaml +2 -2
  5. ultralytics/data/__init__.py +18 -2
  6. ultralytics/data/augment.py +123 -2
  7. ultralytics/data/base.py +2 -0
  8. ultralytics/data/build.py +25 -3
  9. ultralytics/data/converter.py +22 -4
  10. ultralytics/data/dataset.py +143 -27
  11. ultralytics/data/utils.py +25 -1
  12. ultralytics/engine/exporter.py +1 -3
  13. ultralytics/engine/model.py +4 -1
  14. ultralytics/engine/trainer.py +48 -44
  15. ultralytics/models/fastsam/prompt.py +1 -1
  16. ultralytics/models/yolo/__init__.py +2 -2
  17. ultralytics/models/yolo/detect/val.py +36 -17
  18. ultralytics/models/yolo/model.py +1 -0
  19. ultralytics/models/yolo/world/__init__.py +5 -0
  20. ultralytics/models/yolo/world/train.py +91 -0
  21. ultralytics/models/yolo/world/train_world.py +108 -0
  22. ultralytics/nn/autobackend.py +1 -1
  23. ultralytics/nn/modules/block.py +4 -2
  24. ultralytics/nn/modules/head.py +9 -0
  25. ultralytics/nn/tasks.py +29 -13
  26. ultralytics/solutions/heatmap.py +84 -46
  27. ultralytics/solutions/object_counter.py +79 -64
  28. ultralytics/trackers/utils/gmc.py +1 -1
  29. ultralytics/utils/callbacks/raytune.py +1 -1
  30. ultralytics/utils/loss.py +1 -1
  31. ultralytics/utils/plotting.py +35 -21
  32. ultralytics/utils/torch_utils.py +14 -0
  33. ultralytics/utils/tuner.py +2 -2
  34. {ultralytics-8.1.37.dist-info → ultralytics-8.1.39.dist-info}/METADATA +1 -1
  35. {ultralytics-8.1.37.dist-info → ultralytics-8.1.39.dist-info}/RECORD +39 -35
  36. {ultralytics-8.1.37.dist-info → ultralytics-8.1.39.dist-info}/LICENSE +0 -0
  37. {ultralytics-8.1.37.dist-info → ultralytics-8.1.39.dist-info}/WHEEL +0 -0
  38. {ultralytics-8.1.37.dist-info → ultralytics-8.1.39.dist-info}/entry_points.txt +0 -0
  39. {ultralytics-8.1.37.dist-info → ultralytics-8.1.39.dist-info}/top_level.txt +0 -0
ultralytics/cfg/default.yaml CHANGED
@@ -116,8 +116,8 @@ mosaic: 1.0 # (float) image mosaic (probability)
 mixup: 0.0 # (float) image mixup (probability)
 copy_paste: 0.0 # (float) segment copy-paste (probability)
 auto_augment: randaugment # (str) auto augmentation policy for classification (randaugment, autoaugment, augmix)
-erasing: 0.4 # (float) probability of random erasing during classification training (0-1)
-crop_fraction: 1.0 # (float) image crop fraction for classification evaluation/inference (0-1)
+erasing: 0.4 # (float) probability of random erasing during classification training (0-0.9), 0 means no erasing, must be less than 1.0.
+crop_fraction: 1.0 # (float) image crop fraction for classification (0.1-1), 1.0 means no crop, must be greater than 0.
 
 # Custom config.yaml ---------------------------------------------------------------------------------------------------
 cfg: # (str, optional) for overriding defaults.yaml
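A minimal sketch of how these two classification hyperparameters are overridden at train time (the tiny `mnist160` demo dataset is an assumed stand-in; any classification dataset works):

```python
from ultralytics import YOLO

model = YOLO("yolov8n-cls.pt")
# erasing must be < 1.0 (0 disables it); crop_fraction must be > 0
model.train(data="mnist160", epochs=1, erasing=0.4, crop_fraction=1.0)
```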
ultralytics/data/__init__.py CHANGED
@@ -1,15 +1,31 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 from .base import BaseDataset
-from .build import build_dataloader, build_yolo_dataset, load_inference_source
-from .dataset import ClassificationDataset, SemanticDataset, YOLODataset
+from .build import (
+    build_dataloader,
+    build_yolo_dataset,
+    build_grounding,
+    load_inference_source,
+)
+from .dataset import (
+    ClassificationDataset,
+    SemanticDataset,
+    YOLODataset,
+    YOLOMultiModalDataset,
+    GroundingDataset,
+    YOLOConcatDataset,
+)
 
 __all__ = (
     "BaseDataset",
     "ClassificationDataset",
     "SemanticDataset",
     "YOLODataset",
+    "YOLOMultiModalDataset",
+    "YOLOConcatDataset",
+    "GroundingDataset",
     "build_yolo_dataset",
+    "build_grounding",
     "build_dataloader",
     "load_inference_source",
 )
ultralytics/data/augment.py CHANGED
@@ -3,6 +3,7 @@
 import math
 import random
 from copy import deepcopy
+from typing import Tuple, Union
 
 import cv2
 import numpy as np
@@ -66,7 +67,7 @@ class Compose:
 
     def __init__(self, transforms):
         """Initializes the Compose object with a list of transforms."""
-        self.transforms = transforms
+        self.transforms = transforms if isinstance(transforms, list) else [transforms]
 
     def __call__(self, data):
         """Applies a series of transformations to input data."""
@@ -78,6 +79,29 @@ class Compose:
         """Appends a new transform to the existing list of transforms."""
         self.transforms.append(transform)
 
+    def insert(self, index, transform):
+        """Inserts a new transform to the existing list of transforms."""
+        self.transforms.insert(index, transform)
+
+    def __getitem__(self, index: Union[list, int]) -> "Compose":
+        """Retrieve a specific transform or a set of transforms using indexing."""
+        assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
+        index = [index] if isinstance(index, int) else index
+        return Compose([self.transforms[i] for i in index])
+
+    def __setitem__(self, index: Union[list, int], value: Union[list, int]) -> None:
+        """Set one or more transforms in the list using indexing."""
+        assert isinstance(index, (int, list)), f"The indices should be either list or int type but got {type(index)}"
+        if isinstance(index, list):
+            assert isinstance(
+                value, list
+            ), f"The indices should be the same type as values, but got {type(index)} and {type(value)}"
+        if isinstance(index, int):
+            index, value = [index], [value]
+        for i, v in zip(index, value):
+            assert i < len(self.transforms), f"list index {i} out of range {len(self.transforms)}."
+            self.transforms[i] = v
+
     def tolist(self):
         """Converts the list of transforms to a standard Python list."""
         return self.transforms
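The new `insert`, `__getitem__`, and `__setitem__` methods make a composed pipeline editable in place, which the multi-modal datasets in this release rely on (`transforms.insert(-1, RandomLoadText(...))`). A small sketch with hypothetical no-op transforms:

```python
from ultralytics.data.augment import Compose

f, g, h = (lambda x: x), (lambda x: x), (lambda x: x)  # hypothetical transforms

pipeline = Compose([f, g])
pipeline.insert(1, h)      # pipeline is now [f, h, g]
sub = pipeline[[0, 2]]     # indexing returns a new Compose([f, g]), never a bare transform
pipeline[0] = g            # an int index takes a single value
pipeline[[0, 1]] = [g, f]  # a list index takes a matching list of values
```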
@@ -118,6 +142,8 @@ class BaseMixTransform:
             mix_labels[i] = self.pre_transform(data)
         labels["mix_labels"] = mix_labels
 
+        # Update cls and texts
+        labels = self._update_label_text(labels)
         # Mosaic or MixUp
         labels = self._mix_transform(labels)
         labels.pop("mix_labels", None)
@@ -131,6 +157,22 @@ class BaseMixTransform:
         """Gets a list of shuffled indexes for mosaic augmentation."""
         raise NotImplementedError
 
+    def _update_label_text(self, labels):
+        """Update label text."""
+        if "texts" not in labels:
+            return labels
+
+        mix_texts = sum([labels["texts"]] + [x["texts"] for x in labels["mix_labels"]], [])
+        mix_texts = list({tuple(x) for x in mix_texts})
+        text2id = {text: i for i, text in enumerate(mix_texts)}
+
+        for label in [labels] + labels["mix_labels"]:
+            for i, l in enumerate(label["cls"].squeeze(-1).tolist()):
+                text = label["texts"][int(l)]
+                label["cls"][i] = text2id[tuple(text)]
+            label["texts"] = mix_texts
+        return labels
+
 
 class Mosaic(BaseMixTransform):
     """
@@ -320,6 +362,8 @@ class Mosaic(BaseMixTransform):
         final_labels["instances"].clip(imgsz, imgsz)
         good = final_labels["instances"].remove_zero_area_boxes()
         final_labels["cls"] = final_labels["cls"][good]
+        if "texts" in mosaic_labels[0]:
+            final_labels["texts"] = mosaic_labels[0]["texts"]
         return final_labels
 
 
@@ -970,6 +1014,83 @@ class Format:
         return masks, instances, cls
 
 
+class RandomLoadText:
+    """
+    Randomly sample positive and negative texts and update the class indices according to the number of samples.
+
+    Attributes:
+        prompt_format (str): Format for prompt. Default is '{}'.
+        neg_samples (tuple[int]): A range from which to randomly sample negative texts. Default is (80, 80).
+        max_samples (int): The max number of different text samples in one image. Default is 80.
+        padding (bool): Whether to pad texts to max_samples. Default is False.
+        padding_value (str): The padding text. Default is "".
+    """
+
+    def __init__(
+        self,
+        prompt_format: str = "{}",
+        neg_samples: Tuple[int, int] = (80, 80),
+        max_samples: int = 80,
+        padding: bool = False,
+        padding_value: str = "",
+    ) -> None:
+        """Initializes the RandomLoadText class with given parameters."""
+        self.prompt_format = prompt_format
+        self.neg_samples = neg_samples
+        self.max_samples = max_samples
+        self.padding = padding
+        self.padding_value = padding_value
+
+    def __call__(self, labels: dict) -> dict:
+        """Return updated classes and texts."""
+        assert "texts" in labels, "No texts found in labels."
+        class_texts = labels["texts"]
+        num_classes = len(class_texts)
+        cls = np.asarray(labels.pop("cls"), dtype=int)
+        pos_labels = np.unique(cls).tolist()
+
+        if len(pos_labels) > self.max_samples:
+            pos_labels = set(random.sample(pos_labels, k=self.max_samples))
+
+        neg_samples = min(min(num_classes, self.max_samples) - len(pos_labels), random.randint(*self.neg_samples))
+        neg_labels = []
+        for i in range(num_classes):
+            if i not in pos_labels:
+                neg_labels.append(i)
+        neg_labels = random.sample(neg_labels, k=neg_samples)
+
+        sampled_labels = pos_labels + neg_labels
+        random.shuffle(sampled_labels)
+
+        label2ids = {label: i for i, label in enumerate(sampled_labels)}
+        valid_idx = np.zeros(len(labels["instances"]), dtype=bool)
+        new_cls = []
+        for i, label in enumerate(cls.squeeze(-1).tolist()):
+            if label not in label2ids:
+                continue
+            valid_idx[i] = True
+            new_cls.append([label2ids[label]])
+        labels["instances"] = labels["instances"][valid_idx]
+        labels["cls"] = np.array(new_cls)
+
+        # Randomly select one prompt when there is more than one prompt
+        texts = []
+        for label in sampled_labels:
+            prompts = class_texts[label]
+            assert len(prompts) > 0
+            prompt = self.prompt_format.format(prompts[random.randrange(len(prompts))])
+            texts.append(prompt)
+
+        if self.padding:
+            valid_labels = len(pos_labels) + len(neg_labels)
+            num_padding = self.max_samples - valid_labels
+            if num_padding > 0:
+                texts += [self.padding_value] * num_padding
+
+        labels["texts"] = texts
+        return labels
+
+
 def v8_transforms(dataset, imgsz, hyp, stretch=False):
     """Convert images to a size suitable for YOLOv8 training."""
     pre_transform = Compose(
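RandomLoadText rewrites `cls` indices so they point into the freshly sampled text list, dropping instances whose class was not sampled. A minimal sketch of that behavior, assuming a plain numpy array stands in for the usual `Instances` object (only `len()` and boolean indexing are needed here):

```python
import numpy as np
from ultralytics.data.augment import RandomLoadText

labels = {
    "texts": [["person"], ["bicycle", "bike"], ["car"]],  # one synonym list per class
    "cls": np.array([[0], [2]]),                          # boxes of class 0 and 2
    "instances": np.zeros((2, 4), dtype=np.float32),      # stand-in for Instances
}
out = RandomLoadText(neg_samples=(1, 1), max_samples=3, padding=True)(labels)
print(out["texts"])  # 3 sampled prompts, e.g. ['car', 'person', 'bike']
print(out["cls"])    # remapped ids indexing into out["texts"]
```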
@@ -1056,7 +1177,7 @@ def classify_transforms(
     return T.Compose(tfl)
 
 
-# Classification augmentations train ---------------------------------------------------------------------------------------
+# Classification training augmentations --------------------------------------------------------------------------------
 def classify_augmentations(
     size=224,
     mean=DEFAULT_MEAN,
ultralytics/data/base.py CHANGED
@@ -81,6 +81,8 @@ class BaseDataset(Dataset):
         if self.rect:
             assert self.batch_size is not None
             self.set_rectangle()
+        if isinstance(cache, str):
+            cache = cache.lower()
 
         # Buffer thread for mosaic images
         self.buffer = []  # buffer size = batch size
ultralytics/data/build.py CHANGED
@@ -22,7 +22,7 @@ from ultralytics.data.loaders import (
 from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
 from ultralytics.utils import RANK, colorstr
 from ultralytics.utils.checks import check_file
-from .dataset import YOLODataset
+from .dataset import YOLODataset, YOLOMultiModalDataset, GroundingDataset
 from .utils import PIN_MEMORY
 
 
@@ -82,9 +82,10 @@ def seed_worker(worker_id):  # noqa
     random.seed(worker_seed)
 
 
-def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32):
+def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, stride=32, multi_modal=False):
     """Build YOLO Dataset."""
-    return YOLODataset(
+    dataset = YOLOMultiModalDataset if multi_modal else YOLODataset
+    return dataset(
         img_path=img_path,
         imgsz=cfg.imgsz,
         batch_size=batch,
@@ -103,6 +104,27 @@ def build_yolo_dataset(cfg, img_path, batch, data, mode="train", rect=False, str
     )
 
 
+def build_grounding(cfg, img_path, json_file, batch, mode="train", rect=False, stride=32):
+    """Build a YOLO grounding dataset."""
+    return GroundingDataset(
+        img_path=img_path,
+        json_file=json_file,
+        imgsz=cfg.imgsz,
+        batch_size=batch,
+        augment=mode == "train",  # augmentation
+        hyp=cfg,  # TODO: probably add a get_hyps_from_cfg function
+        rect=cfg.rect or rect,  # rectangular batches
+        cache=cfg.cache or None,
+        single_cls=cfg.single_cls or False,
+        stride=int(stride),
+        pad=0.0 if mode == "train" else 0.5,
+        prefix=colorstr(f"{mode}: "),
+        task=cfg.task,
+        classes=cfg.classes,
+        fraction=cfg.fraction if mode == "train" else 1.0,
+    )
+
+
 def build_dataloader(dataset, batch, workers, shuffle=True, rank=-1):
     """Return an InfiniteDataLoader or DataLoader for training or validation set."""
     batch = min(batch, len(dataset))
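A minimal sketch of wiring `build_grounding` into a loader, assuming default train args from `get_cfg()` and a Flickr30k-style grounding annotation file (both paths are hypothetical):

```python
from ultralytics.cfg import get_cfg
from ultralytics.data.build import build_grounding, build_dataloader

cfg = get_cfg()  # default train args (imgsz, rect, cache, fraction, ...)
dataset = build_grounding(
    cfg,
    img_path="../datasets/flickr30k/images",  # hypothetical path
    json_file="../datasets/flickr30k/annotations/train.json",  # hypothetical path
    batch=16,
    mode="train",
)
loader = build_dataloader(dataset, batch=16, workers=4, shuffle=True, rank=-1)
```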
ultralytics/data/converter.py CHANGED
@@ -219,6 +219,7 @@ def convert_coco(
     use_segments=False,
     use_keypoints=False,
     cls91to80=True,
+    lvis=False,
 ):
     """
     Converts COCO dataset annotations to a YOLO annotation format suitable for training YOLO models.
@@ -229,12 +230,14 @@ def convert_coco(
         use_segments (bool, optional): Whether to include segmentation masks in the output.
         use_keypoints (bool, optional): Whether to include keypoint annotations in the output.
         cls91to80 (bool, optional): Whether to map 91 COCO class IDs to the corresponding 80 COCO class IDs.
+        lvis (bool, optional): Whether to convert the data in the LVIS dataset format.
 
     Example:
         ```python
         from ultralytics.data.converter import convert_coco
 
         convert_coco('../datasets/coco/annotations/', use_segments=True, use_keypoints=False, cls91to80=True)
+        convert_coco('../datasets/lvis/annotations/', use_segments=True, use_keypoints=False, cls91to80=False, lvis=True)
         ```
 
     Output:
@@ -251,8 +254,14 @@ def convert_coco(
 
     # Import json
     for json_file in sorted(Path(labels_dir).resolve().glob("*.json")):
-        fn = Path(save_dir) / "labels" / json_file.stem.replace("instances_", "")  # folder name
+        lname = "" if lvis else json_file.stem.replace("instances_", "")
+        fn = Path(save_dir) / "labels" / lname  # folder name
         fn.mkdir(parents=True, exist_ok=True)
+        if lvis:
+            # NOTE: create folders for both train and val in advance,
+            # since LVIS val set contains images from COCO 2017 train in addition to the COCO 2017 val split.
+            (fn / "train2017").mkdir(parents=True, exist_ok=True)
+            (fn / "val2017").mkdir(parents=True, exist_ok=True)
         with open(json_file) as f:
             data = json.load(f)
@@ -263,16 +272,20 @@ def convert_coco(
         for ann in data["annotations"]:
             imgToAnns[ann["image_id"]].append(ann)
 
+        image_txt = []
         # Write labels file
        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Annotations {json_file}"):
             img = images[f"{img_id:d}"]
-            h, w, f = img["height"], img["width"], img["file_name"]
+            h, w = img["height"], img["width"]
+            f = str(Path(img["coco_url"]).relative_to("http://images.cocodataset.org")) if lvis else img["file_name"]
+            if lvis:
+                image_txt.append(str(Path("./images") / f))
 
             bboxes = []
             segments = []
             keypoints = []
             for ann in anns:
-                if ann["iscrowd"]:
+                if ann.get("iscrowd", False):
                     continue
                 # The COCO box format is [top left x, top left y, width, height]
                 box = np.array(ann["bbox"], dtype=np.float64)
@@ -314,7 +327,12 @@ def convert_coco(
                 )  # cls, box or segments
                 file.write(("%g " * len(line)).rstrip() % line + "\n")
 
-    LOGGER.info(f"COCO data converted successfully.\nResults saved to {save_dir.resolve()}")
+        if lvis:
+            with open((Path(save_dir) / json_file.name.replace("lvis_v1_", "").replace(".json", ".txt")), "a") as f:
+                for l in image_txt:
+                    f.write(f"{l}\n")
+
+    LOGGER.info(f"{'LVIS' if lvis else 'COCO'} data converted successfully.\nResults saved to {save_dir.resolve()}")
 
 
 def convert_dota_to_yolo_obb(dota_root_path: str):
ultralytics/data/dataset.py CHANGED
@@ -1,20 +1,41 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 import contextlib
 from itertools import repeat
+from collections import defaultdict
 from multiprocessing.pool import ThreadPool
 from pathlib import Path
 
 import cv2
+import json
 import numpy as np
 import torch
 import torchvision
 from PIL import Image
 
-from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr, is_dir_writeable
+from torch.utils.data import ConcatDataset
+from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr
 from ultralytics.utils.ops import resample_segments
-from .augment import Compose, Format, Instances, LetterBox, classify_augmentations, classify_transforms, v8_transforms
+from .augment import (
+    Compose,
+    Format,
+    Instances,
+    LetterBox,
+    RandomLoadText,
+    classify_augmentations,
+    classify_transforms,
+    v8_transforms,
+)
 from .base import BaseDataset
-from .utils import HELP_URL, LOGGER, get_hash, img2label_paths, verify_image, verify_image_label
+from .utils import (
+    HELP_URL,
+    LOGGER,
+    get_hash,
+    img2label_paths,
+    verify_image,
+    verify_image_label,
+    load_dataset_cache_file,
+    save_dataset_cache_file,
+)
 
 # Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
 DATASET_CACHE_VERSION = "1.0.3"
@@ -105,7 +126,7 @@ class YOLODataset(BaseDataset):
         x["hash"] = get_hash(self.label_files + self.im_files)
         x["results"] = nf, nm, ne, nc, len(self.im_files)
         x["msgs"] = msgs  # warnings
-        save_dataset_cache_file(self.prefix, path, x)
+        save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
         return x
 
     def get_labels(self):
@@ -261,8 +282,8 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
         if augment and args.fraction < 1.0:  # reduce training fraction
             self.samples = self.samples[: round(len(self.samples) * args.fraction)]
         self.prefix = colorstr(f"{prefix}: ") if prefix else ""
-        self.cache_ram = args.cache is True or args.cache == "ram"  # cache images into RAM
-        self.cache_disk = args.cache == "disk"  # cache images on hard drive as uncompressed *.npy files
+        self.cache_ram = args.cache is True or str(args.cache).lower() == "ram"  # cache images into RAM
+        self.cache_disk = str(args.cache).lower() == "disk"  # cache images on hard drive as uncompressed *.npy files
         self.samples = self.verify_images()  # filter out bad images
         self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
         scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
@@ -285,8 +306,9 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
     def __getitem__(self, i):
         """Returns subset of data and targets corresponding to given indices."""
         f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
-        if self.cache_ram and im is None:
-            im = self.samples[i][3] = cv2.imread(f)
+        if self.cache_ram:
+            if im is None:  # Warning: two separate if statements required here, do not combine this with previous line
+                im = self.samples[i][3] = cv2.imread(f)
         elif self.cache_disk:
             if not fn.exists():  # load npy
                 np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
@@ -338,31 +360,125 @@ class ClassificationDataset(torchvision.datasets.ImageFolder):
         x["hash"] = get_hash([x[0] for x in self.samples])
         x["results"] = nf, nc, len(samples), samples
         x["msgs"] = msgs  # warnings
-        save_dataset_cache_file(self.prefix, path, x)
+        save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
         return samples
 
 
-def load_dataset_cache_file(path):
-    """Load an Ultralytics *.cache dictionary from path."""
-    import gc
-
-    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
-    cache = np.load(str(path), allow_pickle=True).item()  # load dict
-    gc.enable()
-    return cache
-
-
-def save_dataset_cache_file(prefix, path, x):
-    """Save an Ultralytics dataset *.cache dictionary x to path."""
-    x["version"] = DATASET_CACHE_VERSION  # add cache version
-    if is_dir_writeable(path.parent):
-        if path.exists():
-            path.unlink()  # remove *.cache file if exists
-        np.save(str(path), x)  # save cache for next time
-        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
-        LOGGER.info(f"{prefix}New cache created: {path}")
-    else:
-        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")
+class YOLOMultiModalDataset(YOLODataset):
+    """
+    Dataset class for loading object detection and/or segmentation labels in YOLO format, with text labels.
+
+    Args:
+        data (dict, optional): A dataset YAML dictionary. Defaults to None.
+        task (str): An explicit arg to specify the current task. Defaults to 'detect'.
+
+    Returns:
+        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
+    """
+
+    def __init__(self, *args, data=None, task="detect", **kwargs):
+        """Initializes a dataset object for object detection tasks with optional specifications."""
+        super().__init__(*args, data=data, task=task, **kwargs)
+
+    def update_labels_info(self, label):
+        """Add text information for multi-modal model training."""
+        labels = super().update_labels_info(label)
+        # NOTE: some categories are concatenated with their synonyms by `/`.
+        labels["texts"] = [v.split("/") for _, v in self.data["names"].items()]
+        return labels
+
+    def build_transforms(self, hyp=None):
+        """Enhances data transformations with optional text augmentation for multi-modal training."""
+        transforms = super().build_transforms(hyp)
+        if self.augment:
+            # NOTE: hard-coded the args for now.
+            transforms.insert(-1, RandomLoadText(max_samples=min(self.data["nc"], 80), padding=True))
+        return transforms
+
+
+class GroundingDataset(YOLODataset):
+    def __init__(self, *args, task="detect", json_file, **kwargs):
+        """Initializes a GroundingDataset for object detection, loading annotations from a specified JSON file."""
+        assert task == "detect", "`GroundingDataset` only supports the `detect` task for now!"
+        self.json_file = json_file
+        super().__init__(*args, task=task, data={}, **kwargs)
+
+    def get_img_files(self, img_path):
+        """The image files are read in the `get_labels` function; return an empty list here."""
+        return []
+
+    def get_labels(self):
+        """Loads annotations from a JSON file, filters, and normalizes bounding boxes for each image."""
+        labels = []
+        LOGGER.info("Loading annotation file...")
+        with open(self.json_file, "r") as f:
+            annotations = json.load(f)
+        images = {f'{x["id"]:d}': x for x in annotations["images"]}
+        imgToAnns = defaultdict(list)
+        for ann in annotations["annotations"]:
+            imgToAnns[ann["image_id"]].append(ann)
+        for img_id, anns in TQDM(imgToAnns.items(), desc=f"Reading annotations {self.json_file}"):
+            img = images[f"{img_id:d}"]
+            h, w, f = img["height"], img["width"], img["file_name"]
+            im_file = Path(self.img_path) / f
+            if not im_file.exists():
+                continue
+            self.im_files.append(str(im_file))
+            bboxes = []
+            cat2id = {}
+            texts = []
+            for ann in anns:
+                if ann["iscrowd"]:
+                    continue
+                box = np.array(ann["bbox"], dtype=np.float32)
+                box[:2] += box[2:] / 2
+                box[[0, 2]] /= float(w)
+                box[[1, 3]] /= float(h)
+                if box[2] <= 0 or box[3] <= 0:
+                    continue
+
+                cat_name = " ".join([img["caption"][t[0] : t[1]] for t in ann["tokens_positive"]])
+                if cat_name not in cat2id:
+                    cat2id[cat_name] = len(cat2id)
+                    texts.append([cat_name])
+                cls = cat2id[cat_name]  # class
+                box = [cls] + box.tolist()
+                if box not in bboxes:
+                    bboxes.append(box)
+            lb = np.array(bboxes, dtype=np.float32) if len(bboxes) else np.zeros((0, 5), dtype=np.float32)
+            labels.append(
+                dict(
+                    im_file=im_file,
+                    shape=(h, w),
+                    cls=lb[:, 0:1],  # n, 1
+                    bboxes=lb[:, 1:],  # n, 4
+                    normalized=True,
+                    bbox_format="xywh",
+                    texts=texts,
+                )
+            )
+        return labels
+
+    def build_transforms(self, hyp=None):
+        """Configures augmentations for training with optional text loading; `hyp` adjusts augmentation intensity."""
+        transforms = super().build_transforms(hyp)
+        if self.augment:
+            # NOTE: hard-coded the args for now.
+            transforms.insert(-1, RandomLoadText(max_samples=80, padding=True))
+        return transforms
+
+
+class YOLOConcatDataset(ConcatDataset):
+    """
+    Dataset as a concatenation of multiple datasets.
+
+    This class is useful to assemble different existing datasets.
+    """
+
+    @staticmethod
+    def collate_fn(batch):
+        """Collates data samples into batches."""
+        return YOLODataset.collate_fn(batch)
 
 
 # TODO: support semantic segmentation
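`YOLOConcatDataset` only has to re-expose `YOLODataset.collate_fn` so that batches drawn across the concatenated parts collate uniformly. A minimal sketch, assuming two already-built datasets with compatible label formats (e.g. from `build_yolo_dataset` and `build_grounding` above):

```python
from torch.utils.data import DataLoader
from ultralytics.data.dataset import YOLOConcatDataset

# det_ds and grounding_ds are assumed YOLODataset-compatible datasets built elsewhere
combined = YOLOConcatDataset([det_ds, grounding_ds])
loader = DataLoader(combined, batch_size=16, shuffle=True, collate_fn=YOLOConcatDataset.collate_fn)
```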
ultralytics/data/utils.py CHANGED
@@ -29,6 +29,7 @@ from ultralytics.utils import (
     emojis,
     yaml_load,
     yaml_save,
+    is_dir_writeable,
 )
 from ultralytics.utils.checks import check_file, check_font, is_ascii
 from ultralytics.utils.downloads import download, safe_download, unzip_file
@@ -303,7 +304,7 @@ def check_det_dataset(dataset, autodownload=True):
 
     # Set paths
     data["path"] = path  # download scripts
-    for k in "train", "val", "test":
+    for k in "train", "val", "test", "minival":
         if data.get(k):  # prepend path
             if isinstance(data[k], str):
                 x = (path / data[k]).resolve()
@@ -649,3 +650,26 @@ def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annot
         if not annotated_only or Path(img2label_paths([str(img)])[0]).exists():  # check label
             with open(path.parent / txt[i], "a") as f:
                 f.write(f"./{img.relative_to(path.parent).as_posix()}" + "\n")  # add image to txt file
+
+
+def load_dataset_cache_file(path):
+    """Load an Ultralytics *.cache dictionary from path."""
+    import gc
+
+    gc.disable()  # reduce pickle load time https://github.com/ultralytics/ultralytics/pull/1585
+    cache = np.load(str(path), allow_pickle=True).item()  # load dict
+    gc.enable()
+    return cache
+
+
+def save_dataset_cache_file(prefix, path, x, version):
+    """Save an Ultralytics dataset *.cache dictionary x to path."""
+    x["version"] = version  # add cache version
+    if is_dir_writeable(path.parent):
+        if path.exists():
+            path.unlink()  # remove *.cache file if exists
+        np.save(str(path), x)  # save cache for next time
+        path.with_suffix(".cache.npy").rename(path)  # remove .npy suffix
+        LOGGER.info(f"{prefix}New cache created: {path}")
+    else:
+        LOGGER.warning(f"{prefix}WARNING ⚠️ Cache directory {path.parent} is not writeable, cache not saved.")
ultralytics/engine/exporter.py CHANGED
@@ -675,9 +675,7 @@ class Exporter:
 
         builder = trt.Builder(logger)
         config = builder.create_builder_config()
-        config.max_workspace_size = self.args.workspace * 1 << 30
-        # config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, workspace << 30)  # fix TRT 8.4 deprecation notice
-
+        config.max_workspace_size = int(self.args.workspace * (1 << 30))
         flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
         network = builder.create_network(flag)
         parser = trt.OnnxParser(network, logger)
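The old expression relied on `*` binding tighter than `<<`, so `workspace * 1 << 30` evaluated as `(workspace * 1) << 30` and raised a TypeError for fractional workspace values, since floats do not support `<<`. A quick illustration:

```python
workspace = 0.5  # GiB; fractional values are allowed in the export args
# workspace * 1 << 30   # TypeError: unsupported operand type(s) for <<: 'float' and 'int'
print(int(workspace * (1 << 30)))  # 536870912 bytes
```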
ultralytics/engine/model.py CHANGED
@@ -733,7 +733,10 @@ class Model(nn.Module):
         """
         from ultralytics.nn.autobackend import check_class_names
 
-        return check_class_names(self.model.names) if hasattr(self.model, "names") else None
+        if hasattr(self.model, "names"):
+            return check_class_names(self.model.names)
+        elif self.predictor:
+            return self.predictor.model.names
 
     @property
     def device(self) -> torch.device:
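With this change, `Model.names` can fall back to the predictor's class names when the loaded weights object itself lacks a `names` attribute, and it still implicitly returns None when neither source is available. A hedged sketch of the observable behavior (assumes a prior ONNX export and a local test image):

```python
from ultralytics import YOLO

model = YOLO("yolov8n.pt")
print(model.names[0])  # 'person' for COCO-pretrained weights

exported = YOLO("yolov8n.onnx")  # hypothetical: model exported beforehand
exported.predict("bus.jpg")      # inference populates exported.predictor
print(exported.names)            # falls back to exported.predictor.model.names
```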