ultralytics 8.1.42__py3-none-any.whl → 8.1.44__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics has been flagged as potentially problematic.

Files changed (58)
  1. ultralytics/__init__.py +3 -2
  2. ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
  3. ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
  4. ultralytics/cfg/models/v9/yolov9e-seg.yaml +2 -3
  5. ultralytics/cfg/models/v9/yolov9e.yaml +2 -3
  6. ultralytics/data/__init__.py +3 -8
  7. ultralytics/data/augment.py +14 -11
  8. ultralytics/data/base.py +1 -1
  9. ultralytics/data/build.py +1 -1
  10. ultralytics/data/converter.py +4 -3
  11. ultralytics/data/dataset.py +149 -144
  12. ultralytics/data/explorer/explorer.py +10 -11
  13. ultralytics/data/explorer/gui/dash.py +3 -3
  14. ultralytics/data/explorer/utils.py +3 -2
  15. ultralytics/data/loaders.py +3 -3
  16. ultralytics/data/utils.py +1 -1
  17. ultralytics/engine/exporter.py +3 -2
  18. ultralytics/engine/model.py +2 -1
  19. ultralytics/engine/trainer.py +2 -1
  20. ultralytics/hub/auth.py +3 -3
  21. ultralytics/hub/session.py +3 -3
  22. ultralytics/hub/utils.py +6 -6
  23. ultralytics/models/fastsam/prompt.py +4 -1
  24. ultralytics/models/rtdetr/val.py +1 -1
  25. ultralytics/models/sam/modules/tiny_encoder.py +2 -2
  26. ultralytics/models/sam/modules/transformer.py +1 -1
  27. ultralytics/models/sam/predict.py +16 -13
  28. ultralytics/models/yolo/classify/train.py +2 -1
  29. ultralytics/models/yolo/detect/val.py +1 -1
  30. ultralytics/models/yolo/model.py +1 -1
  31. ultralytics/models/yolo/obb/val.py +1 -1
  32. ultralytics/models/yolo/world/train_world.py +2 -2
  33. ultralytics/nn/modules/__init__.py +8 -8
  34. ultralytics/nn/modules/head.py +1 -1
  35. ultralytics/nn/tasks.py +7 -7
  36. ultralytics/solutions/heatmap.py +14 -27
  37. ultralytics/solutions/object_counter.py +12 -22
  38. ultralytics/trackers/byte_tracker.py +1 -1
  39. ultralytics/trackers/utils/kalman_filter.py +4 -4
  40. ultralytics/trackers/utils/matching.py +1 -1
  41. ultralytics/utils/__init__.py +56 -41
  42. ultralytics/utils/benchmarks.py +1 -2
  43. ultralytics/utils/callbacks/clearml.py +4 -3
  44. ultralytics/utils/callbacks/hub.py +1 -4
  45. ultralytics/utils/callbacks/mlflow.py +1 -1
  46. ultralytics/utils/callbacks/tensorboard.py +1 -0
  47. ultralytics/utils/callbacks/wb.py +5 -5
  48. ultralytics/utils/checks.py +17 -20
  49. ultralytics/utils/metrics.py +3 -3
  50. ultralytics/utils/ops.py +1 -1
  51. ultralytics/utils/plotting.py +67 -40
  52. ultralytics/utils/torch_utils.py +13 -6
  53. {ultralytics-8.1.42.dist-info → ultralytics-8.1.44.dist-info}/METADATA +1 -1
  54. {ultralytics-8.1.42.dist-info → ultralytics-8.1.44.dist-info}/RECORD +58 -58
  55. {ultralytics-8.1.42.dist-info → ultralytics-8.1.44.dist-info}/LICENSE +0 -0
  56. {ultralytics-8.1.42.dist-info → ultralytics-8.1.44.dist-info}/WHEEL +0 -0
  57. {ultralytics-8.1.42.dist-info → ultralytics-8.1.44.dist-info}/entry_points.txt +0 -0
  58. {ultralytics-8.1.42.dist-info → ultralytics-8.1.44.dist-info}/top_level.txt +0 -0
ultralytics/__init__.py CHANGED
@@ -1,15 +1,16 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license
 
- __version__ = "8.1.42"
+ __version__ = "8.1.44"
 
  from ultralytics.data.explorer.explorer import Explorer
  from ultralytics.models import RTDETR, SAM, YOLO, YOLOWorld
  from ultralytics.models.fastsam import FastSAM
  from ultralytics.models.nas import NAS
- from ultralytics.utils import ASSETS, SETTINGS as settings
+ from ultralytics.utils import ASSETS, SETTINGS
  from ultralytics.utils.checks import check_yolo as checks
  from ultralytics.utils.downloads import download
 
+ settings = SETTINGS
  __all__ = (
      "__version__",
      "ASSETS",
ultralytics/cfg/models/v9/yolov9c-seg.yaml CHANGED
@@ -35,4 +35,4 @@ head:
    - [[-1, 9], 1, Concat, [1]] # cat head P5
    - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 21 (P5/32-large)
 
-   - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
\ No newline at end of file
+   - [[15, 18, 21], 1, Segment, [nc, 32, 256]] # Segment(P3, P4, P5)
ultralytics/cfg/models/v9/yolov9c.yaml CHANGED
@@ -35,4 +35,4 @@ head:
    - [[-1, 9], 1, Concat, [1]] # cat head P5
    - [-1, 1, RepNCSPELAN4, [512, 512, 256, 1]] # 21 (P5/32-large)
 
-   - [[15, 18, 21], 1, Detect, [nc]] # DDetect(P3, P4, P5)
+   - [[15, 18, 21], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/cfg/models/v9/yolov9e-seg.yaml CHANGED
@@ -17,13 +17,13 @@ backbone:
    - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 7
    - [-1, 1, ADown, [1024]] # 8-P5/32
    - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 9
-
+
    - [1, 1, CBLinear, [[64]]] # 10
    - [3, 1, CBLinear, [[64, 128]]] # 11
    - [5, 1, CBLinear, [[64, 128, 256]]] # 12
    - [7, 1, CBLinear, [[64, 128, 256, 512]]] # 13
    - [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]] # 14
-
+
    - [0, 1, Conv, [64, 3, 2]] # 15-P1/2
    - [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]] # 16
    - [-1, 1, Conv, [128, 3, 2]] # 17-P2/4
@@ -58,5 +58,4 @@ head:
    - [[-1, 29], 1, Concat, [1]] # cat head P5
    - [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]] # 41 (P5/32-large)
 
-   # segment
    - [[35, 38, 41], 1, Segment, [nc, 32, 256]] # Segment (P3, P4, P5)
ultralytics/cfg/models/v9/yolov9e.yaml CHANGED
@@ -17,13 +17,13 @@ backbone:
    - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 7
    - [-1, 1, ADown, [1024]] # 8-P5/32
    - [-1, 1, RepNCSPELAN4, [1024, 512, 256, 2]] # 9
-
+
    - [1, 1, CBLinear, [[64]]] # 10
    - [3, 1, CBLinear, [[64, 128]]] # 11
    - [5, 1, CBLinear, [[64, 128, 256]]] # 12
    - [7, 1, CBLinear, [[64, 128, 256, 512]]] # 13
    - [9, 1, CBLinear, [[64, 128, 256, 512, 1024]]] # 14
-
+
    - [0, 1, Conv, [64, 3, 2]] # 15-P1/2
    - [[10, 11, 12, 13, 14, -1], 1, CBFuse, [[0, 0, 0, 0, 0]]] # 16
    - [-1, 1, Conv, [128, 3, 2]] # 17-P2/4
@@ -58,5 +58,4 @@ head:
    - [[-1, 29], 1, Concat, [1]] # cat head P5
    - [-1, 1, RepNCSPELAN4, [512, 1024, 512, 2]] # 41 (P5/32-large)
 
-   # detect
    - [[35, 38, 41], 1, Detect, [nc]] # Detect(P3, P4, P5)
ultralytics/data/__init__.py CHANGED
@@ -1,19 +1,14 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license
 
  from .base import BaseDataset
- from .build import (
-     build_dataloader,
-     build_yolo_dataset,
-     build_grounding,
-     load_inference_source,
- )
+ from .build import build_dataloader, build_grounding, build_yolo_dataset, load_inference_source
  from .dataset import (
      ClassificationDataset,
+     GroundingDataset,
      SemanticDataset,
+     YOLOConcatDataset,
      YOLODataset,
      YOLOMultiModalDataset,
-     GroundingDataset,
-     YOLOConcatDataset,
  )
 
  __all__ = (
ultralytics/data/augment.py CHANGED
@@ -8,7 +8,7 @@ from typing import Tuple, Union
  import cv2
  import numpy as np
  import torch
- import torchvision.transforms as T
+ from PIL import Image
 
  from ultralytics.utils import LOGGER, colorstr
  from ultralytics.utils.checks import check_version
@@ -20,7 +20,7 @@ from .utils import polygons2masks, polygons2masks_overlap
 
  DEFAULT_MEAN = (0.0, 0.0, 0.0)
  DEFAULT_STD = (1.0, 1.0, 1.0)
- DEFAULT_CROP_FTACTION = 1.0
+ DEFAULT_CROP_FRACTION = 1.0
 
 
  # TODO: we might need a BaseTransform to make all these augments be compatible with both classification and semantic
@@ -167,8 +167,8 @@ class BaseMixTransform:
          text2id = {text: i for i, text in enumerate(mix_texts)}
 
          for label in [labels] + labels["mix_labels"]:
-             for i, l in enumerate(label["cls"].squeeze(-1).tolist()):
-                 text = label["texts"][int(l)]
+             for i, cls in enumerate(label["cls"].squeeze(-1).tolist()):
+                 text = label["texts"][int(cls)]
                  label["cls"][i] = text2id[tuple(text)]
              label["texts"] = mix_texts
          return labels
@@ -1133,8 +1133,8 @@ def classify_transforms(
      size=224,
      mean=DEFAULT_MEAN,
      std=DEFAULT_STD,
-     interpolation: T.InterpolationMode = T.InterpolationMode.BILINEAR,
-     crop_fraction: float = DEFAULT_CROP_FTACTION,
+     interpolation=Image.BILINEAR,
+     crop_fraction: float = DEFAULT_CROP_FRACTION,
  ):
      """
      Classification transforms for evaluation/inference. Inspired by timm/data/transforms_factory.py.
@@ -1149,6 +1149,7 @@ def classify_transforms(
      Returns:
          (T.Compose): torchvision transforms
      """
+     import torchvision.transforms as T  # scope for faster 'import ultralytics'
 
      if isinstance(size, (tuple, list)):
          assert len(size) == 2
@@ -1157,12 +1158,12 @@
      scale_size = math.floor(size / crop_fraction)
      scale_size = (scale_size, scale_size)
 
-     # aspect ratio is preserved, crops center within image, no borders are added, image is lost
+     # Aspect ratio is preserved, crops center within image, no borders are added, image is lost
      if scale_size[0] == scale_size[1]:
-         # simple case, use torchvision built-in Resize w/ shortest edge mode (scalar size arg)
+         # Simple case, use torchvision built-in Resize with the shortest edge mode (scalar size arg)
          tfl = [T.Resize(scale_size[0], interpolation=interpolation)]
      else:
-         # resize shortest edge to matching target dim for non-square target
+         # Resize the shortest edge to matching target dim for non-square target
          tfl = [T.Resize(scale_size)]
      tfl += [T.CenterCrop(size)]
 
@@ -1192,7 +1193,7 @@ def classify_augmentations(
      hsv_v=0.4,  # image HSV-Value augmentation (fraction)
      force_color_jitter=False,
      erasing=0.0,
-     interpolation: T.InterpolationMode = T.InterpolationMode.BILINEAR,
+     interpolation=Image.BILINEAR,
  ):
      """
      Classification transforms with augmentation for training. Inspired by timm/data/transforms_factory.py.
@@ -1216,7 +1217,9 @@
      Returns:
          (T.Compose): torchvision transforms
      """
-     # Transforms to apply if albumentations not installed
+     # Transforms to apply if Albumentations not installed
+     import torchvision.transforms as T  # scope for faster 'import ultralytics'
+
      if not isinstance(size, int):
          raise TypeError(f"classify_transforms() size {size} must be integer, not (list, tuple)")
      scale = tuple(scale or (0.08, 1.0))  # default imagenet scale range
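
Note: the recurring pattern in these augment.py hunks is moving the torchvision.transforms import from module scope into the function bodies that need it, so a plain 'import ultralytics' no longer pays torchvision's startup cost. It is also why the interpolation defaults switch from T.InterpolationMode.BILINEAR to Image.BILINEAR: the signatures can no longer reference T at definition time. A generic sketch of the lazy-import pattern (function name and transform chain are illustrative, not from the diff):

    def build_eval_transforms(size=224):
        # Heavy dependency imported at call time rather than at module import time,
        # so users who never build classification transforms never pay for it.
        import torchvision.transforms as T

        return T.Compose([T.Resize(size), T.CenterCrop(size), T.ToTensor()])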
ultralytics/data/base.py CHANGED
@@ -15,7 +15,7 @@ import psutil
  from torch.utils.data import Dataset
 
  from ultralytics.utils import DEFAULT_CFG, LOCAL_RANK, LOGGER, NUM_THREADS, TQDM
- from .utils import HELP_URL, FORMATS_HELP_MSG, IMG_FORMATS
+ from .utils import FORMATS_HELP_MSG, HELP_URL, IMG_FORMATS
 
 
  class BaseDataset(Dataset):
ultralytics/data/build.py CHANGED
@@ -22,7 +22,7 @@ from ultralytics.data.loaders import (
  from ultralytics.data.utils import IMG_FORMATS, VID_FORMATS
  from ultralytics.utils import RANK, colorstr
  from ultralytics.utils.checks import check_file
- from .dataset import YOLODataset, YOLOMultiModalDataset, GroundingDataset
+ from .dataset import GroundingDataset, YOLODataset, YOLOMultiModalDataset
  from .utils import PIN_MEMORY
 
 
ultralytics/data/converter.py CHANGED
@@ -519,11 +519,12 @@ def yolo_bbox2segment(im_dir, save_dir=None, sam_model="sam_b.pt"):
              ├─ ..
              └─ NNN.txt
      """
+     from tqdm import tqdm
+
+     from ultralytics import SAM
      from ultralytics.data import YOLODataset
-     from ultralytics.utils.ops import xywh2xyxy
      from ultralytics.utils import LOGGER
-     from ultralytics import SAM
-     from tqdm import tqdm
+     from ultralytics.utils.ops import xywh2xyxy
 
      # NOTE: add placeholder to pass class index check
      dataset = YOLODataset(im_dir, data=dict(names=list(range(1000))))
ultralytics/data/dataset.py CHANGED
@@ -1,18 +1,17 @@
  # Ultralytics YOLO 🚀, AGPL-3.0 license
  import contextlib
- from itertools import repeat
+ import json
  from collections import defaultdict
+ from itertools import repeat
  from multiprocessing.pool import ThreadPool
  from pathlib import Path
 
  import cv2
- import json
  import numpy as np
  import torch
- import torchvision
  from PIL import Image
-
  from torch.utils.data import ConcatDataset
+
  from ultralytics.utils import LOCAL_RANK, NUM_THREADS, TQDM, colorstr
  from ultralytics.utils.ops import resample_segments
  from .augment import (
@@ -31,10 +30,10 @@ from .utils import (
      LOGGER,
      get_hash,
      img2label_paths,
-     verify_image,
-     verify_image_label,
      load_dataset_cache_file,
      save_dataset_cache_file,
+     verify_image,
+     verify_image_label,
  )
 
  # Ultralytics dataset *.cache version, >= 1.0.0 for YOLOv8
@@ -103,16 +102,16 @@ class YOLODataset(BaseDataset):
                  nc += nc_f
                  if im_file:
                      x["labels"].append(
-                         dict(
-                             im_file=im_file,
-                             shape=shape,
-                             cls=lb[:, 0:1],  # n, 1
-                             bboxes=lb[:, 1:],  # n, 4
-                             segments=segments,
-                             keypoints=keypoint,
-                             normalized=True,
-                             bbox_format="xywh",
-                         )
+                         {
+                             "im_file": im_file,
+                             "shape": shape,
+                             "cls": lb[:, 0:1],  # n, 1
+                             "bboxes": lb[:, 1:],  # n, 4
+                             "segments": segments,
+                             "keypoints": keypoint,
+                             "normalized": True,
+                             "bbox_format": "xywh",
+                         }
                      )
                  if msg:
                      msgs.append(msg)
@@ -245,125 +244,6 @@ class YOLODataset(BaseDataset):
          return new_batch
 
 
- # Classification dataloaders -------------------------------------------------------------------------------------------
- class ClassificationDataset(torchvision.datasets.ImageFolder):
-     """
-     Extends torchvision ImageFolder to support YOLO classification tasks, offering functionalities like image
-     augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
-     learning models, with optional image transformations and caching mechanisms to speed up training.
-
-     This class allows for augmentations using both torchvision and Albumentations libraries, and supports caching images
-     in RAM or on disk to reduce IO overhead during training. Additionally, it implements a robust verification process
-     to ensure data integrity and consistency.
-
-     Attributes:
-         cache_ram (bool): Indicates if caching in RAM is enabled.
-         cache_disk (bool): Indicates if caching on disk is enabled.
-         samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
-             file (if caching on disk), and optionally the loaded image array (if caching in RAM).
-         torch_transforms (callable): PyTorch transforms to be applied to the images.
-     """
-
-     def __init__(self, root, args, augment=False, prefix=""):
-         """
-         Initialize YOLO object with root, image size, augmentations, and cache settings.
-
-         Args:
-             root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
-             args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
-                 parameters, and cache settings. It includes attributes like `imgsz` (image size), `fraction` (fraction
-                 of data to use), `scale`, `fliplr`, `flipud`, `cache` (disk or RAM caching for faster training),
-                 `auto_augment`, `hsv_h`, `hsv_s`, `hsv_v`, and `crop_fraction`.
-             augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
-             prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
-                 debugging. Default is an empty string.
-         """
-         super().__init__(root=root)
-         if augment and args.fraction < 1.0:  # reduce training fraction
-             self.samples = self.samples[: round(len(self.samples) * args.fraction)]
-         self.prefix = colorstr(f"{prefix}: ") if prefix else ""
-         self.cache_ram = args.cache is True or str(args.cache).lower() == "ram"  # cache images into RAM
-         self.cache_disk = str(args.cache).lower() == "disk"  # cache images on hard drive as uncompressed *.npy files
-         self.samples = self.verify_images()  # filter out bad images
-         self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
-         scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
-         self.torch_transforms = (
-             classify_augmentations(
-                 size=args.imgsz,
-                 scale=scale,
-                 hflip=args.fliplr,
-                 vflip=args.flipud,
-                 erasing=args.erasing,
-                 auto_augment=args.auto_augment,
-                 hsv_h=args.hsv_h,
-                 hsv_s=args.hsv_s,
-                 hsv_v=args.hsv_v,
-             )
-             if augment
-             else classify_transforms(size=args.imgsz, crop_fraction=args.crop_fraction)
-         )
-
-     def __getitem__(self, i):
-         """Returns subset of data and targets corresponding to given indices."""
-         f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
-         if self.cache_ram:
-             if im is None:  # Warning: two separate if statements required here, do not combine this with previous line
-                 im = self.samples[i][3] = cv2.imread(f)
-         elif self.cache_disk:
-             if not fn.exists():  # load npy
-                 np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
-             im = np.load(fn)
-         else:  # read image
-             im = cv2.imread(f)  # BGR
-         # Convert NumPy array to PIL image
-         im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
-         sample = self.torch_transforms(im)
-         return {"img": sample, "cls": j}
-
-     def __len__(self) -> int:
-         """Return the total number of samples in the dataset."""
-         return len(self.samples)
-
-     def verify_images(self):
-         """Verify all images in dataset."""
-         desc = f"{self.prefix}Scanning {self.root}..."
-         path = Path(self.root).with_suffix(".cache")  # *.cache file path
-
-         with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
-             cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
-             assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
-             assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
-             nf, nc, n, samples = cache.pop("results")  # found, missing, empty, corrupt, total
-             if LOCAL_RANK in {-1, 0}:
-                 d = f"{desc} {nf} images, {nc} corrupt"
-                 TQDM(None, desc=d, total=n, initial=n)
-                 if cache["msgs"]:
-                     LOGGER.info("\n".join(cache["msgs"]))  # display warnings
-             return samples
-
-         # Run scan if *.cache retrieval failed
-         nf, nc, msgs, samples, x = 0, 0, [], [], {}
-         with ThreadPool(NUM_THREADS) as pool:
-             results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
-             pbar = TQDM(results, desc=desc, total=len(self.samples))
-             for sample, nf_f, nc_f, msg in pbar:
-                 if nf_f:
-                     samples.append(sample)
-                 if msg:
-                     msgs.append(msg)
-                 nf += nf_f
-                 nc += nc_f
-                 pbar.desc = f"{desc} {nf} images, {nc} corrupt"
-             pbar.close()
-         if msgs:
-             LOGGER.info("\n".join(msgs))
-         x["hash"] = get_hash([x[0] for x in self.samples])
-         x["results"] = nf, nc, len(samples), samples
-         x["msgs"] = msgs  # warnings
-         save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
-         return samples
-
-
  class YOLOMultiModalDataset(YOLODataset):
      """
      Dataset class for loading object detection and/or segmentation labels in YOLO format.
@@ -447,15 +327,15 @@ class GroundingDataset(YOLODataset):
                  bboxes.append(box)
              lb = np.array(bboxes, dtype=np.float32) if len(bboxes) else np.zeros((0, 5), dtype=np.float32)
              labels.append(
-                 dict(
-                     im_file=im_file,
-                     shape=(h, w),
-                     cls=lb[:, 0:1],  # n, 1
-                     bboxes=lb[:, 1:],  # n, 4
-                     normalized=True,
-                     bbox_format="xywh",
-                     texts=texts,
-                 )
+                 {
+                     "im_file": im_file,
+                     "shape": (h, w),
+                     "cls": lb[:, 0:1],  # n, 1
+                     "bboxes": lb[:, 1:],  # n, 4
+                     "normalized": True,
+                     "bbox_format": "xywh",
+                     "texts": texts,
+                 }
              )
          return labels
 
@@ -497,3 +377,128 @@ class SemanticDataset(BaseDataset):
      def __init__(self):
          """Initialize a SemanticDataset object."""
          super().__init__()
+
+
+ class ClassificationDataset:
+     """
+     Extends torchvision ImageFolder to support YOLO classification tasks, offering functionalities like image
+     augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
+     learning models, with optional image transformations and caching mechanisms to speed up training.
+
+     This class allows for augmentations using both torchvision and Albumentations libraries, and supports caching images
+     in RAM or on disk to reduce IO overhead during training. Additionally, it implements a robust verification process
+     to ensure data integrity and consistency.
+
+     Attributes:
+         cache_ram (bool): Indicates if caching in RAM is enabled.
+         cache_disk (bool): Indicates if caching on disk is enabled.
+         samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
+             file (if caching on disk), and optionally the loaded image array (if caching in RAM).
+         torch_transforms (callable): PyTorch transforms to be applied to the images.
+     """
+
+     def __init__(self, root, args, augment=False, prefix=""):
+         """
+         Initialize YOLO object with root, image size, augmentations, and cache settings.
+
+         Args:
+             root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
+             args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
+                 parameters, and cache settings. It includes attributes like `imgsz` (image size), `fraction` (fraction
+                 of data to use), `scale`, `fliplr`, `flipud`, `cache` (disk or RAM caching for faster training),
+                 `auto_augment`, `hsv_h`, `hsv_s`, `hsv_v`, and `crop_fraction`.
+             augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
+             prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
+                 debugging. Default is an empty string.
+         """
+         import torchvision  # scope for faster 'import ultralytics'
+
+         # Base class assigned as attribute rather than used as base class to allow for scoping slow torchvision import
+         self.base = torchvision.datasets.ImageFolder(root=root)
+         self.samples = self.base.samples
+         self.root = self.base.root
+
+         # Initialize attributes
+         if augment and args.fraction < 1.0:  # reduce training fraction
+             self.samples = self.samples[: round(len(self.samples) * args.fraction)]
+         self.prefix = colorstr(f"{prefix}: ") if prefix else ""
+         self.cache_ram = args.cache is True or str(args.cache).lower() == "ram"  # cache images into RAM
+         self.cache_disk = str(args.cache).lower() == "disk"  # cache images on hard drive as uncompressed *.npy files
+         self.samples = self.verify_images()  # filter out bad images
+         self.samples = [list(x) + [Path(x[0]).with_suffix(".npy"), None] for x in self.samples]  # file, index, npy, im
+         scale = (1.0 - args.scale, 1.0)  # (0.08, 1.0)
+         self.torch_transforms = (
+             classify_augmentations(
+                 size=args.imgsz,
+                 scale=scale,
+                 hflip=args.fliplr,
+                 vflip=args.flipud,
+                 erasing=args.erasing,
+                 auto_augment=args.auto_augment,
+                 hsv_h=args.hsv_h,
+                 hsv_s=args.hsv_s,
+                 hsv_v=args.hsv_v,
+             )
+             if augment
+             else classify_transforms(size=args.imgsz, crop_fraction=args.crop_fraction)
+         )
+
+     def __getitem__(self, i):
+         """Returns subset of data and targets corresponding to given indices."""
+         f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
+         if self.cache_ram:
+             if im is None:  # Warning: two separate if statements required here, do not combine this with previous line
+                 im = self.samples[i][3] = cv2.imread(f)
+         elif self.cache_disk:
+             if not fn.exists():  # load npy
+                 np.save(fn.as_posix(), cv2.imread(f), allow_pickle=False)
+             im = np.load(fn)
+         else:  # read image
+             im = cv2.imread(f)  # BGR
+         # Convert NumPy array to PIL image
+         im = Image.fromarray(cv2.cvtColor(im, cv2.COLOR_BGR2RGB))
+         sample = self.torch_transforms(im)
+         return {"img": sample, "cls": j}
+
+     def __len__(self) -> int:
+         """Return the total number of samples in the dataset."""
+         return len(self.samples)
+
+     def verify_images(self):
+         """Verify all images in dataset."""
+         desc = f"{self.prefix}Scanning {self.root}..."
+         path = Path(self.root).with_suffix(".cache")  # *.cache file path
+
+         with contextlib.suppress(FileNotFoundError, AssertionError, AttributeError):
+             cache = load_dataset_cache_file(path)  # attempt to load a *.cache file
+             assert cache["version"] == DATASET_CACHE_VERSION  # matches current version
+             assert cache["hash"] == get_hash([x[0] for x in self.samples])  # identical hash
+             nf, nc, n, samples = cache.pop("results")  # found, missing, empty, corrupt, total
+             if LOCAL_RANK in {-1, 0}:
+                 d = f"{desc} {nf} images, {nc} corrupt"
+                 TQDM(None, desc=d, total=n, initial=n)
+                 if cache["msgs"]:
+                     LOGGER.info("\n".join(cache["msgs"]))  # display warnings
+             return samples
+
+         # Run scan if *.cache retrieval failed
+         nf, nc, msgs, samples, x = 0, 0, [], [], {}
+         with ThreadPool(NUM_THREADS) as pool:
+             results = pool.imap(func=verify_image, iterable=zip(self.samples, repeat(self.prefix)))
+             pbar = TQDM(results, desc=desc, total=len(self.samples))
+             for sample, nf_f, nc_f, msg in pbar:
+                 if nf_f:
+                     samples.append(sample)
+                 if msg:
+                     msgs.append(msg)
+                 nf += nf_f
+                 nc += nc_f
+                 pbar.desc = f"{desc} {nf} images, {nc} corrupt"
+             pbar.close()
+         if msgs:
+             LOGGER.info("\n".join(msgs))
+         x["hash"] = get_hash([x[0] for x in self.samples])
+         x["results"] = nf, nc, len(samples), samples
+         x["msgs"] = msgs  # warnings
+         save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
+         return samples
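
Note: the re-added ClassificationDataset drops torchvision.datasets.ImageFolder as a base class and instead holds an ImageFolder instance as self.base, which is what allows the torchvision import to move inside __init__. A reduced sketch of this composition-over-inheritance trade (simplified from the hunk above, class name is illustrative):

    class LazyImageFolderDataset:
        """Holds torchvision's ImageFolder by composition so the heavy import is deferred."""

        def __init__(self, root):
            import torchvision  # deferred: only paid when a dataset is actually constructed

            self.base = torchvision.datasets.ImageFolder(root=root)
            self.samples = self.base.samples  # list of (image_path, class_index) tuples
            self.root = self.base.root

        def __len__(self):
            return len(self.samples)

The cost is that isinstance checks against ImageFolder no longer hold and inherited attributes must be re-exposed by hand, which is why the hunk copies samples and root onto the wrapper.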
ultralytics/data/explorer/explorer.py CHANGED
@@ -9,14 +9,13 @@ import numpy as np
  import torch
  from PIL import Image
  from matplotlib import pyplot as plt
- from pandas import DataFrame
  from tqdm import tqdm
 
  from ultralytics.data.augment import Format
  from ultralytics.data.dataset import YOLODataset
  from ultralytics.data.utils import check_det_dataset
  from ultralytics.models.yolo.model import YOLO
- from ultralytics.utils import LOGGER, IterableSimpleNamespace, checks, USER_CONFIG_DIR
+ from ultralytics.utils import LOGGER, USER_CONFIG_DIR, IterableSimpleNamespace, checks
  from .utils import get_sim_index_schema, get_table_schema, plot_query_result, prompt_sql_query, sanitize_batch
 
 
@@ -172,7 +171,7 @@ class Explorer:
 
      def sql_query(
          self, query: str, return_type: str = "pandas"
-     ) -> Union[DataFrame, Any, None]:  # pandas.dataframe or pyarrow.Table
+     ) -> Union[Any, None]:  # pandas.DataFrame or pyarrow.Table
          """
          Run a SQL-Like query on the table. Utilizes LanceDB predicate pushdown.
 
@@ -204,7 +203,8 @@ class Explorer:
          table = self.table.to_arrow()  # noqa NOTE: Don't comment this. This line is used by DuckDB
          if not query.startswith("SELECT") and not query.startswith("WHERE"):
              raise ValueError(
-                 f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE clause. found {query}"
+                 f"Query must start with SELECT or WHERE. You can either pass the entire query or just the WHERE "
+                 f"clause. found {query}"
              )
          if query.startswith("WHERE"):
              query = f"SELECT * FROM 'table' {query}"
@@ -247,7 +247,7 @@ class Explorer:
          idx: Union[int, List[int]] = None,
          limit: int = 25,
          return_type: str = "pandas",
-     ) -> Union[DataFrame, Any]:  # pandas.dataframe or pyarrow.Table
+     ) -> Any:  # pandas.DataFrame or pyarrow.Table
          """
          Query the table for similar images. Accepts a single image or a list of images.
 
@@ -312,20 +312,20 @@ class Explorer:
          img = plot_query_result(similar, plot_labels=labels)
          return Image.fromarray(img)
 
-     def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> DataFrame:
+     def similarity_index(self, max_dist: float = 0.2, top_k: float = None, force: bool = False) -> Any:  # pd.DataFrame
          """
          Calculate the similarity index of all the images in the table. Here, the index will contain the data points that
          are max_dist or closer to the image in the embedding space at a given index.
 
          Args:
              max_dist (float): maximum L2 distance between the embeddings to consider. Defaults to 0.2.
-             top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit when running
+             top_k (float): Percentage of the closest data points to consider when counting. Used to apply limit.
                  vector search. Defaults: None.
              force (bool): Whether to overwrite the existing similarity index or not. Defaults to True.
 
          Returns:
-             (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image, and columns
-                 include indices of similar images and their respective distances.
+             (pandas.DataFrame): A dataframe containing the similarity index. Each row corresponds to an image,
+                 and columns include indices of similar images and their respective distances.
 
          Example:
              ```python
@@ -447,12 +447,11 @@ class Explorer:
          """
          result = prompt_sql_query(query)
          try:
-             df = self.sql_query(result)
+             return self.sql_query(result)
          except Exception as e:
              LOGGER.error("AI generated query is not valid. Please try again with a different prompt")
              LOGGER.error(e)
              return None
-         return df
 
      def visualize(self, result):
          """
ultralytics/data/explorer/gui/dash.py CHANGED
@@ -3,8 +3,6 @@
  import time
  from threading import Thread
 
- import pandas as pd
-
  from ultralytics import Explorer
  from ultralytics.utils import ROOT, SETTINGS
  from ultralytics.utils.checks import check_requirements
@@ -148,12 +146,14 @@ def run_ai_query():
              'OpenAI API key not found in settings. Please run yolo settings openai_api_key="..."'
          )
          return
+     import pandas  # scope for faster 'import ultralytics'
+
      st.session_state["error"] = None
      query = st.session_state.get("ai_query")
      if query.rstrip().lstrip():
          exp = st.session_state["explorer"]
          res = exp.ask_ai(query)
-         if not isinstance(res, pd.DataFrame) or res.empty:
+         if not isinstance(res, pandas.DataFrame) or res.empty:
              st.session_state["error"] = "No results found using AI generated query. Try another query or rerun it."
              return
          st.session_state["imgs"] = res["im_file"].to_list()