PyPI - ultralytics - Versions diffs - 8.3.89__py3-none-any.whl → 8.3.91__py3-none-any.whl - Mend

ultralytics 8.3.89-py3-none-any.whl → 8.3.91-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (156)

tests/conftest.py +2 -2
tests/test_cli.py +13 -11
tests/test_cuda.py +10 -1
tests/test_exports.py +2 -2
tests/test_integrations.py +1 -5
tests/test_python.py +16 -16
tests/test_solutions.py +9 -9
ultralytics/__init__.py +1 -1
ultralytics/cfg/__init__.py +3 -1
ultralytics/cfg/models/11/yolo11-cls.yaml +5 -5
ultralytics/cfg/models/11/yolo11-obb.yaml +5 -5
ultralytics/cfg/models/11/yolo11-pose.yaml +5 -5
ultralytics/cfg/models/11/yolo11-seg.yaml +5 -5
ultralytics/cfg/models/11/yolo11.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-ghost-p2.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-ghost-p6.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-ghost.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-obb.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-p6.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-rtdetr.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-world.yaml +5 -5
ultralytics/cfg/models/v8/yolov8-worldv2.yaml +5 -5
ultralytics/cfg/models/v8/yolov8.yaml +5 -5
ultralytics/cfg/models/v9/yolov9c-seg.yaml +1 -1
ultralytics/cfg/models/v9/yolov9c.yaml +1 -1
ultralytics/cfg/models/v9/yolov9e-seg.yaml +1 -1
ultralytics/cfg/models/v9/yolov9e.yaml +1 -1
ultralytics/cfg/models/v9/yolov9m.yaml +1 -1
ultralytics/cfg/models/v9/yolov9s.yaml +1 -1
ultralytics/cfg/models/v9/yolov9t.yaml +1 -1
ultralytics/data/annotator.py +9 -14
ultralytics/data/base.py +118 -30
ultralytics/data/build.py +63 -24
ultralytics/data/converter.py +5 -5
ultralytics/data/dataset.py +207 -53
ultralytics/data/loaders.py +1 -0
ultralytics/data/split_dota.py +39 -12
ultralytics/data/utils.py +15 -19
ultralytics/engine/exporter.py +24 -23
ultralytics/engine/model.py +67 -88
ultralytics/engine/predictor.py +106 -21
ultralytics/engine/trainer.py +32 -23
ultralytics/engine/tuner.py +21 -18
ultralytics/engine/validator.py +75 -41
ultralytics/hub/__init__.py +12 -13
ultralytics/hub/auth.py +9 -12
ultralytics/hub/session.py +76 -21
ultralytics/hub/utils.py +19 -17
ultralytics/models/fastsam/model.py +20 -11
ultralytics/models/fastsam/predict.py +36 -16
ultralytics/models/fastsam/utils.py +5 -5
ultralytics/models/fastsam/val.py +6 -6
ultralytics/models/nas/model.py +22 -11
ultralytics/models/nas/predict.py +9 -4
ultralytics/models/nas/val.py +5 -5
ultralytics/models/rtdetr/model.py +20 -11
ultralytics/models/rtdetr/predict.py +18 -15
ultralytics/models/rtdetr/train.py +20 -16
ultralytics/models/rtdetr/val.py +42 -6
ultralytics/models/sam/__init__.py +1 -1
ultralytics/models/sam/amg.py +50 -4
ultralytics/models/sam/model.py +8 -14
ultralytics/models/sam/modules/decoders.py +18 -21
ultralytics/models/sam/modules/encoders.py +25 -46
ultralytics/models/sam/modules/memory_attention.py +19 -15
ultralytics/models/sam/modules/sam.py +18 -25
ultralytics/models/sam/modules/tiny_encoder.py +19 -29
ultralytics/models/sam/modules/transformer.py +35 -57
ultralytics/models/sam/modules/utils.py +15 -15
ultralytics/models/sam/predict.py +0 -3
ultralytics/models/utils/loss.py +87 -36
ultralytics/models/utils/ops.py +26 -31
ultralytics/models/yolo/classify/predict.py +24 -3
ultralytics/models/yolo/classify/train.py +77 -10
ultralytics/models/yolo/classify/val.py +40 -15
ultralytics/models/yolo/detect/predict.py +23 -10
ultralytics/models/yolo/detect/train.py +85 -15
ultralytics/models/yolo/detect/val.py +145 -21
ultralytics/models/yolo/model.py +1 -2
ultralytics/models/yolo/obb/predict.py +12 -4
ultralytics/models/yolo/obb/train.py +7 -0
ultralytics/models/yolo/obb/val.py +25 -7
ultralytics/models/yolo/pose/predict.py +22 -6
ultralytics/models/yolo/pose/train.py +17 -1
ultralytics/models/yolo/pose/val.py +46 -21
ultralytics/models/yolo/segment/predict.py +22 -8
ultralytics/models/yolo/segment/train.py +6 -0
ultralytics/models/yolo/segment/val.py +100 -14
ultralytics/models/yolo/world/train.py +38 -8
ultralytics/models/yolo/world/train_world.py +39 -10
ultralytics/nn/autobackend.py +28 -14
ultralytics/nn/modules/__init__.py +3 -0
ultralytics/nn/modules/activation.py +12 -3
ultralytics/nn/modules/block.py +587 -84
ultralytics/nn/modules/conv.py +418 -54
ultralytics/nn/modules/head.py +3 -4
ultralytics/nn/modules/transformer.py +320 -34
ultralytics/nn/modules/utils.py +17 -3
ultralytics/nn/tasks.py +221 -69
ultralytics/solutions/ai_gym.py +2 -2
ultralytics/solutions/analytics.py +4 -4
ultralytics/solutions/heatmap.py +4 -4
ultralytics/solutions/instance_segmentation.py +10 -4
ultralytics/solutions/object_blurrer.py +2 -2
ultralytics/solutions/object_counter.py +2 -2
ultralytics/solutions/object_cropper.py +2 -2
ultralytics/solutions/parking_management.py +9 -9
ultralytics/solutions/queue_management.py +1 -1
ultralytics/solutions/region_counter.py +2 -2
ultralytics/solutions/security_alarm.py +7 -7
ultralytics/solutions/solutions.py +7 -4
ultralytics/solutions/speed_estimation.py +2 -2
ultralytics/solutions/streamlit_inference.py +6 -6
ultralytics/solutions/trackzone.py +9 -2
ultralytics/solutions/vision_eye.py +4 -4
ultralytics/trackers/basetrack.py +1 -1
ultralytics/trackers/bot_sort.py +23 -22
ultralytics/trackers/byte_tracker.py +4 -4
ultralytics/trackers/track.py +2 -1
ultralytics/trackers/utils/gmc.py +26 -27
ultralytics/trackers/utils/kalman_filter.py +31 -29
ultralytics/trackers/utils/matching.py +7 -7
ultralytics/utils/__init__.py +32 -27
ultralytics/utils/autobatch.py +5 -5
ultralytics/utils/benchmarks.py +111 -18
ultralytics/utils/callbacks/base.py +3 -3
ultralytics/utils/callbacks/clearml.py +11 -11
ultralytics/utils/callbacks/comet.py +42 -24
ultralytics/utils/callbacks/dvc.py +11 -10
ultralytics/utils/callbacks/hub.py +8 -8
ultralytics/utils/callbacks/mlflow.py +1 -1
ultralytics/utils/callbacks/neptune.py +12 -10
ultralytics/utils/callbacks/raytune.py +1 -1
ultralytics/utils/callbacks/tensorboard.py +6 -6
ultralytics/utils/callbacks/wb.py +16 -16
ultralytics/utils/checks.py +116 -35
ultralytics/utils/dist.py +15 -2
ultralytics/utils/downloads.py +13 -9
ultralytics/utils/files.py +12 -13
ultralytics/utils/instance.py +112 -45
ultralytics/utils/loss.py +28 -33
ultralytics/utils/metrics.py +246 -181
ultralytics/utils/ops.py +61 -53
ultralytics/utils/patches.py +8 -6
ultralytics/utils/plotting.py +65 -45
ultralytics/utils/tal.py +88 -57
ultralytics/utils/torch_utils.py +181 -33
ultralytics/utils/triton.py +13 -3
ultralytics/utils/tuner.py +8 -16
{ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/METADATA +1 -1
ultralytics-8.3.91.dist-info/RECORD +250 -0
ultralytics-8.3.89.dist-info/RECORD +0 -250
{ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/LICENSE +0 -0
{ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/WHEEL +0 -0
{ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/entry_points.txt +0 -0
{ultralytics-8.3.89.dist-info → ultralytics-8.3.91.dist-info}/top_level.txt +0 -0

ultralytics/data/dataset.py CHANGED Viewed

@@ -46,16 +46,38 @@ class YOLODataset(BaseDataset):
     """
     Dataset class for loading object detection and/or segmentation labels in YOLO format.
-    Args:
-        data (dict, optional): A dataset YAML dictionary. Defaults to None.
-        task (str): An explicit arg to point current task, Defaults to 'detect'.
+    This class supports loading data for object detection, segmentation, pose estimation, and oriented bounding box
+    (OBB) tasks using the YOLO format.
-    Returns:
-        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
+    Attributes:
+        use_segments (bool): Indicates if segmentation masks should be used.
+        use_keypoints (bool): Indicates if keypoints should be used for pose estimation.
+        use_obb (bool): Indicates if oriented bounding boxes should be used.
+        data (dict): Dataset configuration dictionary.
+    Methods:
+        cache_labels: Cache dataset labels, check images and read shapes.
+        get_labels: Returns dictionary of labels for YOLO training.
+        build_transforms: Builds and appends transforms to the list.
+        close_mosaic: Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
+        update_labels_info: Updates label format for different tasks.
+        collate_fn: Collates data samples into batches.
+    Examples:
+        >>> dataset = YOLODataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
+        >>> dataset.get_labels()
     """
     def __init__(self, *args, data=None, task="detect", **kwargs):
-        """Initializes the YOLODataset with optional configurations for segments and keypoints."""
+        """
+        Initialize the YOLODataset.
+        Args:
+            data (dict, optional): Dataset configuration dictionary.
+            task (str): Task type, one of 'detect', 'segment', 'pose', or 'obb'.
+            *args (Any): Additional positional arguments for the parent class.
+            **kwargs (Any): Additional keyword arguments for the parent class.
+        """
         self.use_segments = task == "segment"
         self.use_keypoints = task == "pose"
         self.use_obb = task == "obb"
@@ -68,10 +90,10 @@ class YOLODataset(BaseDataset):
         Cache dataset labels, check images and read shapes.
         Args:
-            path (Path): Path where to save the cache file. Default is Path("./labels.cache").
+            path (Path): Path where to save the cache file.
         Returns:
-            (dict): labels.
+            (dict): Dictionary containing cached labels and related information.
         """
         x = {"labels": []}
         nm, nf, ne, nc, msgs = 0, 0, 0, 0, []  # number missing, found, empty, corrupt, messages
@@ -131,7 +153,14 @@ class YOLODataset(BaseDataset):
         return x
     def get_labels(self):
-        """Returns dictionary of labels for YOLO training."""
+        """
+        Returns dictionary of labels for YOLO training.
+        This method loads labels from disk or cache, verifies their integrity, and prepares them for training.
+        Returns:
+            (List[dict]): List of label dictionaries, each containing information about an image and its annotations.
+        """
         self.label_files = img2label_paths(self.im_files)
         cache_path = Path(self.label_files[0]).parent.with_suffix(".cache")
         try:
@@ -172,7 +201,15 @@ class YOLODataset(BaseDataset):
         return labels
     def build_transforms(self, hyp=None):
-        """Builds and appends transforms to the list."""
+        """
+        Builds and appends transforms to the list.
+        Args:
+            hyp (dict, optional): Hyperparameters for transforms.
+        Returns:
+            (Compose): Composed transforms.
+        """
         if self.augment:
             hyp.mosaic = hyp.mosaic if self.augment and not self.rect else 0.0
             hyp.mixup = hyp.mixup if self.augment and not self.rect else 0.0
@@ -195,7 +232,12 @@ class YOLODataset(BaseDataset):
         return transforms
     def close_mosaic(self, hyp):
-        """Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations."""
+        """
+        Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
+        Args:
+            hyp (dict): Hyperparameters for transforms.
+        """
         hyp.mosaic = 0.0  # set mosaic ratio=0.0
         hyp.copy_paste = 0.0  # keep the same behavior as previous v8 close-mosaic
         hyp.mixup = 0.0  # keep the same behavior as previous v8 close-mosaic
@@ -205,6 +247,12 @@ class YOLODataset(BaseDataset):
         """
         Custom your label format here.
+        Args:
+            label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
+        Returns:
+            (dict): Updated label dictionary with instances.
         Note:
             cls is not with bboxes now, classification and semantic segmentation need an independent cls label
             Can also support classification and semantic segmentation by adding or removing dict keys there.
@@ -230,7 +278,15 @@ class YOLODataset(BaseDataset):
     @staticmethod
     def collate_fn(batch):
-        """Collates data samples into batches."""
+        """
+        Collates data samples into batches.
+        Args:
+            batch (List[dict]): List of dictionaries containing sample data.
+        Returns:
+            (dict): Collated batch with stacked tensors.
+        """
         new_batch = {}
         keys = batch[0].keys()
         values = list(zip(*[list(b.values()) for b in batch]))
@@ -250,29 +306,58 @@ class YOLODataset(BaseDataset):
 class YOLOMultiModalDataset(YOLODataset):
     """
-    Dataset class for loading object detection and/or segmentation labels in YOLO format.
+    Dataset class for loading object detection and/or segmentation labels in YOLO format with multi-modal support.
-    Args:
-        data (dict, optional): A dataset YAML dictionary. Defaults to None.
-        task (str): An explicit arg to point current task, Defaults to 'detect'.
+    This class extends YOLODataset to add text information for multi-modal model training, enabling models to
+    process both image and text data.
-    Returns:
-        (torch.utils.data.Dataset): A PyTorch dataset object that can be used for training an object detection model.
+    Methods:
+        update_labels_info: Adds text information for multi-modal model training.
+        build_transforms: Enhances data transformations with text augmentation.
+    Examples:
+        >>> dataset = YOLOMultiModalDataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
+        >>> batch = next(iter(dataset))
+        >>> print(batch.keys())  # Should include 'texts'
     """
     def __init__(self, *args, data=None, task="detect", **kwargs):
-        """Initializes a dataset object for object detection tasks with optional specifications."""
+        """
+        Initialize a YOLOMultiModalDataset.
+        Args:
+            data (dict, optional): Dataset configuration dictionary.
+            task (str): Task type, one of 'detect', 'segment', 'pose', or 'obb'.
+            *args (Any): Additional positional arguments for the parent class.
+            **kwargs (Any): Additional keyword arguments for the parent class.
+        """
         super().__init__(*args, data=data, task=task, **kwargs)
     def update_labels_info(self, label):
-        """Add texts information for multi-modal model training."""
+        """
+        Add texts information for multi-modal model training.
+        Args:
+            label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
+        Returns:
+            (dict): Updated label dictionary with instances and texts.
+        """
         labels = super().update_labels_info(label)
         # NOTE: some categories are concatenated with its synonyms by `/`.
         labels["texts"] = [v.split("/") for _, v in self.data["names"].items()]
         return labels
     def build_transforms(self, hyp=None):
-        """Enhances data transformations with optional text augmentation for multi-modal training."""
+        """
+        Enhances data transformations with optional text augmentation for multi-modal training.
+        Args:
+            hyp (dict, optional): Hyperparameters for transforms.
+        Returns:
+            (Compose): Composed transforms including text augmentation if applicable.
+        """
         transforms = super().build_transforms(hyp)
         if self.augment:
             # NOTE: hard-coded the args for now.
@@ -281,20 +366,58 @@ class YOLOMultiModalDataset(YOLODataset):
 class GroundingDataset(YOLODataset):
-    """Handles object detection tasks by loading annotations from a specified JSON file, supporting YOLO format."""
+    """
+    Handles object detection tasks by loading annotations from a specified JSON file, supporting YOLO format.
+    This dataset is designed for grounding tasks where annotations are provided in a JSON file rather than
+    the standard YOLO format text files.
+    Attributes:
+        json_file (str): Path to the JSON file containing annotations.
+    Methods:
+        get_img_files: Returns empty list as image files are read in get_labels.
+        get_labels: Loads annotations from a JSON file and prepares them for training.
+        build_transforms: Configures augmentations for training with optional text loading.
+    Examples:
+        >>> dataset = GroundingDataset(img_path="path/to/images", json_file="annotations.json", task="detect")
+        >>> len(dataset)  # Number of valid images with annotations
+    """
     def __init__(self, *args, task="detect", json_file, **kwargs):
-        """Initializes a GroundingDataset for object detection, loading annotations from a specified JSON file."""
+        """
+        Initialize a GroundingDataset for object detection.
+        Args:
+            json_file (str): Path to the JSON file containing annotations.
+            task (str): Must be 'detect' for GroundingDataset.
+            *args (Any): Additional positional arguments for the parent class.
+            **kwargs (Any): Additional keyword arguments for the parent class.
+        """
         assert task == "detect", "`GroundingDataset` only support `detect` task for now!"
         self.json_file = json_file
         super().__init__(*args, task=task, data={}, **kwargs)
     def get_img_files(self, img_path):
-        """The image files would be read in `get_labels` function, return empty list here."""
+        """
+        The image files would be read in `get_labels` function, return empty list here.
+        Args:
+            img_path (str): Path to the directory containing images.
+        Returns:
+            (List): Empty list as image files are read in get_labels.
+        """
         return []
     def get_labels(self):
-        """Loads annotations from a JSON file, filters, and normalizes bounding boxes for each image."""
+        """
+        Loads annotations from a JSON file, filters, and normalizes bounding boxes for each image.
+        Returns:
+            (List[dict]): List of label dictionaries, each containing information about an image and its annotations.
+        """
         labels = []
         LOGGER.info("Loading annotation file...")
         with open(self.json_file) as f:
@@ -347,7 +470,15 @@ class GroundingDataset(YOLODataset):
         return labels
     def build_transforms(self, hyp=None):
-        """Configures augmentations for training with optional text loading; `hyp` adjusts augmentation intensity."""
+        """
+        Configures augmentations for training with optional text loading.
+        Args:
+            hyp (dict, optional): Hyperparameters for transforms.
+        Returns:
+            (Compose): Composed transforms including text augmentation if applicable.
+        """
         transforms = super().build_transforms(hyp)
         if self.augment:
             # NOTE: hard-coded the args for now.
@@ -359,27 +490,35 @@ class YOLOConcatDataset(ConcatDataset):
     """
     Dataset as a concatenation of multiple datasets.
-    This class is useful to assemble different existing datasets.
+    This class is useful to assemble different existing datasets for YOLO training, ensuring they use the same
+    collation function.
+    Methods:
+        collate_fn: Static method that collates data samples into batches using YOLODataset's collation function.
+    Examples:
+        >>> dataset1 = YOLODataset(...)
+        >>> dataset2 = YOLODataset(...)
+        >>> combined_dataset = YOLOConcatDataset([dataset1, dataset2])
     """
     @staticmethod
     def collate_fn(batch):
-        """Collates data samples into batches."""
+        """
+        Collates data samples into batches.
+        Args:
+            batch (List[dict]): List of dictionaries containing sample data.
+        Returns:
+            (dict): Collated batch with stacked tensors.
+        """
         return YOLODataset.collate_fn(batch)
 # TODO: support semantic segmentation
 class SemanticDataset(BaseDataset):
-    """
-    Semantic Segmentation Dataset.
-    This class is responsible for handling datasets used for semantic segmentation tasks. It inherits functionalities
-    from the BaseDataset class.
-    Note:
-        This class is currently a placeholder and needs to be populated with methods and attributes for supporting
-        semantic segmentation tasks.
-    """
+    """Semantic Segmentation Dataset."""
     def __init__(self):
         """Initialize a SemanticDataset object."""
@@ -388,20 +527,25 @@ class SemanticDataset(BaseDataset):
 class ClassificationDataset:
     """
-    Extends torchvision ImageFolder to support YOLO classification tasks, offering functionalities like image
-    augmentation, caching, and verification. It's designed to efficiently handle large datasets for training deep
-    learning models, with optional image transformations and caching mechanisms to speed up training.
+    Extends torchvision ImageFolder to support YOLO classification tasks.
-    This class allows for augmentations using both torchvision and Albumentations libraries, and supports caching images
-    in RAM or on disk to reduce IO overhead during training. Additionally, it implements a robust verification process
-    to ensure data integrity and consistency.
+    This class offers functionalities like image augmentation, caching, and verification. It's designed to efficiently
+    handle large datasets for training deep learning models, with optional image transformations and caching mechanisms
+    to speed up training.
     Attributes:
         cache_ram (bool): Indicates if caching in RAM is enabled.
         cache_disk (bool): Indicates if caching on disk is enabled.
-        samples (list): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
+        samples (List): A list of tuples, each containing the path to an image, its class index, path to its .npy cache
                         file (if caching on disk), and optionally the loaded image array (if caching in RAM).
         torch_transforms (callable): PyTorch transforms to be applied to the images.
+        root (str): Root directory of the dataset.
+        prefix (str): Prefix for logging and cache filenames.
+    Methods:
+        __getitem__: Returns subset of data and targets corresponding to given indices.
+        __len__: Returns the total number of samples in the dataset.
+        verify_images: Verifies all images in dataset.
     """
     def __init__(self, root, args, augment=False, prefix=""):
@@ -411,12 +555,9 @@ class ClassificationDataset:
         Args:
             root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
             args (Namespace): Configuration containing dataset-related settings such as image size, augmentation
-                parameters, and cache settings. It includes attributes like `imgsz` (image size), `fraction` (fraction
-                of data to use), `scale`, `fliplr`, `flipud`, `cache` (disk or RAM caching for faster training),
-                `auto_augment`, `hsv_h`, `hsv_s`, `hsv_v`, and `crop_fraction`.
-            augment (bool, optional): Whether to apply augmentations to the dataset. Default is False.
-            prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification and
-                debugging. Default is an empty string.
+                parameters, and cache settings.
+            augment (bool, optional): Whether to apply augmentations to the dataset.
+            prefix (str, optional): Prefix for logging and cache filenames, aiding in dataset identification.
         """
         import torchvision  # scope for faster 'import ultralytics'
@@ -460,7 +601,15 @@ class ClassificationDataset:
         )
     def __getitem__(self, i):
-        """Returns subset of data and targets corresponding to given indices."""
+        """
+        Returns subset of data and targets corresponding to given indices.
+        Args:
+            i (int): Index of the sample to retrieve.
+        Returns:
+            (dict): Dictionary containing the image and its class index.
+        """
         f, j, fn, im = self.samples[i]  # filename, index, filename.with_suffix('.npy'), image
         if self.cache_ram:
             if im is None:  # Warning: two separate if statements required here, do not combine this with previous line
@@ -481,7 +630,12 @@ class ClassificationDataset:
         return len(self.samples)
     def verify_images(self):
-        """Verify all images in dataset."""
+        """
+        Verify all images in dataset.
+        Returns:
+            (List): List of valid samples after verification.
+        """
         desc = f"{self.prefix}Scanning {self.root}..."
         path = Path(self.root).with_suffix(".cache")  # *.cache file path

ultralytics/data/loaders.py CHANGED Viewed

@@ -33,6 +33,7 @@ class SourceTypes:
         stream (bool): Flag indicating if the input source is a video stream.
         screenshot (bool): Flag indicating if the input source is a screenshot.
         from_img (bool): Flag indicating if the input source is an image file.
+        tensor (bool): Flag indicating if the input source is a tensor.
     Examples:
         >>> source_types = SourceTypes(stream=True, screenshot=False, from_img=False)

ultralytics/data/split_dota.py CHANGED Viewed

@@ -19,14 +19,14 @@ def bbox_iof(polygon1, bbox2, eps=1e-6):
     Calculate Intersection over Foreground (IoF) between polygons and bounding boxes.
     Args:
-        polygon1 (np.ndarray): Polygon coordinates, shape (n, 8).
-        bbox2 (np.ndarray): Bounding boxes, shape (n, 4).
-        eps (float, optional): Small value to prevent division by zero. Defaults to 1e-6.
+        polygon1 (np.ndarray): Polygon coordinates with shape (n, 8).
+        bbox2 (np.ndarray): Bounding boxes with shape (n, 4).
+        eps (float, optional): Small value to prevent division by zero.
     Returns:
-        (np.ndarray): IoF scores, shape (n, 1) or (n, m) if bbox2 is (m, 4).
+        (np.ndarray): IoF scores with shape (n, 1) or (n, m) if bbox2 is (m, 4).
-    Note:
+    Notes:
         Polygon format: [x1, y1, x2, y2, x3, y3, x4, y4].
         Bounding box format: [x_min, y_min, x_max, y_max].
     """
@@ -66,9 +66,12 @@ def load_yolo_dota(data_root, split="train"):
     Load DOTA dataset.
     Args:
-        data_root (str): Data root.
+        data_root (str): Data root directory.
         split (str): The split data set, could be `train` or `val`.
+    Returns:
+        (List[Dict]): List of annotation dictionaries containing image information.
     Notes:
         The directory structure assumed for the DOTA dataset:
             - data_root
@@ -100,10 +103,13 @@ def get_windows(im_size, crop_sizes=(1024,), gaps=(200,), im_rate_thr=0.6, eps=0
     Args:
         im_size (tuple): Original image size, (h, w).
-        crop_sizes (List(int)): Crop size of windows.
-        gaps (List(int)): Gap between crops.
-        im_rate_thr (float): Threshold of windows areas divided by image ares.
+        crop_sizes (List[int]): Crop size of windows.
+        gaps (List[int]): Gap between crops.
+        im_rate_thr (float): Threshold of windows areas divided by image areas.
         eps (float): Epsilon value for math operations.
+    Returns:
+        (np.ndarray): Array of window coordinates with shape (n, 4) where each row is [x_start, y_start, x_stop, y_stop].
     """
     h, w = im_size
     windows = []
@@ -157,9 +163,9 @@ def crop_and_save(anno, windows, window_objs, im_dir, lb_dir, allow_background_i
     Crop images and save new labels.
     Args:
-        anno (dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
-        windows (list): A list of windows coordinates.
-        window_objs (list): A list of labels inside each window.
+        anno (Dict): Annotation dict, including `filepath`, `label`, `ori_size` as its keys.
+        windows (np.ndarray): Array of windows coordinates with shape (n, 4).
+        window_objs (List): A list of labels inside each window.
         im_dir (str): The output directory path of images.
         lb_dir (str): The output directory path of labels.
         allow_background_images (bool): Whether to include background images without labels.
@@ -201,6 +207,13 @@ def split_images_and_labels(data_root, save_dir, split="train", crop_sizes=(1024
     """
     Split both images and labels.
+    Args:
+        data_root (str): Root directory of the dataset.
+        save_dir (str): Directory to save the split dataset.
+        split (str): The split data set, could be `train` or `val`.
+        crop_sizes (tuple): Tuple of crop sizes.
+        gaps (tuple): Tuple of gaps between crops.
     Notes:
         The directory structure assumed for the DOTA dataset:
             - data_root
@@ -231,6 +244,13 @@ def split_trainval(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
     """
     Split train and val set of DOTA.
+    Args:
+        data_root (str): Root directory of the dataset.
+        save_dir (str): Directory to save the split dataset.
+        crop_size (int): Base crop size.
+        gap (int): Base gap between crops.
+        rates (tuple): Scaling rates for crop_size and gap.
     Notes:
         The directory structure assumed for the DOTA dataset:
             - data_root
@@ -261,6 +281,13 @@ def split_test(data_root, save_dir, crop_size=1024, gap=200, rates=(1.0,)):
     """
     Split test set of DOTA, labels are not included within this set.
+    Args:
+        data_root (str): Root directory of the dataset.
+        save_dir (str): Directory to save the split dataset.
+        crop_size (int): Base crop size.
+        gap (int): Base gap between crops.
+        rates (tuple): Scaling rates for crop_size and gap.
     Notes:
         The directory structure assumed for the DOTA dataset:
             - data_root

ultralytics/data/utils.py CHANGED Viewed

@@ -175,13 +175,8 @@ def visualize_image_annotations(image_path, txt_path, label_map):
     adjusted for readability, depending on the background color's luminance.
     Args:
-        image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL (e.g., .jpg, .png).
-        txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object with:
-                        - class_id (int): The class index.
-                        - x_center (float): The X center of the bounding box (relative to image width).
-                        - y_center (float): The Y center of the bounding box (relative to image height).
-                        - width (float): The width of the bounding box (relative to image width).
-                        - height (float): The height of the bounding box (relative to image height).
+        image_path (str): The path to the image file to annotate, and it can be in formats supported by PIL.
+        txt_path (str): The path to the annotation file in YOLO format, that should contain one line per object.
         label_map (dict): A dictionary that maps class IDs (integers) to class labels (strings).
     Examples:
@@ -222,8 +217,8 @@ def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
         imgsz (tuple): The size of the image as (height, width).
         polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
                                      N is the number of polygons, and M is the number of points such that M % 2 = 0.
-        color (int, optional): The color value to fill in the polygons on the mask. Defaults to 1.
-        downsample_ratio (int, optional): Factor by which to downsample the mask. Defaults to 1.
+        color (int, optional): The color value to fill in the polygons on the mask.
+        downsample_ratio (int, optional): Factor by which to downsample the mask.
     Returns:
         (np.ndarray): A binary mask of the specified image size with the polygons filled in.
@@ -246,7 +241,7 @@ def polygons2masks(imgsz, polygons, color, downsample_ratio=1):
         polygons (list[np.ndarray]): A list of polygons. Each polygon is an array with shape [N, M], where
                                      N is the number of polygons, and M is the number of points such that M % 2 = 0.
         color (int): The color value to fill in the polygons on the masks.
-        downsample_ratio (int, optional): Factor by which to downsample each mask. Defaults to 1.
+        downsample_ratio (int, optional): Factor by which to downsample each mask.
     Returns:
         (np.ndarray): A set of binary masks of the specified image size with the polygons filled in.
@@ -281,8 +276,7 @@ def find_dataset_yaml(path: Path) -> Path:
     Find and return the YAML file associated with a Detect, Segment or Pose dataset.
     This function searches for a YAML file at the root level of the provided directory first, and if not found, it
-    performs a recursive search. It prefers YAML files that have the same stem as the provided path. An AssertionError
-    is raised if no YAML file is found or if multiple YAML files are found.
+    performs a recursive search. It prefers YAML files that have the same stem as the provided path.
     Args:
         path (Path): The directory path to search for the YAML file.
@@ -308,7 +302,7 @@ def check_det_dataset(dataset, autodownload=True):
     Args:
         dataset (str): Path to the dataset or dataset descriptor (like a YAML file).
-        autodownload (bool, optional): Whether to automatically download the dataset if not found. Defaults to True.
+        autodownload (bool, optional): Whether to automatically download the dataset if not found.
     Returns:
         (dict): Parsed dataset information and paths.
@@ -400,7 +394,7 @@ def check_cls_dataset(dataset, split=""):
     Args:
         dataset (str | Path): The name of the dataset.
-        split (str, optional): The split of the dataset. Either 'val', 'test', or ''. Defaults to ''.
+        split (str, optional): The split of the dataset. Either 'val', 'test', or ''.
     Returns:
         (dict): A dictionary containing the following keys:
@@ -440,8 +434,10 @@ def check_cls_dataset(dataset, split=""):
     test_set = data_dir / "test" if (data_dir / "test").exists() else None  # data/val or data/test
     if split == "val" and not val_set:
         LOGGER.warning("WARNING ⚠️ Dataset 'split=val' not found, using 'split=test' instead.")
+        val_set = test_set
     elif split == "test" and not test_set:
         LOGGER.warning("WARNING ⚠️ Dataset 'split=test' not found, using 'split=val' instead.")
+        test_set = val_set
     nc = len([x for x in (data_dir / "train").glob("*") if x.is_dir()])  # number of classes
     names = [x.name for x in (data_dir / "train").iterdir() if x.is_dir()]  # class names list
@@ -634,8 +630,8 @@ def compress_one_image(f, f_new=None, max_dim=1920, quality=50):
     Args:
         f (str): The path to the input image file.
         f_new (str, optional): The path to the output image file. If not specified, the input file will be overwritten.
-        max_dim (int, optional): The maximum dimension (width or height) of the output image. Default is 1920 pixels.
-        quality (int, optional): The image compression quality as a percentage. Default is 50%.
+        max_dim (int, optional): The maximum dimension (width or height) of the output image.
+        quality (int, optional): The image compression quality as a percentage.
     Examples:
         >>> from pathlib import Path
@@ -664,9 +660,9 @@ def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annot
     Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.
     Args:
-        path (Path, optional): Path to images directory. Defaults to DATASETS_DIR / 'coco8/images'.
-        weights (list | tuple, optional): Train, validation, and test split fractions. Defaults to (0.9, 0.1, 0.0).
-        annotated_only (bool, optional): If True, only images with an associated txt file are used. Defaults to False.
+        path (Path, optional): Path to images directory.
+        weights (list | tuple, optional): Train, validation, and test split fractions.
+        annotated_only (bool, optional): If True, only images with an associated txt file are used.
     Examples:
         >>> from ultralytics.data.utils import autosplit

ultralytics 8.3.89__py3-none-any.whl → 8.3.91__py3-none-any.whl

ultralytics 8.3.89__py3-none-any.whl → 8.3.91__py3-none-any.whl