ultralytics 8.3.143__py3-none-any.whl → 8.3.145__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (148)
  1. tests/conftest.py +7 -24
  2. tests/test_cli.py +1 -1
  3. tests/test_cuda.py +7 -2
  4. tests/test_engine.py +7 -8
  5. tests/test_exports.py +16 -16
  6. tests/test_integrations.py +1 -1
  7. tests/test_solutions.py +11 -11
  8. ultralytics/__init__.py +1 -1
  9. ultralytics/cfg/__init__.py +16 -13
  10. ultralytics/data/annotator.py +6 -5
  11. ultralytics/data/augment.py +127 -126
  12. ultralytics/data/base.py +54 -51
  13. ultralytics/data/build.py +47 -23
  14. ultralytics/data/converter.py +47 -43
  15. ultralytics/data/dataset.py +51 -50
  16. ultralytics/data/loaders.py +77 -44
  17. ultralytics/data/split.py +22 -9
  18. ultralytics/data/split_dota.py +63 -39
  19. ultralytics/data/utils.py +59 -39
  20. ultralytics/engine/exporter.py +79 -27
  21. ultralytics/engine/model.py +52 -51
  22. ultralytics/engine/predictor.py +37 -28
  23. ultralytics/engine/results.py +191 -161
  24. ultralytics/engine/trainer.py +36 -19
  25. ultralytics/engine/tuner.py +12 -9
  26. ultralytics/engine/validator.py +7 -9
  27. ultralytics/hub/__init__.py +11 -13
  28. ultralytics/hub/auth.py +22 -2
  29. ultralytics/hub/google/__init__.py +19 -19
  30. ultralytics/hub/session.py +37 -51
  31. ultralytics/hub/utils.py +19 -5
  32. ultralytics/models/fastsam/model.py +30 -12
  33. ultralytics/models/fastsam/predict.py +5 -6
  34. ultralytics/models/fastsam/utils.py +3 -3
  35. ultralytics/models/fastsam/val.py +10 -6
  36. ultralytics/models/nas/model.py +9 -5
  37. ultralytics/models/nas/predict.py +6 -6
  38. ultralytics/models/nas/val.py +3 -3
  39. ultralytics/models/rtdetr/model.py +7 -6
  40. ultralytics/models/rtdetr/predict.py +14 -7
  41. ultralytics/models/rtdetr/train.py +10 -4
  42. ultralytics/models/rtdetr/val.py +36 -9
  43. ultralytics/models/sam/amg.py +30 -12
  44. ultralytics/models/sam/build.py +22 -22
  45. ultralytics/models/sam/model.py +10 -9
  46. ultralytics/models/sam/modules/blocks.py +76 -80
  47. ultralytics/models/sam/modules/decoders.py +6 -8
  48. ultralytics/models/sam/modules/encoders.py +23 -26
  49. ultralytics/models/sam/modules/memory_attention.py +13 -1
  50. ultralytics/models/sam/modules/sam.py +57 -26
  51. ultralytics/models/sam/modules/tiny_encoder.py +232 -237
  52. ultralytics/models/sam/modules/transformer.py +13 -13
  53. ultralytics/models/sam/modules/utils.py +11 -19
  54. ultralytics/models/sam/predict.py +114 -101
  55. ultralytics/models/utils/loss.py +98 -77
  56. ultralytics/models/utils/ops.py +116 -67
  57. ultralytics/models/yolo/classify/predict.py +5 -5
  58. ultralytics/models/yolo/classify/train.py +32 -28
  59. ultralytics/models/yolo/classify/val.py +7 -8
  60. ultralytics/models/yolo/detect/predict.py +1 -0
  61. ultralytics/models/yolo/detect/train.py +15 -14
  62. ultralytics/models/yolo/detect/val.py +37 -36
  63. ultralytics/models/yolo/model.py +106 -23
  64. ultralytics/models/yolo/obb/predict.py +3 -4
  65. ultralytics/models/yolo/obb/train.py +14 -6
  66. ultralytics/models/yolo/obb/val.py +29 -23
  67. ultralytics/models/yolo/pose/predict.py +9 -8
  68. ultralytics/models/yolo/pose/train.py +24 -16
  69. ultralytics/models/yolo/pose/val.py +44 -26
  70. ultralytics/models/yolo/segment/predict.py +5 -5
  71. ultralytics/models/yolo/segment/train.py +11 -7
  72. ultralytics/models/yolo/segment/val.py +2 -2
  73. ultralytics/models/yolo/world/train.py +33 -23
  74. ultralytics/models/yolo/world/train_world.py +11 -3
  75. ultralytics/models/yolo/yoloe/predict.py +11 -11
  76. ultralytics/models/yolo/yoloe/train.py +73 -21
  77. ultralytics/models/yolo/yoloe/train_seg.py +10 -7
  78. ultralytics/models/yolo/yoloe/val.py +42 -18
  79. ultralytics/nn/autobackend.py +59 -15
  80. ultralytics/nn/modules/__init__.py +4 -4
  81. ultralytics/nn/modules/activation.py +4 -1
  82. ultralytics/nn/modules/block.py +178 -111
  83. ultralytics/nn/modules/conv.py +6 -5
  84. ultralytics/nn/modules/head.py +469 -121
  85. ultralytics/nn/modules/transformer.py +147 -58
  86. ultralytics/nn/tasks.py +227 -20
  87. ultralytics/nn/text_model.py +30 -33
  88. ultralytics/solutions/ai_gym.py +4 -6
  89. ultralytics/solutions/analytics.py +7 -4
  90. ultralytics/solutions/config.py +10 -10
  91. ultralytics/solutions/distance_calculation.py +11 -10
  92. ultralytics/solutions/heatmap.py +2 -2
  93. ultralytics/solutions/instance_segmentation.py +7 -4
  94. ultralytics/solutions/object_blurrer.py +3 -3
  95. ultralytics/solutions/object_counter.py +15 -11
  96. ultralytics/solutions/object_cropper.py +3 -2
  97. ultralytics/solutions/parking_management.py +29 -28
  98. ultralytics/solutions/queue_management.py +6 -6
  99. ultralytics/solutions/region_counter.py +10 -3
  100. ultralytics/solutions/security_alarm.py +3 -3
  101. ultralytics/solutions/similarity_search.py +85 -24
  102. ultralytics/solutions/solutions.py +189 -79
  103. ultralytics/solutions/speed_estimation.py +28 -22
  104. ultralytics/solutions/streamlit_inference.py +17 -12
  105. ultralytics/solutions/trackzone.py +4 -4
  106. ultralytics/trackers/basetrack.py +16 -23
  107. ultralytics/trackers/bot_sort.py +30 -20
  108. ultralytics/trackers/byte_tracker.py +70 -64
  109. ultralytics/trackers/track.py +4 -8
  110. ultralytics/trackers/utils/gmc.py +31 -58
  111. ultralytics/trackers/utils/kalman_filter.py +37 -37
  112. ultralytics/trackers/utils/matching.py +1 -1
  113. ultralytics/utils/__init__.py +105 -89
  114. ultralytics/utils/autobatch.py +16 -3
  115. ultralytics/utils/autodevice.py +54 -24
  116. ultralytics/utils/benchmarks.py +45 -29
  117. ultralytics/utils/callbacks/base.py +3 -3
  118. ultralytics/utils/callbacks/clearml.py +9 -9
  119. ultralytics/utils/callbacks/comet.py +67 -25
  120. ultralytics/utils/callbacks/dvc.py +7 -10
  121. ultralytics/utils/callbacks/mlflow.py +2 -5
  122. ultralytics/utils/callbacks/neptune.py +7 -13
  123. ultralytics/utils/callbacks/raytune.py +1 -1
  124. ultralytics/utils/callbacks/tensorboard.py +5 -6
  125. ultralytics/utils/callbacks/wb.py +14 -14
  126. ultralytics/utils/checks.py +14 -13
  127. ultralytics/utils/dist.py +5 -5
  128. ultralytics/utils/downloads.py +94 -67
  129. ultralytics/utils/errors.py +5 -5
  130. ultralytics/utils/export.py +61 -47
  131. ultralytics/utils/files.py +23 -22
  132. ultralytics/utils/instance.py +48 -52
  133. ultralytics/utils/loss.py +78 -40
  134. ultralytics/utils/metrics.py +186 -130
  135. ultralytics/utils/ops.py +186 -190
  136. ultralytics/utils/patches.py +15 -17
  137. ultralytics/utils/plotting.py +71 -27
  138. ultralytics/utils/tal.py +21 -15
  139. ultralytics/utils/torch_utils.py +53 -50
  140. ultralytics/utils/triton.py +5 -4
  141. ultralytics/utils/tuner.py +5 -5
  142. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/METADATA +2 -2
  143. ultralytics-8.3.145.dist-info/RECORD +272 -0
  144. ultralytics-8.3.143.dist-info/RECORD +0 -272
  145. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/WHEEL +0 -0
  146. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/entry_points.txt +0 -0
  147. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/licenses/LICENSE +0 -0
  148. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/top_level.txt +0 -0
ultralytics/data/dataset.py CHANGED
@@ -5,6 +5,7 @@ from collections import defaultdict
  from itertools import repeat
  from multiprocessing.pool import ThreadPool
  from pathlib import Path
+ from typing import Dict, List, Optional, Tuple

  import cv2
  import numpy as np
@@ -58,18 +59,18 @@ class YOLODataset(BaseDataset):

  Methods:
  cache_labels: Cache dataset labels, check images and read shapes.
- get_labels: Returns dictionary of labels for YOLO training.
- build_transforms: Builds and appends transforms to the list.
- close_mosaic: Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
- update_labels_info: Updates label format for different tasks.
- collate_fn: Collates data samples into batches.
+ get_labels: Return dictionary of labels for YOLO training.
+ build_transforms: Build and append transforms to the list.
+ close_mosaic: Set mosaic, copy_paste and mixup options to 0.0 and build transformations.
+ update_labels_info: Update label format for different tasks.
+ collate_fn: Collate data samples into batches.

  Examples:
  >>> dataset = YOLODataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
  >>> dataset.get_labels()
  """

- def __init__(self, *args, data=None, task="detect", **kwargs):
+ def __init__(self, *args, data: Optional[Dict] = None, task: str = "detect", **kwargs):
  """
  Initialize the YOLODataset.

@@ -86,7 +87,7 @@ class YOLODataset(BaseDataset):
  assert not (self.use_segments and self.use_keypoints), "Can not use both segments and keypoints."
  super().__init__(*args, channels=self.data["channels"], **kwargs)

- def cache_labels(self, path=Path("./labels.cache")):
+ def cache_labels(self, path: Path = Path("./labels.cache")) -> Dict:
  """
  Cache dataset labels, check images and read shapes.

@@ -154,9 +155,9 @@ class YOLODataset(BaseDataset):
  save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
  return x

- def get_labels(self):
+ def get_labels(self) -> List[Dict]:
  """
- Returns dictionary of labels for YOLO training.
+ Return dictionary of labels for YOLO training.

  This method loads labels from disk or cache, verifies their integrity, and prepares them for training.

@@ -204,9 +205,9 @@ class YOLODataset(BaseDataset):
  LOGGER.warning(f"Labels are missing or empty in {cache_path}, training may not work correctly. {HELP_URL}")
  return labels

- def build_transforms(self, hyp=None):
+ def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
  """
- Builds and appends transforms to the list.
+ Build and append transforms to the list.

  Args:
  hyp (dict, optional): Hyperparameters for transforms.
@@ -236,7 +237,7 @@ class YOLODataset(BaseDataset):
  )
  return transforms

- def close_mosaic(self, hyp):
+ def close_mosaic(self, hyp: Dict) -> None:
  """
  Disable mosaic, copy_paste, mixup and cutmix augmentations by setting their probabilities to 0.0.

@@ -249,9 +250,9 @@ class YOLODataset(BaseDataset):
  hyp.cutmix = 0.0
  self.transforms = self.build_transforms(hyp)

- def update_labels_info(self, label):
+ def update_labels_info(self, label: Dict) -> Dict:
  """
- Custom your label format here.
+ Update label format for different tasks.

  Args:
  label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
@@ -283,9 +284,9 @@ class YOLODataset(BaseDataset):
  return label

  @staticmethod
- def collate_fn(batch):
+ def collate_fn(batch: List[Dict]) -> Dict:
  """
- Collates data samples into batches.
+ Collate data samples into batches.

  Args:
  batch (List[dict]): List of dictionaries containing sample data.
@@ -321,8 +322,8 @@ class YOLOMultiModalDataset(YOLODataset):
  process both image and text data.

  Methods:
- update_labels_info: Adds text information for multi-modal model training.
- build_transforms: Enhances data transformations with text augmentation.
+ update_labels_info: Add text information for multi-modal model training.
+ build_transforms: Enhance data transformations with text augmentation.

  Examples:
  >>> dataset = YOLOMultiModalDataset(img_path="path/to/images", data={"names": {0: "person"}}, task="detect")
@@ -330,7 +331,7 @@ class YOLOMultiModalDataset(YOLODataset):
  >>> print(batch.keys()) # Should include 'texts'
  """

- def __init__(self, *args, data=None, task="detect", **kwargs):
+ def __init__(self, *args, data: Optional[Dict] = None, task: str = "detect", **kwargs):
  """
  Initialize a YOLOMultiModalDataset.

@@ -342,9 +343,9 @@ class YOLOMultiModalDataset(YOLODataset):
  """
  super().__init__(*args, data=data, task=task, **kwargs)

- def update_labels_info(self, label):
+ def update_labels_info(self, label: Dict) -> Dict:
  """
- Add texts information for multi-modal model training.
+ Add text information for multi-modal model training.

  Args:
  label (dict): Label dictionary containing bboxes, segments, keypoints, etc.
@@ -359,9 +360,9 @@ class YOLOMultiModalDataset(YOLODataset):

  return labels

- def build_transforms(self, hyp=None):
+ def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
  """
- Enhances data transformations with optional text augmentation for multi-modal training.
+ Enhance data transformations with optional text augmentation for multi-modal training.

  Args:
  hyp (dict, optional): Hyperparameters for transforms.
@@ -408,14 +409,14 @@ class YOLOMultiModalDataset(YOLODataset):
  return category_freq

  @staticmethod
- def _get_neg_texts(category_freq, threshold=100):
+ def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
  """Get negative text samples based on frequency threshold."""
  return [k for k, v in category_freq.items() if v >= threshold]


  class GroundingDataset(YOLODataset):
  """
- Handles object detection tasks by loading annotations from a specified JSON file, supporting YOLO format.
+ Dataset class for object detection tasks using annotations from a JSON file in grounding format.

  This dataset is designed for grounding tasks where annotations are provided in a JSON file rather than
  the standard YOLO format text files.
@@ -424,16 +425,16 @@ class GroundingDataset(YOLODataset):
  json_file (str): Path to the JSON file containing annotations.

  Methods:
- get_img_files: Returns empty list as image files are read in get_labels.
- get_labels: Loads annotations from a JSON file and prepares them for training.
- build_transforms: Configures augmentations for training with optional text loading.
+ get_img_files: Return empty list as image files are read in get_labels.
+ get_labels: Load annotations from a JSON file and prepare them for training.
+ build_transforms: Configure augmentations for training with optional text loading.

  Examples:
  >>> dataset = GroundingDataset(img_path="path/to/images", json_file="annotations.json", task="detect")
  >>> len(dataset) # Number of valid images with annotations
  """

- def __init__(self, *args, task="detect", json_file="", **kwargs):
+ def __init__(self, *args, task: str = "detect", json_file: str = "", **kwargs):
  """
  Initialize a GroundingDataset for object detection.

@@ -447,7 +448,7 @@ class GroundingDataset(YOLODataset):
  self.json_file = json_file
  super().__init__(*args, task=task, data={"channels": 3}, **kwargs)

- def get_img_files(self, img_path):
+ def get_img_files(self, img_path: str) -> List:
  """
  The image files would be read in `get_labels` function, return empty list here.

@@ -459,7 +460,7 @@ class GroundingDataset(YOLODataset):
  """
  return []

- def verify_labels(self, labels):
+ def verify_labels(self, labels: List[Dict]) -> None:
  """Verify the number of instances in the dataset matches expected counts."""
  instance_count = sum(label["bboxes"].shape[0] for label in labels)
  if "final_mixed_train_no_coco_segm" in self.json_file:
@@ -473,9 +474,9 @@ class GroundingDataset(YOLODataset):
  else:
  assert False

- def cache_labels(self, path=Path("./labels.cache")):
+ def cache_labels(self, path: Path = Path("./labels.cache")) -> Dict:
  """
- Loads annotations from a JSON file, filters, and normalizes bounding boxes for each image.
+ Load annotations from a JSON file, filter, and normalize bounding boxes for each image.

  Args:
  path (Path): Path where to save the cache file.
@@ -564,7 +565,7 @@ class GroundingDataset(YOLODataset):
  save_dataset_cache_file(self.prefix, path, x, DATASET_CACHE_VERSION)
  return x

- def get_labels(self):
+ def get_labels(self) -> List[Dict]:
  """
  Load labels from cache or generate them from JSON file.

@@ -586,9 +587,9 @@ class GroundingDataset(YOLODataset):
  LOGGER.info(f"Load {self.json_file} from cache file {cache_path}")
  return labels

- def build_transforms(self, hyp=None):
+ def build_transforms(self, hyp: Optional[Dict] = None) -> Compose:
  """
- Configures augmentations for training with optional text loading.
+ Configure augmentations for training with optional text loading.

  Args:
  hyp (dict, optional): Hyperparameters for transforms.
@@ -627,7 +628,7 @@ class GroundingDataset(YOLODataset):
  return category_freq

  @staticmethod
- def _get_neg_texts(category_freq, threshold=100):
+ def _get_neg_texts(category_freq: Dict, threshold: int = 100) -> List[str]:
  """Get negative text samples based on frequency threshold."""
  return [k for k, v in category_freq.items() if v >= threshold]

@@ -649,9 +650,9 @@ class YOLOConcatDataset(ConcatDataset):
  """

  @staticmethod
- def collate_fn(batch):
+ def collate_fn(batch: List[Dict]) -> Dict:
  """
- Collates data samples into batches.
+ Collate data samples into batches.

  Args:
  batch (List[dict]): List of dictionaries containing sample data.
@@ -661,9 +662,9 @@ class YOLOConcatDataset(ConcatDataset):
  """
  return YOLODataset.collate_fn(batch)

- def close_mosaic(self, hyp):
+ def close_mosaic(self, hyp: Dict) -> None:
  """
- Sets mosaic, copy_paste and mixup options to 0.0 and builds transformations.
+ Set mosaic, copy_paste and mixup options to 0.0 and build transformations.

  Args:
  hyp (dict): Hyperparameters for transforms.
@@ -685,7 +686,7 @@ class SemanticDataset(BaseDataset):

  class ClassificationDataset:
  """
- Extends torchvision ImageFolder to support YOLO classification tasks.
+ Dataset class for image classification tasks extending torchvision ImageFolder functionality.

  This class offers functionalities like image augmentation, caching, and verification. It's designed to efficiently
  handle large datasets for training deep learning models, with optional image transformations and caching mechanisms
  handle large datasets for training deep learning models, with optional image transformations and caching mechanisms
@@ -701,14 +702,14 @@ class ClassificationDataset:
  prefix (str): Prefix for logging and cache filenames.

  Methods:
- __getitem__: Returns subset of data and targets corresponding to given indices.
- __len__: Returns the total number of samples in the dataset.
- verify_images: Verifies all images in dataset.
+ __getitem__: Return subset of data and targets corresponding to given indices.
+ __len__: Return the total number of samples in the dataset.
+ verify_images: Verify all images in dataset.
  """

- def __init__(self, root, args, augment=False, prefix=""):
+ def __init__(self, root: str, args, augment: bool = False, prefix: str = ""):
  """
- Initialize YOLO object with root, image size, augmentations, and cache settings.
+ Initialize YOLO classification dataset with root directory, arguments, augmentations, and cache settings.

  Args:
  root (str): Path to the dataset directory where images are stored in a class-specific folder structure.
@@ -758,9 +759,9 @@ class ClassificationDataset:
  else classify_transforms(size=args.imgsz)
  )

- def __getitem__(self, i):
+ def __getitem__(self, i: int) -> Dict:
  """
- Returns subset of data and targets corresponding to given indices.
+ Return subset of data and targets corresponding to given indices.

  Args:
  i (int): Index of the sample to retrieve.
@@ -787,7 +788,7 @@ class ClassificationDataset:
  """Return the total number of samples in the dataset."""
  return len(self.samples)

- def verify_images(self):
+ def verify_images(self) -> List[Tuple]:
  """
  Verify all images in dataset.
 
ultralytics/data/loaders.py CHANGED
@@ -8,6 +8,7 @@ import urllib
  from dataclasses import dataclass
  from pathlib import Path
  from threading import Thread
+ from typing import Any, List, Optional, Tuple, Union

  import cv2
  import numpy as np
@@ -90,8 +91,16 @@ class LoadStreams:
  - The class implements a buffer system to manage frame storage and retrieval.
  """

- def __init__(self, sources="file.streams", vid_stride=1, buffer=False, channels=3):
- """Initialize stream loader for multiple video sources, supporting various stream types."""
+ def __init__(self, sources: str = "file.streams", vid_stride: int = 1, buffer: bool = False, channels: int = 3):
+ """
+ Initialize stream loader for multiple video sources, supporting various stream types.
+
+ Args:
+ sources (str): Path to streams file or single stream URL.
+ vid_stride (int): Video frame-rate stride.
+ buffer (bool): Whether to buffer input streams.
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+ """
  torch.backends.cudnn.benchmark = True # faster for fixed-size inference
  self.buffer = buffer # buffer input streams
  self.running = True # running flag for Thread
@@ -143,7 +152,7 @@ class LoadStreams:
  self.threads[i].start()
  LOGGER.info("") # newline

- def update(self, i, cap, stream):
+ def update(self, i: int, cap: cv2.VideoCapture, stream: str):
  """Read stream frames in daemon thread and update image buffer."""
  n, f = 0, self.frames[i] # frame number, frame array
  while self.running and cap.isOpened() and n < (f - 1):
@@ -167,7 +176,7 @@ class LoadStreams:
  time.sleep(0.01) # wait until the buffer is empty

  def close(self):
- """Terminates stream loader, stops threads, and releases video capture resources."""
+ """Terminate stream loader, stop threads, and release video capture resources."""
  self.running = False # stop flag for Thread
  for thread in self.threads:
  if thread.is_alive():
@@ -180,12 +189,12 @@ class LoadStreams:
  cv2.destroyAllWindows()

  def __iter__(self):
- """Iterates through YOLO image feed and re-opens unresponsive streams."""
+ """Iterate through YOLO image feed and re-open unresponsive streams."""
  self.count = -1
  return self

- def __next__(self):
- """Returns the next batch of frames from multiple video streams for processing."""
+ def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+ """Return the next batch of frames from multiple video streams for processing."""
  self.count += 1

  images = []
@@ -211,7 +220,7 @@ class LoadStreams:

  return self.sources, images, [""] * self.bs

- def __len__(self):
+ def __len__(self) -> int:
  """Return the number of video streams in the LoadStreams object."""
  return self.bs # 1E12 frames = 32 streams at 30 FPS for 30 years

@@ -248,8 +257,14 @@ class LoadScreenshots:
  ... print(f"Captured frame: {im.shape}")
  """

- def __init__(self, source, channels=3):
- """Initialize screenshot capture with specified screen and region parameters."""
+ def __init__(self, source: str, channels: int = 3):
+ """
+ Initialize screenshot capture with specified screen and region parameters.
+
+ Args:
+ source (str): Screen capture source string in format "screen_num left top width height".
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+ """
  check_requirements("mss")
  import mss # noqa

@@ -277,11 +292,11 @@ class LoadScreenshots:
  self.monitor = {"left": self.left, "top": self.top, "width": self.width, "height": self.height}

  def __iter__(self):
- """Yields the next screenshot image from the specified screen or region for processing."""
+ """Yield the next screenshot image from the specified screen or region for processing."""
  return self

- def __next__(self):
- """Captures and returns the next screenshot as a numpy array using the mss library."""
+ def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+ """Capture and return the next screenshot as a numpy array using the mss library."""
  im0 = np.asarray(self.sct.grab(self.monitor))[:, :, :3] # BGRA to BGR
  im0 = cv2.cvtColor(im0, cv2.COLOR_BGR2GRAY)[..., None] if self.cv2_flag == cv2.IMREAD_GRAYSCALE else im0
  s = f"screen {self.screen} (LTWH): {self.left},{self.top},{self.width},{self.height}: "
@@ -330,8 +345,16 @@ class LoadImagesAndVideos:
  - Can read from a text file containing paths to images and videos.
  """

- def __init__(self, path, batch=1, vid_stride=1, channels=3):
- """Initialize dataloader for images and videos, supporting various input formats."""
+ def __init__(self, path: Union[str, Path, List], batch: int = 1, vid_stride: int = 1, channels: int = 3):
+ """
+ Initialize dataloader for images and videos, supporting various input formats.
+
+ Args:
+ path (str | Path | List): Path to images/videos, directory, or list of paths.
+ batch (int): Batch size for processing.
+ vid_stride (int): Video frame-rate stride.
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+ """
  parent = None
  if isinstance(path, str) and Path(path).suffix == ".txt": # *.txt file with img/vid/dir on each line
  parent = Path(path).parent
@@ -376,12 +399,12 @@ class LoadImagesAndVideos:
  raise FileNotFoundError(f"No images or videos found in {p}. {FORMATS_HELP_MSG}")

  def __iter__(self):
- """Iterates through image/video files, yielding source paths, images, and metadata."""
+ """Iterate through image/video files, yielding source paths, images, and metadata."""
  self.count = 0
  return self

- def __next__(self):
- """Returns the next batch of images or video frames with their paths and metadata."""
+ def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+ """Return the next batch of images or video frames with their paths and metadata."""
  paths, imgs, info = [], [], []
  while len(imgs) < self.bs:
  if self.count >= self.nf: # end of file list
@@ -450,8 +473,8 @@ class LoadImagesAndVideos:

  return paths, imgs, info

- def _new_video(self, path):
- """Creates a new video capture object for the given path and initializes video-related attributes."""
+ def _new_video(self, path: str):
+ """Create a new video capture object for the given path and initialize video-related attributes."""
  self.frame = 0
  self.cap = cv2.VideoCapture(path)
  self.fps = int(self.cap.get(cv2.CAP_PROP_FPS))
@@ -459,8 +482,8 @@ class LoadImagesAndVideos:
  raise FileNotFoundError(f"Failed to open video {path}")
  self.frames = int(self.cap.get(cv2.CAP_PROP_FRAME_COUNT) / self.vid_stride)

- def __len__(self):
- """Returns the number of files (images and videos) in the dataset."""
+ def __len__(self) -> int:
+ """Return the number of files (images and videos) in the dataset."""
  return math.ceil(self.nf / self.bs) # number of batches

@@ -491,8 +514,14 @@ class LoadPilAndNumpy:
  Loaded 2 images
  """

- def __init__(self, im0, channels=3):
- """Initializes a loader for PIL and Numpy images, converting inputs to a standardized format."""
+ def __init__(self, im0: Union[Image.Image, np.ndarray, List], channels: int = 3):
+ """
+ Initialize a loader for PIL and Numpy images, converting inputs to a standardized format.
+
+ Args:
+ im0 (PIL.Image.Image | np.ndarray | List): Single image or list of images in PIL or numpy format.
+ channels (int): Number of image channels (1 for grayscale, 3 for RGB).
+ """
  if not isinstance(im0, list):
  im0 = [im0]
  # use `image{i}.jpg` when Image.filename returns an empty path.
@@ -503,7 +532,7 @@ class LoadPilAndNumpy:
  self.bs = len(self.im0)

  @staticmethod
- def _single_check(im, flag="RGB"):
+ def _single_check(im: Union[Image.Image, np.ndarray], flag: str = "RGB") -> np.ndarray:
  """Validate and format an image to numpy array, ensuring RGB order and contiguous memory."""
  assert isinstance(im, (Image.Image, np.ndarray)), f"Expected PIL/np.ndarray image type, but got {type(im)}"
  if isinstance(im, Image.Image):
@@ -515,19 +544,19 @@ class LoadPilAndNumpy:
  im = im[..., None]
  return im

- def __len__(self):
- """Returns the length of the 'im0' attribute, representing the number of loaded images."""
+ def __len__(self) -> int:
+ """Return the length of the 'im0' attribute, representing the number of loaded images."""
  return len(self.im0)

- def __next__(self):
- """Returns the next batch of images, paths, and metadata for processing."""
+ def __next__(self) -> Tuple[List[str], List[np.ndarray], List[str]]:
+ """Return the next batch of images, paths, and metadata for processing."""
  if self.count == 1: # loop only once as it's batch inference
  raise StopIteration
  self.count += 1
  return self.paths, self.im0, [""] * self.bs

  def __iter__(self):
- """Iterates through PIL/numpy images, yielding paths, raw images, and metadata for processing."""
+ """Iterate through PIL/numpy images, yielding paths, raw images, and metadata for processing."""
  self.count = 0
  return self

@@ -556,16 +585,21 @@ class LoadTensor:
  >>> print(f"Processed {len(images)} images")
  """

- def __init__(self, im0) -> None:
- """Initialize LoadTensor object for processing torch.Tensor image data."""
+ def __init__(self, im0: torch.Tensor) -> None:
+ """
+ Initialize LoadTensor object for processing torch.Tensor image data.
+
+ Args:
+ im0 (torch.Tensor): Input tensor with shape (B, C, H, W).
+ """
  self.im0 = self._single_check(im0)
  self.bs = self.im0.shape[0]
  self.mode = "image"
  self.paths = [getattr(im, "filename", f"image{i}.jpg") for i, im in enumerate(im0)]

  @staticmethod
- def _single_check(im, stride=32):
- """Validates and formats a single image tensor, ensuring correct shape and normalization."""
+ def _single_check(im: torch.Tensor, stride: int = 32) -> torch.Tensor:
+ """Validate and format a single image tensor, ensuring correct shape and normalization."""
  s = (
  f"torch.Tensor inputs should be BCHW i.e. shape(1, 3, 640, 640) "
  f"divisible by stride {stride}. Input shape{tuple(im.shape)} is incompatible."
@@ -586,24 +620,24 @@ class LoadTensor:
  return im

  def __iter__(self):
- """Yields an iterator object for iterating through tensor image data."""
+ """Yield an iterator object for iterating through tensor image data."""
  self.count = 0
  return self

- def __next__(self):
- """Yields the next batch of tensor images and metadata for processing."""
+ def __next__(self) -> Tuple[List[str], torch.Tensor, List[str]]:
+ """Yield the next batch of tensor images and metadata for processing."""
  if self.count == 1:
  raise StopIteration
  self.count += 1
  return self.paths, self.im0, [""] * self.bs

- def __len__(self):
- """Returns the batch size of the tensor input."""
+ def __len__(self) -> int:
+ """Return the batch size of the tensor input."""
  return self.bs


- def autocast_list(source):
- """Merges a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
+ def autocast_list(source: List[Any]) -> List[Union[Image.Image, np.ndarray]]:
+ """Merge a list of sources into a list of numpy arrays or PIL images for Ultralytics prediction."""
  files = []
  for im in source:
  if isinstance(im, (str, Path)): # filename or uri
@@ -619,14 +653,13 @@ def autocast_list(source):
  return files


- def get_best_youtube_url(url, method="pytube"):
+ def get_best_youtube_url(url: str, method: str = "pytube") -> Optional[str]:
  """
- Retrieves the URL of the best quality MP4 video stream from a given YouTube video.
+ Retrieve the URL of the best quality MP4 video stream from a given YouTube video.

  Args:
  url (str): The URL of the YouTube video.
  method (str): The method to use for extracting video info. Options are "pytube", "pafy", and "yt-dlp".
- Defaults to "pytube".

  Returns:
  (str | None): The URL of the best quality MP4 video stream, or None if no suitable stream is found.
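The `loaders.py` hunks follow the same pattern: every loader (`LoadStreams`, `LoadScreenshots`, `LoadImagesAndVideos`, `LoadPilAndNumpy`) now spells out the shared iteration contract in its `__next__` annotation, a tuple of source paths, images, and per-source info strings. A minimal sketch of consuming that contract (the media path is a placeholder, not from this diff):

```python
# Hypothetical sketch: iterating a loader under the annotated contract
# __next__() -> Tuple[List[str], List[np.ndarray], List[str]].
from ultralytics.data.loaders import LoadImagesAndVideos

loader = LoadImagesAndVideos("path/to/media", batch=1, vid_stride=1, channels=3)
for paths, images, info in loader:
    # paths: source file paths; images: list of numpy arrays; info: log strings
    print(paths[0], images[0].shape, info[0])
```

`LoadTensor.__next__` is the one exception, annotated as `Tuple[List[str], torch.Tensor, List[str]]` since it returns the batched tensor itself rather than a list of arrays.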
ultralytics/data/split.py CHANGED
@@ -3,14 +3,15 @@
  import random
  import shutil
  from pathlib import Path
+ from typing import Tuple, Union

  from ultralytics.data.utils import IMG_FORMATS, img2label_paths
  from ultralytics.utils import DATASETS_DIR, LOGGER, TQDM


- def split_classify_dataset(source_dir, train_ratio=0.8):
+ def split_classify_dataset(source_dir: Union[str, Path], train_ratio: float = 0.8) -> Path:
  """
- Split dataset into train and val directories in a new directory.
+ Split classification dataset into train and val directories in a new directory.

  Creates a new directory '{source_dir}_split' with train/val subdirectories, preserving the original class
  structure with an 80/20 split by default.
@@ -46,13 +47,17 @@ def split_classify_dataset(source_dir, train_ratio=0.8):
  └── ...

  Args:
- source_dir (str | Path): Path to Caltech dataset root directory.
+ source_dir (str | Path): Path to classification dataset root directory.
  train_ratio (float): Ratio for train split, between 0 and 1.

+ Returns:
+ (Path): Path to the created split directory.
+
  Examples:
- >>> # Split dataset with default 80/20 ratio
+ Split dataset with default 80/20 ratio
  >>> split_classify_dataset("path/to/caltech")
- >>> # Split with custom ratio
+
+ Split with custom ratio
  >>> split_classify_dataset("path/to/caltech", 0.75)
  """
  source_path = Path(source_dir)
@@ -90,18 +95,26 @@ def split_classify_dataset(source_dir, train_ratio=0.8):
  return split_path


- def autosplit(path=DATASETS_DIR / "coco8/images", weights=(0.9, 0.1, 0.0), annotated_only=False):
+ def autosplit(
+ path: Path = DATASETS_DIR / "coco8/images",
+ weights: Tuple[float, float, float] = (0.9, 0.1, 0.0),
+ annotated_only: bool = False,
+ ) -> None:
  """
  Automatically split a dataset into train/val/test splits and save the resulting splits into autosplit_*.txt files.

  Args:
- path (Path, optional): Path to images directory.
- weights (list | tuple, optional): Train, validation, and test split fractions.
- annotated_only (bool, optional): If True, only images with an associated txt file are used.
+ path (Path): Path to images directory.
+ weights (tuple): Train, validation, and test split fractions.
+ annotated_only (bool): If True, only images with an associated txt file are used.

  Examples:
+ Split images with default weights
  >>> from ultralytics.data.split import autosplit
  >>> autosplit()
+
+ Split with custom weights and annotated images only
+ >>> autosplit(path="path/to/images", weights=(0.8, 0.15, 0.05), annotated_only=True)
  """
  path = Path(path) # images dir
  files = sorted(x for x in path.rglob("*.*") if x.suffix[1:].lower() in IMG_FORMATS) # image files only
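The `split.py` changes add typed signatures and a documented `Returns` value for `split_classify_dataset`. Combining the two documented examples from the hunks above into one runnable sketch (paths are placeholders):

```python
# Sketch mirroring the docstring examples in this diff; paths are placeholders.
from pathlib import Path
from ultralytics.data.split import autosplit, split_classify_dataset

# Split a classification folder tree 75/25; now annotated to return the new
# "{source_dir}_split" directory as a Path.
split_dir: Path = split_classify_dataset("path/to/caltech", train_ratio=0.75)

# Independently, write autosplit_*.txt train/val/test lists for a detection
# image directory, keeping only images that have a matching label .txt file.
autosplit(path=Path("path/to/images"), weights=(0.8, 0.15, 0.05), annotated_only=True)
```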