ultralytics-8.0.195-py3-none-any.whl → ultralytics-8.0.196-py3-none-any.whl
This diff shows the changes between publicly released package versions as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release.
This version of ultralytics might be problematic.
- ultralytics/__init__.py +1 -1
- ultralytics/cfg/__init__.py +5 -6
- ultralytics/data/augment.py +234 -29
- ultralytics/data/base.py +2 -1
- ultralytics/data/build.py +9 -3
- ultralytics/data/converter.py +5 -2
- ultralytics/data/dataset.py +16 -2
- ultralytics/data/loaders.py +111 -7
- ultralytics/data/utils.py +3 -3
- ultralytics/engine/exporter.py +1 -3
- ultralytics/engine/model.py +3 -9
- ultralytics/engine/predictor.py +10 -6
- ultralytics/engine/results.py +18 -8
- ultralytics/engine/trainer.py +19 -31
- ultralytics/engine/tuner.py +20 -20
- ultralytics/engine/validator.py +3 -4
- ultralytics/hub/__init__.py +2 -2
- ultralytics/hub/auth.py +18 -3
- ultralytics/hub/session.py +1 -0
- ultralytics/hub/utils.py +1 -3
- ultralytics/models/fastsam/model.py +2 -1
- ultralytics/models/fastsam/predict.py +2 -0
- ultralytics/models/fastsam/prompt.py +15 -1
- ultralytics/models/nas/model.py +3 -1
- ultralytics/models/rtdetr/model.py +4 -6
- ultralytics/models/rtdetr/predict.py +2 -1
- ultralytics/models/rtdetr/train.py +2 -1
- ultralytics/models/rtdetr/val.py +1 -0
- ultralytics/models/sam/amg.py +12 -6
- ultralytics/models/sam/model.py +5 -6
- ultralytics/models/sam/modules/decoders.py +5 -1
- ultralytics/models/sam/modules/encoders.py +15 -12
- ultralytics/models/sam/modules/tiny_encoder.py +38 -2
- ultralytics/models/sam/modules/transformer.py +2 -4
- ultralytics/models/sam/predict.py +8 -4
- ultralytics/models/utils/loss.py +35 -8
- ultralytics/models/utils/ops.py +14 -18
- ultralytics/models/yolo/classify/predict.py +1 -0
- ultralytics/models/yolo/classify/train.py +4 -2
- ultralytics/models/yolo/classify/val.py +1 -0
- ultralytics/models/yolo/detect/train.py +4 -3
- ultralytics/models/yolo/model.py +2 -4
- ultralytics/models/yolo/pose/predict.py +1 -0
- ultralytics/models/yolo/segment/predict.py +2 -0
- ultralytics/models/yolo/segment/val.py +1 -1
- ultralytics/nn/autobackend.py +45 -32
- ultralytics/nn/modules/__init__.py +13 -9
- ultralytics/nn/modules/block.py +11 -5
- ultralytics/nn/modules/conv.py +16 -7
- ultralytics/nn/modules/head.py +6 -3
- ultralytics/nn/modules/transformer.py +47 -15
- ultralytics/nn/modules/utils.py +6 -4
- ultralytics/nn/tasks.py +61 -21
- ultralytics/trackers/bot_sort.py +53 -6
- ultralytics/trackers/byte_tracker.py +71 -15
- ultralytics/trackers/track.py +0 -1
- ultralytics/trackers/utils/gmc.py +23 -0
- ultralytics/trackers/utils/kalman_filter.py +6 -6
- ultralytics/utils/__init__.py +31 -18
- ultralytics/utils/autobatch.py +1 -3
- ultralytics/utils/benchmarks.py +14 -1
- ultralytics/utils/callbacks/base.py +1 -3
- ultralytics/utils/callbacks/comet.py +11 -3
- ultralytics/utils/callbacks/dvc.py +9 -0
- ultralytics/utils/callbacks/neptune.py +5 -6
- ultralytics/utils/callbacks/wb.py +1 -0
- ultralytics/utils/checks.py +13 -9
- ultralytics/utils/dist.py +2 -1
- ultralytics/utils/downloads.py +7 -3
- ultralytics/utils/files.py +3 -3
- ultralytics/utils/instance.py +12 -3
- ultralytics/utils/loss.py +97 -22
- ultralytics/utils/metrics.py +34 -34
- ultralytics/utils/ops.py +10 -9
- ultralytics/utils/patches.py +9 -7
- ultralytics/utils/plotting.py +4 -3
- ultralytics/utils/torch_utils.py +8 -6
- ultralytics/utils/triton.py +2 -1
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/METADATA +1 -1
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/RECORD +84 -84
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/LICENSE +0 -0
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/WHEEL +0 -0
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/entry_points.txt +0 -0
- {ultralytics-8.0.195.dist-info → ultralytics-8.0.196.dist-info}/top_level.txt +0 -0
ultralytics/models/fastsam/prompt.py
CHANGED

@@ -15,6 +15,7 @@ from ultralytics.utils import TQDM
 class FastSAMPrompt:
 
     def __init__(self, source, results, device='cuda') -> None:
+        """Initializes FastSAMPrompt with given source, results and device, and assigns clip for linear assignment."""
         self.device = device
         self.results = results
         self.source = source
@@ -30,6 +31,7 @@ class FastSAMPrompt:
 
     @staticmethod
     def _segment_image(image, bbox):
+        """Segments the given image according to the provided bounding box coordinates."""
         image_array = np.array(image)
         segmented_image_array = np.zeros_like(image_array)
         x1, y1, x2, y2 = bbox
@@ -45,6 +47,9 @@ class FastSAMPrompt:
 
     @staticmethod
     def _format_results(result, filter=0):
+        """Formats detection results into list of annotations each containing ID, segmentation, bounding box, score and
+        area.
+        """
         annotations = []
         n = len(result.masks.data) if result.masks is not None else 0
         for i in range(n):
@@ -61,6 +66,9 @@ class FastSAMPrompt:
 
     @staticmethod
     def _get_bbox_from_mask(mask):
+        """Applies morphological transformations to the mask, displays it, and if with_contours is True, draws
+        contours.
+        """
         mask = mask.astype(np.uint8)
         contours, hierarchy = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
         x1, y1, w, h = cv2.boundingRect(contours[0])
@@ -195,6 +203,7 @@ class FastSAMPrompt:
 
     @torch.no_grad()
     def retrieve(self, model, preprocess, elements, search_text: str, device) -> int:
+        """Processes images and text with a model, calculates similarity, and returns softmax score."""
         preprocessed_images = [preprocess(image).to(device) for image in elements]
         tokenized_text = self.clip.tokenize([search_text]).to(device)
         stacked_images = torch.stack(preprocessed_images)
@@ -206,6 +215,7 @@ class FastSAMPrompt:
         return probs[:, 0].softmax(dim=0)
 
     def _crop_image(self, format_results):
+        """Crops an image based on provided annotation format and returns cropped images and related data."""
         if os.path.isdir(self.source):
             raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
         image = Image.fromarray(cv2.cvtColor(self.results[0].orig_img, cv2.COLOR_BGR2RGB))
@@ -229,6 +239,7 @@ class FastSAMPrompt:
         return cropped_boxes, cropped_images, not_crop, filter_id, annotations
 
     def box_prompt(self, bbox):
+        """Modifies the bounding box properties and calculates IoU between masks and bounding box."""
         if self.results[0].masks is not None:
             assert (bbox[2] != 0 and bbox[3] != 0)
             if os.path.isdir(self.source):
@@ -261,7 +272,8 @@ class FastSAMPrompt:
             self.results[0].masks.data = torch.tensor(np.array([masks[max_iou_index].cpu().numpy()]))
         return self.results
 
-    def point_prompt(self, points, pointlabel): # numpy
+    def point_prompt(self, points, pointlabel):  # numpy
+        """Adjusts points on detected masks based on user input and returns the modified results."""
         if self.results[0].masks is not None:
             if os.path.isdir(self.source):
                 raise ValueError(f"'{self.source}' is a directory, not a valid source for this function.")
@@ -284,6 +296,7 @@ class FastSAMPrompt:
         return self.results
 
     def text_prompt(self, text):
+        """Processes a text prompt, applies it to existing results and returns the updated results."""
         if self.results[0].masks is not None:
             format_results = self._format_results(self.results[0], 0)
             cropped_boxes, cropped_images, not_crop, filter_id, annotations = self._crop_image(format_results)
@@ -296,4 +309,5 @@ class FastSAMPrompt:
         return self.results
 
     def everything_prompt(self):
+        """Returns the processed results from the previous methods in the class."""
         return self.results
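The hunks above only add docstrings, but together they outline the FastSAMPrompt workflow: build the helper from a source image, model results and a device, then narrow the masks with a box, point or text prompt. A minimal usage sketch follows; the FastSAM model call that produces the results object is assumed from the wider ultralytics.models.fastsam package (it is not part of this diff), and the weight file name and prompt values are illustrative only.

from ultralytics.models.fastsam import FastSAM, FastSAMPrompt

# Run FastSAM to obtain segmentation results (assumed API, not shown in this diff)
model = FastSAM('FastSAM-s.pt')
everything_results = model('bus.jpg', device='cuda', retina_masks=True, imgsz=1024, conf=0.4, iou=0.9)

# Build the prompt helper documented above and refine the masks
prompt_process = FastSAMPrompt('bus.jpg', everything_results, device='cuda')
ann = prompt_process.everything_prompt()                                # all processed results
ann = prompt_process.box_prompt(bbox=[200, 200, 300, 300])              # keep the mask with highest IoU to the box
ann = prompt_process.point_prompt(points=[[620, 360]], pointlabel=[1])  # foreground point prompt
ann = prompt_process.text_prompt(text='a photo of a dog')               # CLIP-based text prompt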
ultralytics/models/nas/model.py
CHANGED

@@ -25,12 +25,13 @@ from .val import NASValidator
 class NAS(Model):
 
     def __init__(self, model='yolo_nas_s.pt') -> None:
+        """Initializes the NAS model with the provided or default 'yolo_nas_s.pt' model."""
         assert Path(model).suffix not in ('.yaml', '.yml'), 'YOLO-NAS models only support pre-trained models.'
         super().__init__(model, task='detect')
 
     @smart_inference_mode()
     def _load(self, weights: str, task: str):
-
+        """Loads an existing NAS model weights or creates a new NAS model with pretrained weights if not provided."""
         import super_gradients
         suffix = Path(weights).suffix
         if suffix == '.pt':
@@ -58,4 +59,5 @@ class NAS(Model):
 
     @property
     def task_map(self):
+        """Returns a dictionary mapping tasks to respective predictor and validator classes."""
         return {'detect': {'predictor': NASPredictor, 'validator': NASValidator}}
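For context, the NAS interface documented above is a thin wrapper: the constructor rejects YAML configs, _load pulls weights in via super-gradients, and task_map wires in NASPredictor and NASValidator. A short, hedged usage sketch (the predict call is inherited from the shared Model base class and is assumed here, not shown in this hunk):

from ultralytics import NAS

model = NAS('yolo_nas_s.pt')        # only pre-trained *.pt weights; *.yaml/*.yml are rejected by the assert above
                                    # loading requires the super-gradients package imported in _load()
results = model.predict('bus.jpg')  # assumed inherited Model.predict(), routed via task_map to NASPredictor
print(model.task_map)               # {'detect': {'predictor': NASPredictor, 'validator': NASValidator}}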
ultralytics/models/rtdetr/model.py
CHANGED

@@ -1,7 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-RT-DETR model interface
-"""
+"""RT-DETR model interface."""
 from ultralytics.engine.model import Model
 from ultralytics.nn.tasks import RTDETRDetectionModel
 
@@ -11,17 +9,17 @@ from .val import RTDETRValidator
 
 
 class RTDETR(Model):
-    """
-    RTDETR model interface.
-    """
+    """RTDETR model interface."""
 
     def __init__(self, model='rtdetr-l.pt') -> None:
+        """Initializes the RTDETR model with the given model file, defaulting to 'rtdetr-l.pt'."""
         if model and model.split('.')[-1] not in ('pt', 'yaml', 'yml'):
             raise NotImplementedError('RT-DETR only supports creating from *.pt file or *.yaml file.')
         super().__init__(model=model, task='detect')
 
     @property
     def task_map(self):
+        """Returns a dictionary mapping task names to corresponding Ultralytics task classes for RTDETR model."""
         return {
             'detect': {
                 'predictor': RTDETRPredictor,
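As with NAS, the RTDETR class above is a small interface layer: the constructor checks the file suffix and defers to Model with task='detect', and task_map routes detection to RTDETRPredictor (the mapping is truncated in the hunk above). A hedged usage sketch; predict is assumed from the Model base class:

from ultralytics import RTDETR

model = RTDETR('rtdetr-l.pt')           # *.pt or *.yaml/*.yml only, per the suffix check above
results = model.predict('bus.jpg')      # assumed inherited Model.predict(); dispatched to RTDETRPredictor
print(model.task_map['detect'].keys())  # includes 'predictor', per the mapping shown above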
ultralytics/models/rtdetr/predict.py
CHANGED

@@ -48,7 +48,8 @@ class RTDETRPredictor(BasePredictor):
         return results
 
     def pre_transform(self, im):
-        """
+        """
+        Pre-transform input image before inference.
 
         Args:
             im (List(np.ndarray)): (N, 3, h, w) for tensor, [(h, w, 3) x N] for list.
ultralytics/models/rtdetr/val.py
CHANGED

@@ -16,6 +16,7 @@ __all__ = 'RTDETRValidator',  # tuple or list
 class RTDETRDataset(YOLODataset):
 
     def __init__(self, *args, data=None, **kwargs):
+        """Initialize the RTDETRDataset class by inheriting from the YOLODataset class."""
         super().__init__(*args, data=data, use_segments=False, use_keypoints=False, **kwargs)
 
     # NOTE: add stretch version load_image for rtdetr mosaic
ultralytics/models/sam/amg.py
CHANGED

@@ -32,9 +32,10 @@ def batch_iterator(batch_size: int, *args) -> Generator[List[Any], None, None]:
 
 def calculate_stability_score(masks: torch.Tensor, mask_threshold: float, threshold_offset: float) -> torch.Tensor:
     """
-    Computes the stability score for a batch of masks.
-
-    the predicted mask logits at high
+    Computes the stability score for a batch of masks.
+
+    The stability score is the IoU between the binary masks obtained by thresholding the predicted mask logits at high
+    and low values.
     """
     # One mask is always contained inside the other.
     # Save memory by preventing unnecessary cast to torch.int64
@@ -60,7 +61,11 @@ def build_all_layer_point_grids(n_per_side: int, n_layers: int, scale_per_layer:
 
 def generate_crop_boxes(im_size: Tuple[int, ...], n_layers: int,
                         overlap_ratio: float) -> Tuple[List[List[int]], List[int]]:
-    """
+    """
+    Generates a list of crop boxes of different sizes.
+
+    Each layer has (2**i)**2 boxes for the ith layer.
+    """
     crop_boxes, layer_idxs = [], []
     im_h, im_w = im_size
     short_side = min(im_h, im_w)
@@ -145,8 +150,9 @@ def remove_small_regions(mask: np.ndarray, area_thresh: float, mode: str) -> Tup
 
 def batched_mask_to_box(masks: torch.Tensor) -> torch.Tensor:
     """
-    Calculates boxes in XYXY format around masks.
-
+    Calculates boxes in XYXY format around masks.
+
+    Return [0,0,0,0] for an empty mask. For input shape C1xC2x...xHxW, the output shape is C1xC2x...x4.
     """
     # torch.max below raises an error on empty inputs, just skip in this case
     if torch.numel(masks) == 0:
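The reflowed docstrings above describe pure tensor utilities, so their behaviour is easy to sanity-check in isolation. Below is a small sketch under the documented shapes (mask logits of shape (N, H, W) for the stability score; boolean masks for batched_mask_to_box, which returns XYXY boxes and [0, 0, 0, 0] for an empty mask); the expected numbers in the comments come from this toy input, not from the diff itself:

import torch
from ultralytics.models.sam.amg import batched_mask_to_box, calculate_stability_score

logits = torch.zeros(2, 8, 8)   # two mask logit maps: one with a blob, one empty
logits[0, 2:5, 3:7] = 10.0

boxes = batched_mask_to_box(logits > 0)   # XYXY boxes; expected [[3, 2, 6, 4], [0, 0, 0, 0]]
scores = calculate_stability_score(logits, mask_threshold=0.0, threshold_offset=1.0)
print(boxes.shape, scores.shape)          # torch.Size([2, 4]) torch.Size([2])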
ultralytics/models/sam/model.py
CHANGED

@@ -1,7 +1,5 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
-"""
-SAM model interface
-"""
+"""SAM model interface."""
 
 from pathlib import Path
 
@@ -13,16 +11,16 @@ from .predict import Predictor
 
 
 class SAM(Model):
-    """
-    SAM model interface.
-    """
+    """SAM model interface."""
 
     def __init__(self, model='sam_b.pt') -> None:
+        """Initializes the SAM model instance with the specified pre-trained model file."""
         if model and Path(model).suffix not in ('.pt', '.pth'):
             raise NotImplementedError('SAM prediction requires pre-trained *.pt or *.pth model.')
         super().__init__(model=model, task='segment')
 
     def _load(self, weights: str, task=None):
+        """Loads the provided weights into the SAM model."""
         self.model = build_sam(weights)
 
     def predict(self, source, stream=False, bboxes=None, points=None, labels=None, **kwargs):
@@ -48,4 +46,5 @@ class SAM(Model):
 
     @property
     def task_map(self):
+        """Returns a dictionary mapping the 'segment' task to its corresponding 'Predictor'."""
         return {'segment': {'predictor': Predictor}}
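The SAM interface above mirrors the other model wrappers, but its predict signature (visible in the second hunk) additionally accepts bboxes, points and labels prompts. A hedged usage sketch; the weight file name and prompt coordinates are illustrative only:

from ultralytics import SAM

model = SAM('sam_b.pt')                                             # *.pt / *.pth checkpoints only, per the check above
results = model.predict('bus.jpg', bboxes=[100, 100, 400, 500])     # box prompt
results = model.predict('bus.jpg', points=[250, 300], labels=[1])   # foreground point prompt
print(model.task_map)                                               # {'segment': {'predictor': Predictor}}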
ultralytics/models/sam/modules/decoders.py
CHANGED

@@ -98,7 +98,11 @@ class MaskDecoder(nn.Module):
         sparse_prompt_embeddings: torch.Tensor,
         dense_prompt_embeddings: torch.Tensor,
     ) -> Tuple[torch.Tensor, torch.Tensor]:
-        """
+        """
+        Predicts masks.
+
+        See 'forward' for more details.
+        """
         # Concatenate output tokens
         output_tokens = torch.cat([self.iou_token.weight, self.mask_tokens.weight], dim=0)
         output_tokens = output_tokens.unsqueeze(0).expand(sparse_prompt_embeddings.size(0), -1, -1)
ultralytics/models/sam/modules/encoders.py
CHANGED

@@ -100,6 +100,9 @@ class ImageEncoderViT(nn.Module):
         )
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Processes input through patch embedding, applies positional embedding if present, and passes through blocks
+        and neck.
+        """
         x = self.patch_embed(x)
         if self.pos_embed is not None:
             x = x + self.pos_embed
@@ -157,8 +160,8 @@ class PromptEncoder(nn.Module):
 
     def get_dense_pe(self) -> torch.Tensor:
         """
-        Returns the positional encoding used to encode point prompts,
-
+        Returns the positional encoding used to encode point prompts, applied to a dense set of points the shape of the
+        image encoding.
 
         Returns:
           torch.Tensor: Positional encoding with shape 1x(embed_dim)x(embedding_h)x(embedding_w)
@@ -204,9 +207,7 @@ class PromptEncoder(nn.Module):
         boxes: Optional[torch.Tensor],
         masks: Optional[torch.Tensor],
     ) -> int:
-        """
-        Gets the batch size of the output given the batch size of the input prompts.
-        """
+        """Gets the batch size of the output given the batch size of the input prompts."""
         if points is not None:
             return points[0].shape[0]
         elif boxes is not None:
@@ -217,6 +218,7 @@ class PromptEncoder(nn.Module):
             return 1
 
     def _get_device(self) -> torch.device:
+        """Returns the device of the first point embedding's weight tensor."""
         return self.point_embeddings[0].weight.device
 
     def forward(
@@ -259,11 +261,10 @@ class PromptEncoder(nn.Module):
 
 
 class PositionEmbeddingRandom(nn.Module):
-    """
-    Positional encoding using random spatial frequencies.
-    """
+    """Positional encoding using random spatial frequencies."""
 
     def __init__(self, num_pos_feats: int = 64, scale: Optional[float] = None) -> None:
+        """Initializes a position embedding using random spatial frequencies."""
         super().__init__()
         if scale is None or scale <= 0.0:
             scale = 1.0
@@ -304,7 +305,7 @@ class PositionEmbeddingRandom(nn.Module):
 
 
 class Block(nn.Module):
-    """Transformer blocks with support of window attention and residual propagation blocks"""
+    """Transformer blocks with support of window attention and residual propagation blocks."""
 
     def __init__(
         self,
@@ -351,6 +352,7 @@ class Block(nn.Module):
         self.window_size = window_size
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Executes a forward pass through the transformer block with window attention and non-overlapping windows."""
         shortcut = x
         x = self.norm1(x)
         # Window partition
@@ -404,6 +406,7 @@ class Attention(nn.Module):
             self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, head_dim))
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Applies the forward operation including attention, normalization, MLP, and indexing within window limits."""
         B, H, W, _ = x.shape
         # qkv with shape (3, B, nHead, H * W, C)
         qkv = self.qkv(x).reshape(B, H * W, 3, self.num_heads, -1).permute(2, 0, 3, 1, 4)
@@ -448,6 +451,7 @@ def window_unpartition(windows: torch.Tensor, window_size: int, pad_hw: Tuple[in
                        hw: Tuple[int, int]) -> torch.Tensor:
     """
     Window unpartition into original sequences and removing padding.
+
     Args:
         windows (tensor): input tokens with [B * num_windows, window_size, window_size, C].
         window_size (int): window size.
@@ -540,9 +544,7 @@ def add_decomposed_rel_pos(
 
 
 class PatchEmbed(nn.Module):
-    """
-    Image to Patch Embedding.
-    """
+    """Image to Patch Embedding."""
 
     def __init__(
         self,
@@ -565,4 +567,5 @@ class PatchEmbed(nn.Module):
         self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=kernel_size, stride=stride, padding=padding)
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Computes patch embedding by applying convolution and transposing resulting tensor."""
         return self.proj(x).permute(0, 2, 3, 1)  # B C H W -> B H W C
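Several of the docstrings above concern PositionEmbeddingRandom, whose get_dense_pe output shape is documented as 1x(embed_dim)x(embedding_h)x(embedding_w). A quick shape check is sketched below; calling the module with an (h, w) grid size follows the upstream SAM implementation and is an assumption, since the forward signature itself is not part of this diff:

import torch
from ultralytics.models.sam.modules.encoders import PositionEmbeddingRandom

pe = PositionEmbeddingRandom(num_pos_feats=64)   # constructor shown above
dense = pe((32, 32))                             # assumed forward(size) over a 32x32 grid
print(dense.shape)                               # expected torch.Size([128, 32, 32]) -> 2 * num_pos_feats channels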
ultralytics/models/sam/modules/tiny_encoder.py
CHANGED

@@ -23,6 +23,9 @@ from ultralytics.utils.instance import to_2tuple
 class Conv2d_BN(torch.nn.Sequential):
 
     def __init__(self, a, b, ks=1, stride=1, pad=0, dilation=1, groups=1, bn_weight_init=1):
+        """Initializes the MBConv model with given input channels, output channels, expansion ratio, activation, and
+        drop path.
+        """
         super().__init__()
         self.add_module('c', torch.nn.Conv2d(a, b, ks, stride, pad, dilation, groups, bias=False))
         bn = torch.nn.BatchNorm2d(b)
@@ -34,6 +37,9 @@ class Conv2d_BN(torch.nn.Sequential):
 class PatchEmbed(nn.Module):
 
     def __init__(self, in_chans, embed_dim, resolution, activation):
+        """Initialize the PatchMerging class with specified input, output dimensions, resolution and activation
+        function.
+        """
         super().__init__()
         img_size: Tuple[int, int] = to_2tuple(resolution)
         self.patches_resolution = (img_size[0] // 4, img_size[1] // 4)
@@ -48,12 +54,16 @@ class PatchEmbed(nn.Module):
         )
 
     def forward(self, x):
+        """Runs input tensor 'x' through the PatchMerging model's sequence of operations."""
         return self.seq(x)
 
 
 class MBConv(nn.Module):
 
     def __init__(self, in_chans, out_chans, expand_ratio, activation, drop_path):
+        """Initializes a convolutional layer with specified dimensions, input resolution, depth, and activation
+        function.
+        """
         super().__init__()
         self.in_chans = in_chans
         self.hidden_chans = int(in_chans * expand_ratio)
@@ -73,6 +83,7 @@ class MBConv(nn.Module):
             self.drop_path = nn.Identity()
 
     def forward(self, x):
+        """Implements the forward pass for the model architecture."""
         shortcut = x
         x = self.conv1(x)
         x = self.act1(x)
@@ -87,6 +98,9 @@ class MBConv(nn.Module):
 class PatchMerging(nn.Module):
 
     def __init__(self, input_resolution, dim, out_dim, activation):
+        """Initializes the ConvLayer with specific dimension, input resolution, depth, activation, drop path, and other
+        optional parameters.
+        """
         super().__init__()
 
         self.input_resolution = input_resolution
@@ -99,6 +113,7 @@ class PatchMerging(nn.Module):
         self.conv3 = Conv2d_BN(out_dim, out_dim, 1, 1, 0)
 
     def forward(self, x):
+        """Applies forward pass on the input utilizing convolution and activation layers, and returns the result."""
         if x.ndim == 3:
             H, W = self.input_resolution
             B = len(x)
@@ -149,6 +164,7 @@ class ConvLayer(nn.Module):
                 input_resolution, dim=dim, out_dim=out_dim, activation=activation)
 
     def forward(self, x):
+        """Processes the input through a series of convolutional layers and returns the activated output."""
         for blk in self.blocks:
             x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
         return x if self.downsample is None else self.downsample(x)
@@ -157,6 +173,7 @@ class ConvLayer(nn.Module):
 class Mlp(nn.Module):
 
     def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
+        """Initializes Attention module with the given parameters including dimension, key_dim, number of heads, etc."""
         super().__init__()
         out_features = out_features or in_features
         hidden_features = hidden_features or in_features
@@ -167,6 +184,7 @@ class Mlp(nn.Module):
         self.drop = nn.Dropout(drop)
 
     def forward(self, x):
+        """Applies operations on input x and returns modified x, runs downsample if not None."""
         x = self.norm(x)
         x = self.fc1(x)
         x = self.act(x)
@@ -216,6 +234,7 @@ class Attention(torch.nn.Module):
 
     @torch.no_grad()
     def train(self, mode=True):
+        """Sets the module in training mode and handles attribute 'ab' based on the mode."""
        super().train(mode)
         if mode and hasattr(self, 'ab'):
             del self.ab
@@ -298,6 +317,9 @@ class TinyViTBlock(nn.Module):
         self.local_conv = Conv2d_BN(dim, dim, ks=local_conv_size, stride=1, pad=pad, groups=dim)
 
     def forward(self, x):
+        """Applies attention-based transformation or padding to input 'x' before passing it through a local
+        convolution.
+        """
         H, W = self.input_resolution
         B, L, C = x.shape
         assert L == H * W, 'input feature has wrong size'
@@ -337,6 +359,9 @@ class TinyViTBlock(nn.Module):
         return x + self.drop_path(self.mlp(x))
 
     def extra_repr(self) -> str:
+        """Returns a formatted string representing the TinyViTBlock's parameters: dimension, input resolution, number of
+        attentions heads, window size, and MLP ratio.
+        """
         return f'dim={self.dim}, input_resolution={self.input_resolution}, num_heads={self.num_heads}, ' \
                f'window_size={self.window_size}, mlp_ratio={self.mlp_ratio}'
 
@@ -402,23 +427,28 @@ class BasicLayer(nn.Module):
                 input_resolution, dim=dim, out_dim=out_dim, activation=activation)
 
     def forward(self, x):
+        """Performs forward propagation on the input tensor and returns a normalized tensor."""
         for blk in self.blocks:
             x = checkpoint.checkpoint(blk, x) if self.use_checkpoint else blk(x)
         return x if self.downsample is None else self.downsample(x)
 
     def extra_repr(self) -> str:
+        """Returns a string representation of the extra_repr function with the layer's parameters."""
         return f'dim={self.dim}, input_resolution={self.input_resolution}, depth={self.depth}'
 
 
 class LayerNorm2d(nn.Module):
+    """A PyTorch implementation of Layer Normalization in 2D."""
 
     def __init__(self, num_channels: int, eps: float = 1e-6) -> None:
+        """Initialize LayerNorm2d with the number of channels and an optional epsilon."""
         super().__init__()
         self.weight = nn.Parameter(torch.ones(num_channels))
         self.bias = nn.Parameter(torch.zeros(num_channels))
         self.eps = eps
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
+        """Perform a forward pass, normalizing the input tensor."""
         u = x.mean(1, keepdim=True)
         s = (x - u).pow(2).mean(1, keepdim=True)
         x = (x - u) / torch.sqrt(s + self.eps)
@@ -518,6 +548,7 @@ class TinyViT(nn.Module):
         )
 
     def set_layer_lr_decay(self, layer_lr_decay):
+        """Sets the learning rate decay for each layer in the TinyViT model."""
         decay_rate = layer_lr_decay
 
         # layers -> blocks (depth)
@@ -525,6 +556,7 @@
         lr_scales = [decay_rate ** (depth - i - 1) for i in range(depth)]
 
         def _set_lr_scale(m, scale):
+            """Sets the learning rate scale for each layer in the model based on the layer's depth."""
             for p in m.parameters():
                 p.lr_scale = scale
 
@@ -544,12 +576,14 @@
             p.param_name = k
 
         def _check_lr_scale(m):
+            """Checks if the learning rate scale attribute is present in module's parameters."""
             for p in m.parameters():
                 assert hasattr(p, 'lr_scale'), p.param_name
 
         self.apply(_check_lr_scale)
 
     def _init_weights(self, m):
+        """Initializes weights for linear layers and layer normalization in the given module."""
         if isinstance(m, nn.Linear):
             # NOTE: This initialization is needed only for training.
             # trunc_normal_(m.weight, std=.02)
@@ -561,11 +595,12 @@
 
     @torch.jit.ignore
     def no_weight_decay_keywords(self):
+        """Returns a dictionary of parameter names where weight decay should not be applied."""
         return {'attention_biases'}
 
     def forward_features(self, x):
-
-        x = self.patch_embed(x)
+        """Runs the input through the model layers and returns the transformed output."""
+        x = self.patch_embed(x)  # x input is (N, C, H, W)
 
         x = self.layers[0](x)
         start_i = 1
@@ -579,4 +614,5 @@
         return self.neck(x)
 
     def forward(self, x):
+        """Executes a forward pass on the input tensor through the constructed model layers."""
         return self.forward_features(x)
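The LayerNorm2d forward shown above normalizes an NCHW tensor across its channel dimension before an affine step. Below is a small equivalence check of that math; the final weight/bias line sits just past the end of the hunk, so its exact form is an assumption based on the parameters initialized above:

import torch
from ultralytics.models.sam.modules.tiny_encoder import LayerNorm2d

ln = LayerNorm2d(num_channels=8, eps=1e-6)
x = torch.randn(2, 8, 16, 16)                                        # N, C, H, W

u = x.mean(1, keepdim=True)                                          # per-pixel channel mean
s = (x - u).pow(2).mean(1, keepdim=True)                             # per-pixel channel variance
manual = (x - u) / torch.sqrt(s + ln.eps)
manual = ln.weight[:, None, None] * manual + ln.bias[:, None, None]  # assumed affine step
print(torch.allclose(ln(x), manual, atol=1e-6))                      # expected: True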
ultralytics/models/sam/modules/transformer.py
CHANGED

@@ -21,8 +21,7 @@ class TwoWayTransformer(nn.Module):
         attention_downsample_rate: int = 2,
     ) -> None:
         """
-        A transformer decoder that attends to an input image using
-        queries whose positional embedding is supplied.
+        A transformer decoder that attends to an input image using queries whose positional embedding is supplied.
 
         Args:
           depth (int): number of layers in the transformer
@@ -171,8 +170,7 @@ class TwoWayAttentionBlock(nn.Module):
 
 
 class Attention(nn.Module):
-    """
-    An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
+    """An attention layer that allows for downscaling the size of the embedding after projection to queries, keys, and
     values.
     """
 
ultralytics/models/sam/predict.py
CHANGED

@@ -19,6 +19,7 @@ from .build import build_sam
 class Predictor(BasePredictor):
 
     def __init__(self, cfg=DEFAULT_CFG, overrides=None, _callbacks=None):
+        """Initializes the Predictor class with default or provided configuration, overrides, and callbacks."""
         if overrides is None:
             overrides = {}
         overrides.update(dict(task='segment', mode='predict', imgsz=1024))
@@ -34,7 +35,8 @@ class Predictor(BasePredictor):
         self.segment_all = False
 
     def preprocess(self, im):
-        """
+        """
+        Prepares input image before inference.
 
         Args:
             im (torch.Tensor | List(np.ndarray)): BCHW for tensor, [(HWC) x B] for list.
@@ -189,7 +191,8 @@ class Predictor(BasePredictor):
                  stability_score_thresh=0.95,
                  stability_score_offset=0.95,
                  crop_nms_thresh=0.7):
-        """
+        """
+        Segment the whole image.
 
         Args:
             im (torch.Tensor): The preprocessed image, (N, C, H, W).
@@ -360,14 +363,15 @@ class Predictor(BasePredictor):
         self.prompts = prompts
 
     def reset_image(self):
+        """Resets the image and its features to None."""
         self.im = None
         self.features = None
 
     @staticmethod
     def remove_small_regions(masks, min_area=0, nms_thresh=0.7):
         """
-        Removes small disconnected regions and holes in masks, then reruns
-
+        Removes small disconnected regions and holes in masks, then reruns box NMS to remove any new duplicates.
+        Requires open-cv as a dependency.
 
         Args:
             masks (torch.Tensor): Masks, (N, H, W).