ultralytics 8.3.37__py3-none-any.whl → 8.3.39__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = "8.3.37"
+__version__ = "8.3.39"
 
 import os
 
@@ -11,7 +11,6 @@ import cv2
 
 from ultralytics.utils import (
     ASSETS,
-    ASSETS_URL,
     DEFAULT_CFG,
     DEFAULT_CFG_DICT,
     DEFAULT_CFG_PATH,
@@ -160,7 +159,6 @@ CFG_FRACTION_KEYS = { # fractional float arguments with 0.0<=values<=1.0
     "weight_decay",
     "warmup_momentum",
     "warmup_bias_lr",
-    "label_smoothing",
     "hsv_h",
     "hsv_s",
     "hsv_v",
@@ -436,6 +434,9 @@ def _handle_deprecation(custom):
         if key == "line_thickness":
             deprecation_warn(key, "line_width")
             custom["line_width"] = custom.pop("line_thickness")
+        if key == "label_smoothing":
+            deprecation_warn(key)
+            custom.pop("label_smoothing")
 
     return custom
 
@@ -738,9 +739,8 @@ def parse_key_value_pair(pair: str = "key=value"):
         pair (str): A string containing a key-value pair in the format "key=value".
 
     Returns:
-        (tuple): A tuple containing two elements:
-            - key (str): The parsed key.
-            - value (str): The parsed value.
+        key (str): The parsed key.
+        value (str): The parsed value.
 
     Raises:
         AssertionError: If the value is missing or empty.
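For reference, a minimal usage sketch of the function whose docstring is reformatted above, assuming the public `ultralytics.cfg` import path; the returned value is type-cast by the library's smart parsing:

```python
from ultralytics.cfg import parse_key_value_pair

k, v = parse_key_value_pair("imgsz=640")
print(k, v)  # "imgsz" and 640 (the value string is converted by the library's smart type casting)
```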
@@ -99,7 +99,6 @@ cls: 0.5 # (float) cls loss gain (scale with pixels)
 dfl: 1.5 # (float) dfl loss gain
 pose: 12.0 # (float) pose loss gain
 kobj: 1.0 # (float) keypoint obj loss gain
-label_smoothing: 0.0 # (float) label smoothing (fraction)
 nbs: 64 # (int) nominal batch size
 hsv_h: 0.015 # (float) image HSV-Hue augmentation (fraction)
 hsv_s: 0.7 # (float) image HSV-Saturation augmentation (fraction)
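Taken together, the `label_smoothing` changes above (dropped from `CFG_FRACTION_KEYS`, handled in `_handle_deprecation`, removed from the default config) mean an old override no longer errors. A hedged sketch of the expected behavior, not code from this diff:

```python
from ultralytics.cfg import get_cfg

# Passing the retired key should now hit the new deprecation branch, warn, and be dropped
cfg = get_cfg(overrides={"imgsz": 640, "label_smoothing": 0.0})
print(hasattr(cfg, "label_smoothing"))  # expected: False, the key is popped during parsing
```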
@@ -2111,10 +2111,9 @@ class Format:
             h (int): Height of the image.
 
         Returns:
-            (tuple): Tuple containing:
-                masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True.
-                instances (Instances): Updated instances object with sorted segments if mask_overlap is True.
-                cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.
+            masks (numpy.ndarray): Bitmap masks with shape (N, H, W) or (1, H, W) if mask_overlap is True.
+            instances (Instances): Updated instances object with sorted segments if mask_overlap is True.
+            cls (numpy.ndarray): Updated class labels, sorted if mask_overlap is True.
 
         Notes:
             - If self.mask_overlap is True, masks are overlapped and sorted by area.
@@ -354,7 +354,7 @@ class LoadImagesAndVideos:
         self.nf = ni + nv  # number of files
         self.ni = ni  # number of images
         self.video_flag = [False] * ni + [True] * nv
-        self.mode = "image"
+        self.mode = "video" if ni == 0 else "image"  # default to video if no images
         self.vid_stride = vid_stride  # video frame-rate stride
         self.bs = batch
         if any(videos):
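A small illustration of the behavioral change, assuming an existing local video file (the path below is a placeholder):

```python
from ultralytics.data.loaders import LoadImagesAndVideos

loader = LoadImagesAndVideos("path/to/video.mp4")  # placeholder path; the file must exist
print(loader.mode)  # "video" in 8.3.39 for a video-only source; previously initialized to "image"
```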
@@ -220,6 +220,7 @@ class Exporter:
             self.args.device = "0"
         if fmt == "engine" and "dla" in str(self.args.device):  # convert int/list to str first
             dla = self.args.device.split(":")[-1]
+            self.args.device = "0"  # update device to "0"
             assert dla in {"0", "1"}, f"Expected self.args.device='dla:0' or 'dla:1, but got {self.args.device}."
         self.device = select_device("cpu" if self.args.device is None else self.args.device)
 
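A hedged usage sketch of the DLA path above: requesting a TensorRT engine on a DLA core now remaps `args.device` to GPU "0" before `select_device()` runs. Running it requires a Jetson-class device with DLA and TensorRT installed; shown for illustration only:

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
model.export(format="engine", device="dla:0", half=True)  # DLA core index is captured, device becomes "0"
```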
@@ -144,6 +144,9 @@ class Model(nn.Module):
         else:
             self._load(model, task=task)
 
+        # Delete super().training for accessing self.model.training
+        del self.training
+
     def __call__(
         self,
         source: Union[str, Path, int, Image.Image, list, tuple, np.ndarray, torch.Tensor] = None,
@@ -1143,3 +1146,29 @@ class Model(nn.Module):
         """
         self.model.eval()
         return self
+
+    def __getattr__(self, name):
+        """
+        Enables accessing model attributes directly through the Model class.
+
+        This method provides a way to access attributes of the underlying model directly through the Model class
+        instance. It first checks if the requested attribute is 'model', in which case it returns the model from
+        the module dictionary. Otherwise, it delegates the attribute lookup to the underlying model.
+
+        Args:
+            name (str): The name of the attribute to retrieve.
+
+        Returns:
+            (Any): The requested attribute value.
+
+        Raises:
+            AttributeError: If the requested attribute does not exist in the model.
+
+        Examples:
+            >>> model = YOLO("yolo11n.pt")
+            >>> print(model.stride)
+            >>> print(model.task)
+        """
+        if name == "model":
+            return self._modules["model"]
+        return getattr(self.model, name)
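The two Model changes above work together: deleting the wrapper's own `training` attribute means attribute lookups fall through to the new `__getattr__`, which delegates to the wrapped nn.Module. A brief sketch of the resulting behavior (weights path used purely for illustration):

```python
from ultralytics import YOLO

model = YOLO("yolo11n.pt")
print(model.stride)    # resolved on the underlying nn.Module via __getattr__
print(model.task)      # e.g. "detect"
print(model.training)  # now reflects self.model.training rather than the wrapper's own flag
```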
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
 from .model import SAM
-from .predict import Predictor, SAM2Predictor
+from .predict import Predictor, SAM2Predictor, SAM2VideoPredictor
 
-__all__ = "SAM", "Predictor", "SAM2Predictor"  # tuple or list
+__all__ = "SAM", "Predictor", "SAM2Predictor", "SAM2VideoPredictor"  # tuple or list
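With SAM2VideoPredictor now exported at package level it can be imported directly. The constructor and call arguments below follow the usual predictor `overrides` pattern and should be treated as an assumption, not API documented by this diff:

```python
from ultralytics.models.sam import SAM2VideoPredictor

overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024, model="sam2_b.pt")
predictor = SAM2VideoPredictor(overrides=overrides)
results = predictor(source="path/to/video.mp4", points=[920, 470], labels=[1])  # placeholder source and prompt
```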
@@ -148,7 +148,7 @@ class SAM(Model):
             verbose (bool): If True, prints the information to the console.
 
         Returns:
-            (Tuple): A tuple containing the model's information (string representations of the model).
+            (tuple): A tuple containing the model's information (string representations of the model).
 
         Examples:
             >>> sam = SAM("sam_b.pt")
@@ -36,8 +36,6 @@ class SAMModel(nn.Module):
         image_encoder (ImageEncoderViT): Backbone for encoding images into embeddings.
         prompt_encoder (PromptEncoder): Encoder for various types of input prompts.
         mask_decoder (MaskDecoder): Predicts object masks from image and prompt embeddings.
-        pixel_mean (torch.Tensor): Mean pixel values for image normalization, shape (3, 1, 1).
-        pixel_std (torch.Tensor): Standard deviation values for image normalization, shape (3, 1, 1).
 
     Methods:
         __init__: Initializes the SAMModel with encoders, decoder, and normalization parameters.
@@ -349,8 +347,7 @@ class SAM2Model(torch.nn.Module):
         self.sam_prompt_embed_dim = self.hidden_dim
         self.sam_image_embedding_size = self.image_size // self.backbone_stride
 
-        # build PromptEncoder and MaskDecoder from SAM
-        # (their hyperparameters like `mask_in_chans=16` are from SAM code)
+        # Build PromptEncoder and MaskDecoder from SAM (hyperparameters like `mask_in_chans=16` are from SAM code)
         self.sam_prompt_encoder = PromptEncoder(
             embed_dim=self.sam_prompt_embed_dim,
             image_embedding_size=(
@@ -425,8 +422,8 @@ class SAM2Model(torch.nn.Module):
             low_res_multimasks: Tensor of shape (B, M, H*4, W*4) with SAM output mask logits.
             high_res_multimasks: Tensor of shape (B, M, H*16, W*16) with upsampled mask logits.
             ious: Tensor of shape (B, M) with estimated IoU for each output mask.
-            low_res_masks: Tensor of shape (B, 1, H*4, W*4) with best low-resolution mask.
-            high_res_masks: Tensor of shape (B, 1, H*16, W*16) with best high-resolution mask.
+            low_res_masks: Tensor of shape (B, 1, H*4, W*4) with the best low-resolution mask.
+            high_res_masks: Tensor of shape (B, 1, H*16, W*16) with the best high-resolution mask.
             obj_ptr: Tensor of shape (B, C) with object pointer vector for the output mask.
             object_score_logits: Tensor of shape (B,) with object score logits.
 
@@ -488,12 +485,7 @@ class SAM2Model(torch.nn.Module):
             boxes=None,
             masks=sam_mask_prompt,
         )
-        (
-            low_res_multimasks,
-            ious,
-            sam_output_tokens,
-            object_score_logits,
-        ) = self.sam_mask_decoder(
+        low_res_multimasks, ious, sam_output_tokens, object_score_logits = self.sam_mask_decoder(
             image_embeddings=backbone_features,
             image_pe=self.sam_prompt_encoder.get_dense_pe(),
             sparse_prompt_embeddings=sparse_embeddings,
@@ -505,13 +497,8 @@ class SAM2Model(torch.nn.Module):
         if self.pred_obj_scores:
             is_obj_appearing = object_score_logits > 0
 
-            # Mask used for spatial memories is always a *hard* choice between obj and no obj,
-            # consistent with the actual mask prediction
-            low_res_multimasks = torch.where(
-                is_obj_appearing[:, None, None],
-                low_res_multimasks,
-                NO_OBJ_SCORE,
-            )
+            # Spatial memory mask is a *hard* choice between obj and no obj, consistent with actual mask prediction
+            low_res_multimasks = torch.where(is_obj_appearing[:, None, None], low_res_multimasks, NO_OBJ_SCORE)
 
         # convert masks from possibly bfloat16 (or float16) to float32
         # (older PyTorch versions before 2.1 don't support `interpolate` on bf16)
@@ -617,7 +604,6 @@ class SAM2Model(torch.nn.Module):
 
     def _prepare_backbone_features(self, backbone_out):
         """Prepares and flattens visual features from the image backbone output for further processing."""
-        backbone_out = backbone_out.copy()
         assert len(backbone_out["backbone_fpn"]) == len(backbone_out["vision_pos_enc"])
         assert len(backbone_out["backbone_fpn"]) >= self.num_feature_levels
 
@@ -826,11 +812,7 @@ class SAM2Model(torch.nn.Module):
             mask_for_mem = mask_for_mem * self.sigmoid_scale_for_mem_enc
         if self.sigmoid_bias_for_mem_enc != 0.0:
             mask_for_mem = mask_for_mem + self.sigmoid_bias_for_mem_enc
-        maskmem_out = self.memory_encoder(
-            pix_feat,
-            mask_for_mem,
-            skip_mask_sigmoid=True,  # sigmoid already applied
-        )
+        maskmem_out = self.memory_encoder(pix_feat, mask_for_mem, skip_mask_sigmoid=True)  # sigmoid already applied
         maskmem_features = maskmem_out["vision_features"]
         maskmem_pos_enc = maskmem_out["vision_pos_enc"]
         # add a no-object embedding to the spatial memory to indicate that the frame
@@ -965,16 +947,7 @@ class SAM2Model(torch.nn.Module):
             track_in_reverse,
             prev_sam_mask_logits,
         )
-
-        (
-            _,
-            _,
-            _,
-            low_res_masks,
-            high_res_masks,
-            obj_ptr,
-            object_score_logits,
-        ) = sam_outputs
+        _, _, _, low_res_masks, high_res_masks, obj_ptr, object_score_logits = sam_outputs
 
         current_out["pred_masks"] = low_res_masks
         current_out["pred_masks_high_res"] = high_res_masks
@@ -984,8 +957,7 @@ class SAM2Model(torch.nn.Module):
         # it's mainly used in the demo to encode spatial memories w/ consolidated masks)
         current_out["object_score_logits"] = object_score_logits
 
-        # Finally run the memory encoder on the predicted mask to encode
-        # it into a new memory feature (that can be used in future frames)
+        # Run memory encoder on the predicted mask to encode it into a new memory feature (for use in future frames)
         self._encode_memory_in_output(
             current_vision_feats,
             feat_sizes,
@@ -1007,8 +979,9 @@ class SAM2Model(torch.nn.Module):
             and (self.multimask_min_pt_num <= num_pts <= self.multimask_max_pt_num)
         )
 
-    def _apply_non_overlapping_constraints(self, pred_masks):
-        """Applies non-overlapping constraints to masks, keeping highest scoring object per location."""
+    @staticmethod
+    def _apply_non_overlapping_constraints(pred_masks):
+        """Applies non-overlapping constraints to masks, keeping the highest scoring object per location."""
         batch_size = pred_masks.size(0)
         if batch_size == 1:
             return pred_masks
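For context, a self-contained sketch of what `_apply_non_overlapping_constraints` does (a reimplementation for illustration, not the library's exact code): each pixel is kept only for the highest-scoring object and suppressed to a large negative logit for every other object:

```python
import torch


def apply_non_overlapping_constraints(pred_masks: torch.Tensor) -> torch.Tensor:
    """pred_masks: (num_objects, 1, H, W) mask logits for one frame."""
    num_objects = pred_masks.size(0)
    if num_objects == 1:
        return pred_masks
    max_obj_inds = torch.argmax(pred_masks, dim=0, keepdim=True)  # winning object index per pixel
    batch_obj_inds = torch.arange(num_objects, device=pred_masks.device)[:, None, None, None]
    keep = max_obj_inds == batch_obj_inds  # True where this object has the highest score
    return torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))


masks = torch.randn(3, 1, 64, 64)  # three overlapping toy object masks
print(apply_non_overlapping_constraints(masks).shape)  # torch.Size([3, 1, 64, 64])
```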
@@ -1024,6 +997,10 @@ class SAM2Model(torch.nn.Module):
         pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))
         return pred_masks
 
+    def set_binarize(self, binarize=False):
+        """Set binarize for VideoPredictor."""
+        self.binarize_mask_from_pts_for_mem_enc = binarize
+
     def set_imgsz(self, imgsz):
         """
         Set image size to make model compatible with different image sizes.