ultralytics 8.2.83__py3-none-any.whl → 8.2.84__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of ultralytics might be problematic.

ultralytics/__init__.py CHANGED
@@ -1,6 +1,6 @@
 # Ultralytics YOLO 🚀, AGPL-3.0 license
 
-__version__ = "8.2.83"
+__version__ = "8.2.84"
 
 import os
 
ultralytics/engine/model.py CHANGED
@@ -128,8 +128,10 @@ class Model(nn.Module):
         if self.is_hub_model(model):
             # Fetch model from HUB
             checks.check_requirements("hub-sdk>=0.0.8")
-            self.session = HUBTrainingSession.create_session(model)
-            model = self.session.model_file
+            session = HUBTrainingSession.create_session(model)
+            model = session.model_file
+            if session.train_args:  # training sent from HUB
+                self.session = session
 
         # Check if Triton Server model
         elif self.is_triton_model(model):
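Note on the hunk above: loading a HUB model still resolves the identifier to a local weights file, but the HUBTrainingSession is now only retained on the Model when the HUB job actually carries training arguments, so a plain inference pull no longer holds a training session. A hedged sketch of the visible effect (the model URL is a placeholder):

    from ultralytics import YOLO

    # The session is created to fetch the weights, then discarded unless
    # session.train_args is populated (i.e. training was queued from HUB).
    model = YOLO("https://hub.ultralytics.com/models/MODEL_ID")  # placeholder ID
    print(model.session)  # None for a plain inference pull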
ultralytics/hub/session.py CHANGED
@@ -50,6 +50,7 @@ class HUBTrainingSession:
         self.model = None
         self.model_url = None
         self.model_file = None
+        self.train_args = None
 
         # Parse input
         api_key, model_id, self.filename = self._parse_identifier(identifier)
ultralytics/models/sam/model.py CHANGED
@@ -106,7 +106,7 @@ class SAM(Model):
             ...     print(f"Detected {len(r.masks)} masks")
         """
         overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024)
-        kwargs.update(overrides)
+        kwargs = {**overrides, **kwargs}
         prompts = dict(bboxes=bboxes, points=points, labels=labels)
         return super().predict(source, stream, prompts=prompts, **kwargs)
 
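Note: this one-line change flips the precedence of user arguments. kwargs.update(overrides) mutated the caller's kwargs so the hard-coded overrides always won; {**overrides, **kwargs} keeps the overrides as defaults the caller can override. A minimal sketch of the difference:

    overrides = dict(conf=0.25, task="segment", mode="predict", imgsz=1024)
    kwargs = dict(imgsz=512)  # caller asks for a smaller input size

    old = dict(kwargs)
    old.update(overrides)          # old behavior: overrides win, imgsz forced to 1024
    new = {**overrides, **kwargs}  # new behavior: caller wins, imgsz stays 512

    print(old["imgsz"], new["imgsz"])  # 1024 512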
ultralytics/models/sam/modules/encoders.py CHANGED
@@ -151,7 +151,12 @@ class ImageEncoderViT(nn.Module):
         """Processes input through patch embedding, positional embedding, transformer blocks, and neck module."""
         x = self.patch_embed(x)
         if self.pos_embed is not None:
-            x = x + self.pos_embed
+            pos_embed = (
+                F.interpolate(self.pos_embed.permute(0, 3, 1, 2), scale_factor=self.img_size / 1024).permute(0, 2, 3, 1)
+                if self.img_size != 1024
+                else self.pos_embed
+            )
+            x = x + pos_embed
         for blk in self.blocks:
             x = blk(x)
         return self.neck(x.permute(0, 3, 1, 2))
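Note: the learned positional embedding is stored for a 1024-pixel input (a 64x64 token grid with 16-pixel patches) in (B, H, W, C) layout, while F.interpolate expects (B, C, H, W), hence the permutes. A standalone sketch of the same rescaling, assuming those ViT defaults:

    import torch
    import torch.nn.functional as F

    pos_embed = torch.randn(1, 64, 64, 256)  # (B, H, W, C) grid for a 1024px input
    img_size = 512                           # new square input size

    if img_size != 1024:
        pos_embed = F.interpolate(
            pos_embed.permute(0, 3, 1, 2), scale_factor=img_size / 1024
        ).permute(0, 2, 3, 1)

    print(pos_embed.shape)  # torch.Size([1, 32, 32, 256])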
ultralytics/models/sam/modules/sam.py CHANGED
@@ -90,6 +90,19 @@ class SAMModel(nn.Module):
         self.register_buffer("pixel_mean", torch.Tensor(pixel_mean).view(-1, 1, 1), False)
         self.register_buffer("pixel_std", torch.Tensor(pixel_std).view(-1, 1, 1), False)
 
+    def set_imgsz(self, imgsz):
+        """
+        Set image size to make model compatible with different image sizes.
+
+        Args:
+            imgsz (Tuple[int, int]): The size of the input image.
+        """
+        if hasattr(self.image_encoder, "set_imgsz"):
+            self.image_encoder.set_imgsz(imgsz)
+        self.prompt_encoder.input_image_size = imgsz
+        self.prompt_encoder.image_embedding_size = [x // 16 for x in imgsz]  # 16 is fixed as patch size of ViT model
+        self.image_encoder.img_size = imgsz[0]
+
 
 class SAM2Model(torch.nn.Module):
     """
@@ -940,3 +953,14 @@ class SAM2Model(torch.nn.Module):
         # don't overlap (here sigmoid(-10.0)=4.5398e-05)
         pred_masks = torch.where(keep, pred_masks, torch.clamp(pred_masks, max=-10.0))
         return pred_masks
+
+    def set_imgsz(self, imgsz):
+        """
+        Set image size to make model compatible with different image sizes.
+
+        Args:
+            imgsz (Tuple[int, int]): The size of the input image.
+        """
+        self.image_size = imgsz[0]
+        self.sam_prompt_encoder.input_image_size = imgsz
+        self.sam_prompt_encoder.image_embedding_size = [x // 16 for x in imgsz]  # fixed ViT patch size of 16
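Note: together with the predictor changes below, these set_imgsz hooks are what free SAM and SAM 2 inference from the fixed 1024x1024 input. A hedged usage sketch (checkpoint and image names are illustrative):

    from ultralytics import SAM

    model = SAM("sam_b.pt")  # illustrative checkpoint name
    # imgsz now propagates through the predictor into model.set_imgsz, so the
    # prompt encoder's embedding grid becomes 512 // 16 = 32 tokens per side.
    results = model("image.jpg", imgsz=512)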
ultralytics/models/sam/modules/tiny_encoder.py CHANGED
@@ -982,10 +982,31 @@ class TinyViT(nn.Module):
             layer = self.layers[i]
             x = layer(x)
         batch, _, channel = x.shape
-        x = x.view(batch, 64, 64, channel)
+        x = x.view(batch, self.patches_resolution[0] // 4, self.patches_resolution[1] // 4, channel)
         x = x.permute(0, 3, 1, 2)
         return self.neck(x)
 
     def forward(self, x):
         """Performs the forward pass through the TinyViT model, extracting features from the input image."""
         return self.forward_features(x)
+
+    def set_imgsz(self, imgsz=[1024, 1024]):
+        """
+        Set image size to make model compatible with different image sizes.
+
+        Args:
+            imgsz (Tuple[int, int]): The size of the input image.
+        """
+        imgsz = [s // 4 for s in imgsz]
+        self.patches_resolution = imgsz
+        for i, layer in enumerate(self.layers):
+            input_resolution = (
+                imgsz[0] // (2 ** (i - 1 if i == 3 else i)),
+                imgsz[1] // (2 ** (i - 1 if i == 3 else i)),
+            )
+            layer.input_resolution = input_resolution
+            if layer.downsample is not None:
+                layer.downsample.input_resolution = input_resolution
+            if isinstance(layer, BasicLayer):
+                for b in layer.blocks:
+                    b.input_resolution = input_resolution
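Note: the stride arithmetic in set_imgsz mirrors how TinyViT is built: the patch embedding downsamples by 4, each of the first three stages halves the grid, and the last stage keeps stage 2's resolution (hence i - 1 if i == 3 else i). A quick check of the per-stage resolutions this produces, and of why the old hard-coded 64 only worked at 1024px:

    imgsz = [1024, 1024]
    patches = [s // 4 for s in imgsz]  # patch embed stride of 4 -> [256, 256]
    for i in range(4):
        stride = 2 ** (i - 1 if i == 3 else i)
        print(i, [s // stride for s in patches])
    # 0 [256, 256]
    # 1 [128, 128]
    # 2 [64, 64]
    # 3 [64, 64]   <- final grid; 256 // 4 == 64 matches the old hard-coded view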
ultralytics/models/sam/predict.py CHANGED
@@ -95,7 +95,7 @@ class Predictor(BasePredictor):
         """
         if overrides is None:
             overrides = {}
-        overrides.update(dict(task="segment", mode="predict", imgsz=1024))
+        overrides.update(dict(task="segment", mode="predict"))
         super().__init__(cfg, overrides, _callbacks)
         self.args.retina_masks = True
         self.im = None
@@ -455,8 +455,11 @@ class Predictor(BasePredictor):
             cls = torch.arange(len(pred_masks), dtype=torch.int32, device=pred_masks.device)
             pred_bboxes = torch.cat([pred_bboxes, pred_scores[:, None], cls[:, None]], dim=-1)
 
-            masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
-            masks = masks > self.model.mask_threshold  # to bool
+            if len(masks) == 0:
+                masks = None
+            else:
+                masks = ops.scale_masks(masks[None].float(), orig_img.shape[:2], padding=False)[0]
+                masks = masks > self.model.mask_threshold  # to bool
             results.append(Results(orig_img, path=img_path, names=names, masks=masks, boxes=pred_bboxes))
         # Reset segment-all mode.
         self.segment_all = False
@@ -522,6 +525,10 @@ class Predictor(BasePredictor):
 
     def get_im_features(self, im):
         """Extracts image features using the SAM model's image encoder for subsequent mask prediction."""
+        assert (
+            isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1]
+        ), f"SAM models only support square image size, but got {self.imgsz}."
+        self.model.set_imgsz(self.imgsz)
         return self.model.image_encoder(im)
 
     def set_prompts(self, prompts):
@@ -761,6 +768,12 @@ class SAM2Predictor(Predictor):
 
     def get_im_features(self, im):
        """Extracts image features from the SAM image encoder for subsequent processing."""
+        assert (
+            isinstance(self.imgsz, (tuple, list)) and self.imgsz[0] == self.imgsz[1]
+        ), f"SAM 2 models only support square image size, but got {self.imgsz}."
+        self.model.set_imgsz(self.imgsz)
+        self._bb_feat_sizes = [[x // (4 * i) for x in self.imgsz] for i in [1, 2, 4]]
+
         backbone_out = self.model.forward_image(im)
         _, vision_feats, _, _ = self.model._prepare_backbone_features(backbone_out)
         if self.model.directly_add_no_mem_embed:
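Note: _bb_feat_sizes now derives the SAM 2 backbone feature-map sizes from imgsz instead of assuming the 1024px defaults; the three divisors 4, 8 and 16 come from 4 * i for i in [1, 2, 4]. Worked out for two square sizes:

    for imgsz in ([1024, 1024], [512, 512]):
        print(imgsz, [[x // (4 * i) for x in imgsz] for i in [1, 2, 4]])
    # [1024, 1024] [[256, 256], [128, 128], [64, 64]]
    # [512, 512] [[128, 128], [64, 64], [32, 32]]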
ultralytics-8.2.84.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ultralytics
-Version: 8.2.83
+Version: 8.2.84
 Summary: Ultralytics YOLOv8 for SOTA object detection, multi-object tracking, instance segmentation, pose estimation and image classification.
 Author: Glenn Jocher, Ayush Chaurasia, Jing Qiu
 Maintainer: Glenn Jocher, Ayush Chaurasia, Jing Qiu
ultralytics-8.2.84.dist-info/RECORD CHANGED
@@ -8,7 +8,7 @@ tests/test_exports.py,sha256=Uezf3OatpPHlo5qoPw-2kqkZxuMCF9L4XF2riD4vmII,8225
 tests/test_integrations.py,sha256=xglcfMPjfVh346PV8WTpk6tBxraCXEFJEQyyJMr5tyU,6064
 tests/test_python.py,sha256=SxBf5GNu7vXQP8QxTlSOzCzcQNN0PLA6EX8M33VDHsU,21927
 tests/test_solutions.py,sha256=p_2edhl96Ty3jwzSf02Q2m2mTu9skc0Z-eMcUuuXfLg,3300
-ultralytics/__init__.py,sha256=GCuzRCtg8kNcfqSooPJeB90ILBmOEWf_LhJirWsy0Z4,694
+ultralytics/__init__.py,sha256=YbOKlDA48wG1Ggkk8XH3ZxSQHQ2ys3jxPb9nb-H0lhk,694
 ultralytics/assets/bus.jpg,sha256=wCAZxJecGR63Od3ZRERe9Aja1Weayrb9Ug751DS_vGM,137419
 ultralytics/assets/zidane.jpg,sha256=Ftc4aeMmen1O0A3o6GCDO9FlfBslLpTAw0gnetx7bts,50427
 ultralytics/cfg/__init__.py,sha256=pkB7wk0pHOA3xzKzMbS-hA0iJoPOWVNnwZJh0LuWh-w,33089
@@ -99,7 +99,7 @@ ultralytics/data/explorer/gui/__init__.py,sha256=mHtJuK4hwF8cuV-VHDc7tp6u6D1gHz2
 ultralytics/data/explorer/gui/dash.py,sha256=vZ476NaUH4FKU08rAJ1K9WNyKtg0soMyJJxqg176yWc,10498
 ultralytics/engine/__init__.py,sha256=mHtJuK4hwF8cuV-VHDc7tp6u6D1gHz2Z7JI8grmQDTs,42
 ultralytics/engine/exporter.py,sha256=R46KseDRaSoPo8NTZX2yybxjJoLP8KCXIgMHNagE314,58888
-ultralytics/engine/model.py,sha256=2XsZYBZF3O4LEqRyffTudP31x7KmyV7jXzoIEMu8Mp8,52191
+ultralytics/engine/model.py,sha256=AB9tu7kJW-QiTAp0F_J8KQJ4FijsHXcYBTaVHb7aMrg,52281
 ultralytics/engine/predictor.py,sha256=TtCOhjCOAz4iUXog8V2h_3VhsKPJM8HmP3i6W_qqdhk,17460
 ultralytics/engine/results.py,sha256=PgRcz90S7eMwlogqEvax8O1sU3CPA2tEmrAL5kSr6II,70537
 ultralytics/engine/trainer.py,sha256=ebFsES6KfVlVoCx9xeEpGDtVDumEndTHqojbcs9BzHg,35940
@@ -107,7 +107,7 @@ ultralytics/engine/tuner.py,sha256=gPqDTHH7vRB2O3YyH26m1BjVKbXxuA2XAlPRzTKFZsc,1
 ultralytics/engine/validator.py,sha256=u7qh9AiHasfhIqwojfjvAY8B2IIhd2MIHRwTxsTP6RY,14586
 ultralytics/hub/__init__.py,sha256=AM_twjV9ouUmyxh3opoPgTqDpMOd8xIOHsAKdWS2L18,5663
 ultralytics/hub/auth.py,sha256=kDLakGa2NbzvMAeXc2UdzZ65r0AH-XeM_JfsDY97WGk,5545
-ultralytics/hub/session.py,sha256=JgiCJ7lre6wW6B5Xylo_-f7x0_QkAAF-xCbTVl1eank,16857
+ultralytics/hub/session.py,sha256=_5yQNKkeaOnxwBeL85ueCgR-IYnDQ89WuzFNjTNPflU,16888
 ultralytics/hub/utils.py,sha256=tXfM3QbXBcf4Y6StgHI1pktT4OM7Ic9eF3xiBFHGlhY,9721
 ultralytics/hub/google/__init__.py,sha256=qyvvpGP-4NAtrn7GLqfqxP_aWuRP1T0OvJYafWKvL2Q,7512
 ultralytics/models/__init__.py,sha256=TT9iLCL_n9Y80dcUq0Fo-p-GRZCSU2vrWXM3CoMwqqE,265
@@ -128,15 +128,15 @@ ultralytics/models/rtdetr/val.py,sha256=xVjZShZ1AvES97wVekl2q_1g20Pq-IIHhkJdWtxM
 ultralytics/models/sam/__init__.py,sha256=o4_D6y8YJlOXIK7Lwo9RHnIJJ9xoFNi4zK99QSc1kdM,176
 ultralytics/models/sam/amg.py,sha256=GrmO_8YfIDt_QkPEMF_WFjPZkhwhf7iwx7ig8JgOUnE,8709
 ultralytics/models/sam/build.py,sha256=zNQbrgSHUgz1gyXQwLKGTpa6CSEjeaevcP3w1Z1l3mo,12233
-ultralytics/models/sam/model.py,sha256=_EAgXA7nLeG_-wnvgG3tM_V4oDKlsHGTWX8El4xomo4,7374
-ultralytics/models/sam/predict.py,sha256=CFZriLbrH_ZoxP7DyYESuqw0rIrehu8Hctzr-IKucb0,37744
+ultralytics/models/sam/model.py,sha256=2KFUp8SHiqOgwUjkdqdau0oduJwKQxm4N9GHWjdhUFo,7382
+ultralytics/models/sam/predict.py,sha256=4HOvBp27MvO8ef3gD64wVooNT1P5eMy3Bk8W7ysU57o,38352
 ultralytics/models/sam/modules/__init__.py,sha256=mHtJuK4hwF8cuV-VHDc7tp6u6D1gHz2Z7JI8grmQDTs,42
 ultralytics/models/sam/modules/blocks.py,sha256=qXCXMqkQG0fpAvCkA9TrtimfOLDtyJfCx3bDfh3bJUs,45974
 ultralytics/models/sam/modules/decoders.py,sha256=qDr12mDvDA-VIMI7Q9oIYBG9DQcvxDFpPzyAjyqrcbg,25896
-ultralytics/models/sam/modules/encoders.py,sha256=KvQFAtqfGvCAr4kcMXxnJvjwIhaQ0a3Wwp0KhSSG_oA,34615
+ultralytics/models/sam/modules/encoders.py,sha256=vDOv8sdbcWc31aVn7hg-JyLP6CRziPep5FPDG2wxwns,34848
 ultralytics/models/sam/modules/memory_attention.py,sha256=XilWBnRfH8wZxIoL2-yEk-dRypCsS0Jf_9t8WJxXKg0,9722
-ultralytics/models/sam/modules/sam.py,sha256=6GE0E4m1J91QgFeUo0vw-Cz7gxebD1VfIGTNKm_meUI,49558
-ultralytics/models/sam/modules/tiny_encoder.py,sha256=04btkm8KfLZBP-nPihFmpO-mNrD2euVFSZtuzc21IZk,40439
+ultralytics/models/sam/modules/sam.py,sha256=0Df9psft2-uShp-WTP1oZT6x5QSE9S0i7XKBdZ4tpfE,50507
+ultralytics/models/sam/modules/tiny_encoder.py,sha256=NyzeFMLnmqwcFQFs-JBM9PCWSsYoYZ_6h59Un1DeDV0,41332
 ultralytics/models/sam/modules/transformer.py,sha256=oMlns0i_bcEqdcdnDJzeM7er2_yqqdYk4hZd3QbEGWQ,16154
 ultralytics/models/sam/modules/utils.py,sha256=Y36V6BVy6GeaAvKE8gHmoDIa-f5LjJpmSVwywNkv2yk,12315
 ultralytics/models/utils/__init__.py,sha256=mHtJuK4hwF8cuV-VHDc7tp6u6D1gHz2Z7JI8grmQDTs,42
@@ -225,9 +225,9 @@ ultralytics/utils/callbacks/neptune.py,sha256=5Z3ua5YBTUS56FH8VQKQG1aaIo9fH8GEyz
 ultralytics/utils/callbacks/raytune.py,sha256=ODVYzy-CoM4Uge0zjkh3Hnh9nF2M0vhDrSenXnvcizw,705
 ultralytics/utils/callbacks/tensorboard.py,sha256=0kn4IR10no99UCIheojWRujgybmUHSx5fPI6Vsq6l_g,4135
 ultralytics/utils/callbacks/wb.py,sha256=9-fjQIdLjr3b73DTE3rHO171KvbH1VweJ-bmbv-rqTw,6747
-ultralytics-8.2.83.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
-ultralytics-8.2.83.dist-info/METADATA,sha256=bcRaFabp1etZymVCQyCNkWtJeVceTpSGXSOmywZaBiA,41778
-ultralytics-8.2.83.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
-ultralytics-8.2.83.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
-ultralytics-8.2.83.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
-ultralytics-8.2.83.dist-info/RECORD,,
+ultralytics-8.2.84.dist-info/LICENSE,sha256=DZak_2itbUtvHzD3E7GNUYSRK6jdOJ-GqncQ2weavLA,34523
+ultralytics-8.2.84.dist-info/METADATA,sha256=NPkRQek_u6FUwJA7J-GnbT4OSmqxRJYZTR6CIWVTE24,41778
+ultralytics-8.2.84.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
+ultralytics-8.2.84.dist-info/entry_points.txt,sha256=YM_wiKyTe9yRrsEfqvYolNO5ngwfoL4-NwgKzc8_7sI,93
+ultralytics-8.2.84.dist-info/top_level.txt,sha256=XP49TwiMw4QGsvTLSYiJhz1xF_k7ev5mQ8jJXaXi45Q,12
+ultralytics-8.2.84.dist-info/RECORD,,