ultralytics 8.3.189__py3-none-any.whl → 8.3.191__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. tests/test_cuda.py +6 -5
  2. tests/test_exports.py +1 -6
  3. tests/test_python.py +1 -4
  4. tests/test_solutions.py +1 -1
  5. ultralytics/__init__.py +1 -1
  6. ultralytics/cfg/__init__.py +16 -14
  7. ultralytics/cfg/datasets/VisDrone.yaml +4 -4
  8. ultralytics/data/annotator.py +6 -6
  9. ultralytics/data/augment.py +53 -51
  10. ultralytics/data/base.py +15 -13
  11. ultralytics/data/build.py +7 -4
  12. ultralytics/data/converter.py +9 -10
  13. ultralytics/data/dataset.py +24 -22
  14. ultralytics/data/loaders.py +13 -11
  15. ultralytics/data/split.py +4 -3
  16. ultralytics/data/split_dota.py +14 -12
  17. ultralytics/data/utils.py +31 -25
  18. ultralytics/engine/exporter.py +7 -4
  19. ultralytics/engine/model.py +16 -14
  20. ultralytics/engine/predictor.py +9 -7
  21. ultralytics/engine/results.py +59 -57
  22. ultralytics/engine/trainer.py +7 -0
  23. ultralytics/engine/tuner.py +4 -3
  24. ultralytics/engine/validator.py +3 -1
  25. ultralytics/hub/__init__.py +6 -2
  26. ultralytics/hub/auth.py +2 -2
  27. ultralytics/hub/google/__init__.py +9 -8
  28. ultralytics/hub/session.py +11 -11
  29. ultralytics/hub/utils.py +8 -9
  30. ultralytics/models/fastsam/model.py +8 -6
  31. ultralytics/models/nas/model.py +5 -3
  32. ultralytics/models/rtdetr/train.py +4 -3
  33. ultralytics/models/rtdetr/val.py +6 -4
  34. ultralytics/models/sam/amg.py +13 -10
  35. ultralytics/models/sam/model.py +3 -2
  36. ultralytics/models/sam/modules/blocks.py +21 -21
  37. ultralytics/models/sam/modules/decoders.py +11 -11
  38. ultralytics/models/sam/modules/encoders.py +25 -25
  39. ultralytics/models/sam/modules/memory_attention.py +9 -8
  40. ultralytics/models/sam/modules/sam.py +8 -10
  41. ultralytics/models/sam/modules/tiny_encoder.py +21 -20
  42. ultralytics/models/sam/modules/transformer.py +6 -5
  43. ultralytics/models/sam/modules/utils.py +7 -5
  44. ultralytics/models/sam/predict.py +32 -31
  45. ultralytics/models/utils/loss.py +29 -27
  46. ultralytics/models/utils/ops.py +10 -8
  47. ultralytics/models/yolo/classify/train.py +7 -5
  48. ultralytics/models/yolo/classify/val.py +10 -8
  49. ultralytics/models/yolo/detect/predict.py +3 -3
  50. ultralytics/models/yolo/detect/train.py +8 -6
  51. ultralytics/models/yolo/detect/val.py +23 -21
  52. ultralytics/models/yolo/model.py +14 -14
  53. ultralytics/models/yolo/obb/train.py +5 -3
  54. ultralytics/models/yolo/obb/val.py +13 -10
  55. ultralytics/models/yolo/pose/train.py +7 -5
  56. ultralytics/models/yolo/pose/val.py +11 -9
  57. ultralytics/models/yolo/segment/train.py +4 -5
  58. ultralytics/models/yolo/segment/val.py +12 -10
  59. ultralytics/models/yolo/world/train.py +9 -7
  60. ultralytics/models/yolo/yoloe/train.py +7 -6
  61. ultralytics/models/yolo/yoloe/val.py +10 -8
  62. ultralytics/nn/autobackend.py +40 -52
  63. ultralytics/nn/modules/__init__.py +3 -3
  64. ultralytics/nn/modules/block.py +12 -12
  65. ultralytics/nn/modules/conv.py +4 -3
  66. ultralytics/nn/modules/head.py +46 -38
  67. ultralytics/nn/modules/transformer.py +22 -21
  68. ultralytics/nn/tasks.py +2 -2
  69. ultralytics/nn/text_model.py +6 -5
  70. ultralytics/solutions/analytics.py +7 -5
  71. ultralytics/solutions/config.py +12 -10
  72. ultralytics/solutions/distance_calculation.py +3 -3
  73. ultralytics/solutions/heatmap.py +4 -2
  74. ultralytics/solutions/object_counter.py +5 -3
  75. ultralytics/solutions/parking_management.py +4 -2
  76. ultralytics/solutions/region_counter.py +7 -5
  77. ultralytics/solutions/similarity_search.py +5 -3
  78. ultralytics/solutions/solutions.py +38 -36
  79. ultralytics/solutions/streamlit_inference.py +8 -7
  80. ultralytics/trackers/bot_sort.py +11 -9
  81. ultralytics/trackers/byte_tracker.py +17 -15
  82. ultralytics/trackers/utils/gmc.py +4 -3
  83. ultralytics/utils/__init__.py +27 -77
  84. ultralytics/utils/autobatch.py +3 -2
  85. ultralytics/utils/autodevice.py +10 -10
  86. ultralytics/utils/benchmarks.py +11 -10
  87. ultralytics/utils/callbacks/comet.py +9 -9
  88. ultralytics/utils/callbacks/platform.py +2 -1
  89. ultralytics/utils/checks.py +20 -29
  90. ultralytics/utils/downloads.py +2 -2
  91. ultralytics/utils/export.py +12 -11
  92. ultralytics/utils/files.py +8 -7
  93. ultralytics/utils/git.py +139 -0
  94. ultralytics/utils/instance.py +8 -7
  95. ultralytics/utils/logger.py +7 -6
  96. ultralytics/utils/loss.py +15 -13
  97. ultralytics/utils/metrics.py +62 -62
  98. ultralytics/utils/nms.py +346 -0
  99. ultralytics/utils/ops.py +83 -251
  100. ultralytics/utils/patches.py +6 -4
  101. ultralytics/utils/plotting.py +18 -16
  102. ultralytics/utils/tal.py +1 -1
  103. ultralytics/utils/torch_utils.py +4 -2
  104. ultralytics/utils/tqdm.py +47 -33
  105. ultralytics/utils/triton.py +3 -2
  106. {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/METADATA +1 -1
  107. {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/RECORD +111 -109
  108. {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/WHEEL +0 -0
  109. {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/entry_points.txt +0 -0
  110. {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/licenses/LICENSE +0 -0
  111. {ultralytics-8.3.189.dist-info → ultralytics-8.3.191.dist-info}/top_level.txt +0 -0
ultralytics/nn/modules/block.py CHANGED
@@ -1,7 +1,7 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Block modules."""
 
-from typing import List, Optional, Tuple
+from __future__ import annotations
 
 import torch
 import torch.nn as nn
@@ -192,7 +192,7 @@ class HGBlock(nn.Module):
 class SPP(nn.Module):
     """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
 
-    def __init__(self, c1: int, c2: int, k: Tuple[int, ...] = (5, 9, 13)):
+    def __init__(self, c1: int, c2: int, k: tuple[int, ...] = (5, 9, 13)):
         """
         Initialize the SPP layer with input/output channels and pooling kernel sizes.
 
@@ -471,7 +471,7 @@ class Bottleneck(nn.Module):
     """Standard bottleneck."""
 
     def __init__(
-        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
     ):
         """
         Initialize a standard bottleneck module.
@@ -711,7 +711,7 @@ class ImagePoolingAttn(nn.Module):
     """ImagePoolingAttn: Enhance the text embeddings with image-aware information."""
 
     def __init__(
-        self, ec: int = 256, ch: Tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
+        self, ec: int = 256, ch: tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
     ):
         """
         Initialize ImagePoolingAttn module.
@@ -740,7 +740,7 @@ class ImagePoolingAttn(nn.Module):
         self.hc = ec // nh
         self.k = k
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
         """
         Forward pass of ImagePoolingAttn.
 
@@ -856,7 +856,7 @@ class RepBottleneck(Bottleneck):
     """Rep bottleneck."""
 
     def __init__(
-        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: tuple[int, int] = (3, 3), e: float = 0.5
     ):
         """
         Initialize RepBottleneck.
@@ -1026,7 +1026,7 @@ class SPPELAN(nn.Module):
 class CBLinear(nn.Module):
     """CBLinear."""
 
-    def __init__(self, c1: int, c2s: List[int], k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
+    def __init__(self, c1: int, c2s: list[int], k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
         """
         Initialize CBLinear module.
 
@@ -1042,7 +1042,7 @@ class CBLinear(nn.Module):
         self.c2s = c2s
         self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True)
 
-    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
+    def forward(self, x: torch.Tensor) -> list[torch.Tensor]:
         """Forward pass through CBLinear layer."""
         return self.conv(x).split(self.c2s, dim=1)
 
@@ -1050,7 +1050,7 @@ class CBLinear(nn.Module):
 class CBFuse(nn.Module):
     """CBFuse."""
 
-    def __init__(self, idx: List[int]):
+    def __init__(self, idx: list[int]):
         """
         Initialize CBFuse module.
 
@@ -1060,7 +1060,7 @@ class CBFuse(nn.Module):
         super().__init__()
         self.idx = idx
 
-    def forward(self, xs: List[torch.Tensor]) -> torch.Tensor:
+    def forward(self, xs: list[torch.Tensor]) -> torch.Tensor:
         """
         Forward pass through CBFuse layer.
 
@@ -1974,7 +1974,7 @@ class Residual(nn.Module):
 class SAVPE(nn.Module):
     """Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
 
-    def __init__(self, ch: List[int], c3: int, embed: int):
+    def __init__(self, ch: list[int], c3: int, embed: int):
         """
         Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
 
@@ -2002,7 +2002,7 @@ class SAVPE(nn.Module):
         self.cv5 = nn.Conv2d(1, self.c, 3, padding=1)
         self.cv6 = nn.Sequential(Conv(2 * self.c, self.c, 3), nn.Conv2d(self.c, self.c, 3, padding=1))
 
-    def forward(self, x: List[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
+    def forward(self, x: list[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
         """Process input features and visual prompts to generate enhanced embeddings."""
         y = [self.cv2[i](xi) for i, xi in enumerate(x)]
         y = self.cv4(torch.cat(y, dim=1))
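The dominant change across these module files is the move from typing.List, typing.Optional, and typing.Tuple to PEP 585 builtin generics and PEP 604 unions, made backward-compatible by "from __future__ import annotations", which turns every annotation into a lazily evaluated string. A minimal sketch of the pattern (illustrative names, not code from the package):

from __future__ import annotations  # annotations are no longer evaluated at runtime

import torch


def scale_all(xs: list[torch.Tensor], factor: float | None = None) -> tuple[torch.Tensor, ...]:
    """list[...] and X | None annotations parse even on Python versions predating PEP 585/604."""
    f = 1.0 if factor is None else factor
    return tuple(x * f for x in xs)

Note the future import only defers annotation evaluation; runtime uses of list[int] or int | None as values would still require Python 3.9 and 3.10 respectively.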
ultralytics/nn/modules/conv.py CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Convolution modules."""
 
+from __future__ import annotations
+
 import math
-from typing import List
 
 import numpy as np
 import torch
@@ -669,7 +670,7 @@ class Concat(nn.Module):
         super().__init__()
         self.d = dimension
 
-    def forward(self, x: List[torch.Tensor]):
+    def forward(self, x: list[torch.Tensor]):
         """
         Concatenate input tensors along specified dimension.
 
@@ -700,7 +701,7 @@ class Index(nn.Module):
         super().__init__()
         self.index = index
 
-    def forward(self, x: List[torch.Tensor]):
+    def forward(self, x: list[torch.Tensor]):
         """
         Select and return a particular index from input.
 
ultralytics/nn/modules/head.py CHANGED
@@ -1,15 +1,17 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Model head modules."""
 
+from __future__ import annotations
+
 import copy
 import math
-from typing import List, Optional, Tuple, Union
 
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.nn.init import constant_, xavier_uniform_
 
+from ultralytics.utils import NOT_MACOS14
 from ultralytics.utils.tal import TORCH_1_10, dist2bbox, dist2rbox, make_anchors
 from ultralytics.utils.torch_utils import fuse_conv_and_bn, smart_inference_mode
 
@@ -75,7 +77,7 @@ class Detect(nn.Module):
     legacy = False  # backward compatibility for v3/v5/v8/v9 models
     xyxy = False  # xyxy or xywh output
 
-    def __init__(self, nc: int = 80, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ch: tuple = ()):
         """
         Initialize the YOLO detection layer with specified number of classes and channels.
 
@@ -111,7 +113,7 @@ class Detect(nn.Module):
             self.one2one_cv2 = copy.deepcopy(self.cv2)
             self.one2one_cv3 = copy.deepcopy(self.cv3)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[List[torch.Tensor], Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> list[torch.Tensor] | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         if self.end2end:
             return self.forward_end2end(x)
@@ -123,7 +125,7 @@ class Detect(nn.Module):
         y = self._inference(x)
         return y if self.export else (y, x)
 
-    def forward_end2end(self, x: List[torch.Tensor]) -> Union[dict, Tuple]:
+    def forward_end2end(self, x: list[torch.Tensor]) -> dict | tuple:
         """
         Perform forward pass of the v10Detect module.
 
@@ -147,7 +149,7 @@ class Detect(nn.Module):
         y = self.postprocess(y.permute(0, 2, 1), self.max_det, self.nc)
         return y if self.export else (y, {"one2many": x, "one2one": one2one})
 
-    def _inference(self, x: List[torch.Tensor]) -> torch.Tensor:
+    def _inference(self, x: list[torch.Tensor]) -> torch.Tensor:
         """
         Decode predicted bounding boxes and class probabilities based on multiple-level feature maps.
 
@@ -199,7 +201,12 @@
 
     def decode_bboxes(self, bboxes: torch.Tensor, anchors: torch.Tensor, xywh: bool = True) -> torch.Tensor:
         """Decode bounding boxes from predictions."""
-        return dist2bbox(bboxes, anchors, xywh=xywh and not (self.end2end or self.xyxy), dim=1)
+        return dist2bbox(
+            bboxes,
+            anchors,
+            xywh=xywh and not self.end2end and not self.xyxy,
+            dim=1,
+        )
 
     @staticmethod
     def postprocess(preds: torch.Tensor, max_det: int, nc: int = 80) -> torch.Tensor:
@@ -248,7 +255,7 @@ class Segment(Detect):
         >>> outputs = segment(x)
     """
 
-    def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, nm: int = 32, npr: int = 256, ch: tuple = ()):
         """
         Initialize the YOLO model attributes such as the number of masks, prototypes, and the convolution layers.
 
@@ -266,7 +273,7 @@ class Segment(Detect):
         c4 = max(ch[0] // 4, self.nm)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nm, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[Tuple, List[torch.Tensor]]:
+    def forward(self, x: list[torch.Tensor]) -> tuple | list[torch.Tensor]:
         """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
         p = self.proto(x[0])  # mask protos
         bs = p.shape[0]  # batch size
@@ -300,7 +307,7 @@ class OBB(Detect):
         >>> outputs = obb(x)
     """
 
-    def __init__(self, nc: int = 80, ne: int = 1, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ne: int = 1, ch: tuple = ()):
         """
         Initialize OBB with number of classes `nc` and layer channels `ch`.
 
@@ -315,7 +322,7 @@ class OBB(Detect):
         c4 = max(ch[0] // 4, self.ne)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.ne, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         bs = x[0].shape[0]  # batch size
         angle = torch.cat([self.cv4[i](x[i]).view(bs, self.ne, -1) for i in range(self.nl)], 2)  # OBB theta logits
@@ -356,7 +363,7 @@ class Pose(Detect):
         >>> outputs = pose(x)
     """
 
-    def __init__(self, nc: int = 80, kpt_shape: Tuple = (17, 3), ch: Tuple = ()):
+    def __init__(self, nc: int = 80, kpt_shape: tuple = (17, 3), ch: tuple = ()):
         """
         Initialize YOLO network with default parameters and Convolutional Layers.
 
@@ -372,7 +379,7 @@ class Pose(Detect):
         c4 = max(ch[0] // 4, self.nk)
         self.cv4 = nn.ModuleList(nn.Sequential(Conv(x, c4, 3), Conv(c4, c4, 3), nn.Conv2d(c4, self.nk, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor]) -> torch.Tensor | tuple:
         """Perform forward pass through YOLO model and return predictions."""
         bs = x[0].shape[0]  # batch size
         kpt = torch.cat([self.cv4[i](x[i]).view(bs, self.nk, -1) for i in range(self.nl)], -1)  # (bs, 17*3, h*w)
@@ -408,7 +415,10 @@
         else:
             y = kpts.clone()
             if ndim == 3:
-                y[:, 2::ndim] = y[:, 2::ndim].sigmoid()  # sigmoid (WARNING: inplace .sigmoid_() Apple MPS bug)
+                if NOT_MACOS14:
+                    y[:, 2::ndim].sigmoid_()
+                else:  # Apple macOS14 MPS bug https://github.com/ultralytics/ultralytics/pull/21878
+                    y[:, 2::ndim] = y[:, 2::ndim].sigmoid()
             y[:, 0::ndim] = (y[:, 0::ndim] * 2.0 + (self.anchors[0] - 0.5)) * self.strides
             y[:, 1::ndim] = (y[:, 1::ndim] * 2.0 + (self.anchors[1] - 0.5)) * self.strides
             return y
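The Pose.kpts_decode hunk above switches to the faster in-place sigmoid_() everywhere except macOS 14, where an Apple MPS bug affects in-place ops on strided views (per the PR linked in the diff). A hedged sketch of the guard pattern; the NOT_MACOS14 definition below is an assumption that mirrors the flag imported from ultralytics.utils, not a copy of its source:

import platform

import torch

# Assumed semantics: True on every platform except macOS 14.x (Darwin with mac_ver 14.*)
NOT_MACOS14 = not (platform.system() == "Darwin" and platform.mac_ver()[0].startswith("14."))

y = torch.randn(2, 51, 8400)  # (batch, 17 keypoints * 3 values, anchors)
ndim = 3
if NOT_MACOS14:
    y[:, 2::ndim].sigmoid_()  # in-place on the strided confidence channels, no extra allocation
else:  # out-of-place fallback sidesteps the macOS 14 MPS strided in-place bug
    y[:, 2::ndim] = y[:, 2::ndim].sigmoid()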
@@ -439,7 +449,7 @@ class Classify(nn.Module):
 
     export = False  # export mode
 
-    def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
+    def __init__(self, c1: int, c2: int, k: int = 1, s: int = 1, p: int | None = None, g: int = 1):
         """
         Initialize YOLO classification head to transform input tensor from (b,c1,20,20) to (b,c2) shape.
 
@@ -458,7 +468,7 @@ class Classify(nn.Module):
         self.drop = nn.Dropout(p=0.0, inplace=True)
         self.linear = nn.Linear(c_, c2)  # to x(b,c2)
 
-    def forward(self, x: Union[List[torch.Tensor], torch.Tensor]) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor] | torch.Tensor) -> torch.Tensor | tuple:
         """Perform forward pass of the YOLO model on input image data."""
         if isinstance(x, list):
             x = torch.cat(x, 1)
@@ -492,7 +502,7 @@ class WorldDetect(Detect):
         >>> outputs = world_detect(x, text)
     """
 
-    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
         """
         Initialize YOLO detection layer with nc classes and layer channels ch.
 
@@ -507,7 +517,7 @@ class WorldDetect(Detect):
         self.cv3 = nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
         self.cv4 = nn.ModuleList(BNContrastiveHead(embed) if with_bn else ContrastiveHead() for _ in ch)
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> Union[List[torch.Tensor], Tuple]:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> list[torch.Tensor] | tuple:
         """Concatenate and return predicted bounding boxes and class probabilities."""
         for i in range(self.nl):
             x[i] = torch.cat((self.cv2[i](x[i]), self.cv4[i](self.cv3[i](x[i]), text)), 1)
@@ -576,7 +586,7 @@ class LRPCHead(nn.Module):
             linear.bias.data = conv.bias.data
         return linear
 
-    def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) -> Tuple[Tuple, torch.Tensor]:
+    def forward(self, cls_feat: torch.Tensor, loc_feat: torch.Tensor, conf: float) -> tuple[tuple, torch.Tensor]:
         """Process classification and localization features to generate detection proposals."""
         if self.enabled:
             pf_score = self.pf(cls_feat)[0, 0].flatten(0)
@@ -625,7 +635,7 @@ class YOLOEDetect(Detect):
 
     is_fused = False
 
-    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, embed: int = 512, with_bn: bool = False, ch: tuple = ()):
         """
         Initialize YOLO detection layer with nc classes and layer channels ch.
 
@@ -638,7 +648,7 @@ class YOLOEDetect(Detect):
         super().__init__(nc, ch)
         c3 = max(ch[0], min(self.nc, 100))
         assert c3 <= embed
-        assert with_bn is True
+        assert with_bn
         self.cv3 = (
             nn.ModuleList(nn.Sequential(Conv(x, c3, 3), Conv(c3, c3, 3), nn.Conv2d(c3, embed, 1)) for x in ch)
             if self.legacy
@@ -705,11 +715,11 @@ class YOLOEDetect(Detect):
         self.reprta = nn.Identity()
         self.is_fused = True
 
-    def get_tpe(self, tpe: Optional[torch.Tensor]) -> Optional[torch.Tensor]:
+    def get_tpe(self, tpe: torch.Tensor | None) -> torch.Tensor | None:
         """Get text prompt embeddings with normalization."""
         return None if tpe is None else F.normalize(self.reprta(tpe), dim=-1, p=2)
 
-    def get_vpe(self, x: List[torch.Tensor], vpe: torch.Tensor) -> torch.Tensor:
+    def get_vpe(self, x: list[torch.Tensor], vpe: torch.Tensor) -> torch.Tensor:
         """Get visual prompt embeddings with spatial awareness."""
         if vpe.shape[1] == 0:  # no visual prompt embeddings
             return torch.zeros(x[0].shape[0], 0, self.embed, device=x[0].device)
@@ -718,7 +728,7 @@ class YOLOEDetect(Detect):
         assert vpe.ndim == 3  # (B, N, D)
         return vpe
 
-    def forward_lrpc(self, x: List[torch.Tensor], return_mask: bool = False) -> Union[torch.Tensor, Tuple]:
+    def forward_lrpc(self, x: list[torch.Tensor], return_mask: bool = False) -> torch.Tensor | tuple:
         """Process features with fused text embeddings to generate detections for prompt-free model."""
         masks = []
         assert self.is_fused, "Prompt-free inference requires model to be fused!"
@@ -756,9 +766,7 @@ class YOLOEDetect(Detect):
         else:
             return y if self.export else (y, x)
 
-    def forward(
-        self, x: List[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False
-    ) -> Union[torch.Tensor, Tuple]:
+    def forward(self, x: list[torch.Tensor], cls_pe: torch.Tensor, return_mask: bool = False) -> torch.Tensor | tuple:
         """Process features with class prompt embeddings to generate detections."""
         if hasattr(self, "lrpc"):  # for prompt-free inference
             return self.forward_lrpc(x, return_mask)
@@ -807,7 +815,7 @@ class YOLOESegment(YOLOEDetect):
     """
 
     def __init__(
-        self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: Tuple = ()
+        self, nc: int = 80, nm: int = 32, npr: int = 256, embed: int = 512, with_bn: bool = False, ch: tuple = ()
    ):
        """
        Initialize YOLOESegment with class count, mask parameters, and embedding dimensions.
@@ -828,7 +836,7 @@ class YOLOESegment(YOLOEDetect):
         c5 = max(ch[0] // 4, self.nm)
         self.cv5 = nn.ModuleList(nn.Sequential(Conv(x, c5, 3), Conv(c5, c5, 3), nn.Conv2d(c5, self.nm, 1)) for x in ch)
 
-    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> Union[Tuple, torch.Tensor]:
+    def forward(self, x: list[torch.Tensor], text: torch.Tensor) -> tuple | torch.Tensor:
         """Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
         p = self.proto(x[0])  # mask protos
         bs = p.shape[0]  # batch size
@@ -896,7 +904,7 @@ class RTDETRDecoder(nn.Module):
     def __init__(
         self,
         nc: int = 80,
-        ch: Tuple = (512, 1024, 2048),
+        ch: tuple = (512, 1024, 2048),
         hd: int = 256,  # hidden dim
         nq: int = 300,  # num queries
         ndp: int = 4,  # num decoder points
@@ -972,7 +980,7 @@ class RTDETRDecoder(nn.Module):
 
         self._reset_parameters()
 
-    def forward(self, x: List[torch.Tensor], batch: Optional[dict] = None) -> Union[Tuple, torch.Tensor]:
+    def forward(self, x: list[torch.Tensor], batch: dict | None = None) -> tuple | torch.Tensor:
         """
         Run the forward pass of the module, returning bounding box and classification scores for the input.
 
@@ -1024,12 +1032,12 @@ class RTDETRDecoder(nn.Module):
 
     def _generate_anchors(
         self,
-        shapes: List[List[int]],
+        shapes: list[list[int]],
         grid_size: float = 0.05,
         dtype: torch.dtype = torch.float32,
         device: str = "cpu",
         eps: float = 1e-2,
-    ) -> Tuple[torch.Tensor, torch.Tensor]:
+    ) -> tuple[torch.Tensor, torch.Tensor]:
         """
         Generate anchor bounding boxes for given shapes with specific grid size and validate them.
 
@@ -1062,7 +1070,7 @@ class RTDETRDecoder(nn.Module):
         anchors = anchors.masked_fill(~valid_mask, float("inf"))
         return anchors, valid_mask
 
-    def _get_encoder_input(self, x: List[torch.Tensor]) -> Tuple[torch.Tensor, List[List[int]]]:
+    def _get_encoder_input(self, x: list[torch.Tensor]) -> tuple[torch.Tensor, list[list[int]]]:
         """
         Process and return encoder inputs by getting projection features from input and concatenating them.
 
@@ -1092,10 +1100,10 @@ class RTDETRDecoder(nn.Module):
     def _get_decoder_input(
         self,
         feats: torch.Tensor,
-        shapes: List[List[int]],
-        dn_embed: Optional[torch.Tensor] = None,
-        dn_bbox: Optional[torch.Tensor] = None,
-    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
+        shapes: list[list[int]],
+        dn_embed: torch.Tensor | None = None,
+        dn_bbox: torch.Tensor | None = None,
+    ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]:
         """
         Generate and prepare the input required for the decoder from the provided features and shapes.
 
@@ -1200,7 +1208,7 @@ class v10Detect(Detect):
 
     end2end = True
 
-    def __init__(self, nc: int = 80, ch: Tuple = ()):
+    def __init__(self, nc: int = 80, ch: tuple = ()):
         """
         Initialize the v10Detect object with the specified number of classes and input channels.
 
ultralytics/nn/modules/transformer.py CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Transformer modules."""
 
+from __future__ import annotations
+
 import math
-from typing import List, Optional
 
 import torch
 import torch.nn as nn
@@ -88,16 +89,16 @@ class TransformerEncoderLayer(nn.Module):
         self.normalize_before = normalize_before
 
     @staticmethod
-    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor] = None) -> torch.Tensor:
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None = None) -> torch.Tensor:
         """Add position embeddings to the tensor if provided."""
         return tensor if pos is None else tensor + pos
 
     def forward_post(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass with post-normalization.
@@ -122,9 +123,9 @@ class TransformerEncoderLayer(nn.Module):
     def forward_pre(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
    ) -> torch.Tensor:
        """
        Perform forward pass with pre-normalization.
@@ -149,9 +150,9 @@ class TransformerEncoderLayer(nn.Module):
     def forward(
         self,
         src: torch.Tensor,
-        src_mask: Optional[torch.Tensor] = None,
-        src_key_padding_mask: Optional[torch.Tensor] = None,
-        pos: Optional[torch.Tensor] = None,
+        src_mask: torch.Tensor | None = None,
+        src_key_padding_mask: torch.Tensor | None = None,
+        pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Forward propagate the input through the encoder module.
@@ -533,8 +534,8 @@ class MSDeformAttn(nn.Module):
         query: torch.Tensor,
         refer_bbox: torch.Tensor,
         value: torch.Tensor,
-        value_shapes: List,
-        value_mask: Optional[torch.Tensor] = None,
+        value_shapes: list,
+        value_mask: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform forward pass for multiscale deformable attention.
@@ -649,7 +650,7 @@ class DeformableTransformerDecoderLayer(nn.Module):
         self.norm3 = nn.LayerNorm(d_model)
 
     @staticmethod
-    def with_pos_embed(tensor: torch.Tensor, pos: Optional[torch.Tensor]) -> torch.Tensor:
+    def with_pos_embed(tensor: torch.Tensor, pos: torch.Tensor | None) -> torch.Tensor:
         """Add positional embeddings to the input tensor, if provided."""
         return tensor if pos is None else tensor + pos
 
@@ -672,10 +673,10 @@ class DeformableTransformerDecoderLayer(nn.Module):
         embed: torch.Tensor,
         refer_bbox: torch.Tensor,
         feats: torch.Tensor,
-        shapes: List,
-        padding_mask: Optional[torch.Tensor] = None,
-        attn_mask: Optional[torch.Tensor] = None,
-        query_pos: Optional[torch.Tensor] = None,
+        shapes: list,
+        padding_mask: torch.Tensor | None = None,
+        attn_mask: torch.Tensor | None = None,
+        query_pos: torch.Tensor | None = None,
     ) -> torch.Tensor:
         """
         Perform the forward pass through the entire decoder layer.
@@ -749,12 +750,12 @@ class DeformableTransformerDecoder(nn.Module):
         embed: torch.Tensor,  # decoder embeddings
         refer_bbox: torch.Tensor,  # anchor
         feats: torch.Tensor,  # image features
-        shapes: List,  # feature shapes
+        shapes: list,  # feature shapes
         bbox_head: nn.Module,
         score_head: nn.Module,
         pos_mlp: nn.Module,
-        attn_mask: Optional[torch.Tensor] = None,
-        padding_mask: Optional[torch.Tensor] = None,
+        attn_mask: torch.Tensor | None = None,
+        padding_mask: torch.Tensor | None = None,
     ):
         """
         Perform the forward pass through the entire decoder.
ultralytics/nn/tasks.py CHANGED
@@ -1548,7 +1548,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
     """
    ckpt, weight = torch_safe_load(weight)  # load ckpt
    args = {**DEFAULT_CFG_DICT, **(ckpt.get("train_args", {}))}  # combine model and default args, preferring model args
-    model = (ckpt.get("ema") or ckpt["model"]).to(device).float()  # FP32 model
+    model = (ckpt.get("ema") or ckpt["model"]).float()  # FP32 model
 
     # Model compatibility updates
     model.args = {k: v for k, v in args.items() if k in DEFAULT_CFG_KEYS}  # attach args to model
@@ -1557,7 +1557,7 @@ def attempt_load_one_weight(weight, device=None, inplace=True, fuse=False):
     if not hasattr(model, "stride"):
         model.stride = torch.tensor([32.0])
 
-    model = model.fuse().eval() if fuse and hasattr(model, "fuse") else model.eval()  # model in eval mode
+    model = (model.fuse() if fuse and hasattr(model, "fuse") else model).eval().to(device)  # model in eval mode
 
     # Module updates
     for m in model.modules():
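The attempt_load_one_weight change reorders device placement: the checkpoint now stays where it was deserialized while it is converted to FP32 and fused, and only the final eval-mode model is moved with .to(device). Conv+BN fusion itself folds the BatchNorm statistics into the convolution weights; below is a self-contained sketch of the standard folding identity (the package's fuse_conv_and_bn performs this same fold, but this is not its source):

import torch
import torch.nn as nn


def fuse_conv_bn(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    """Fold BN into conv: w' = w * g / sqrt(var + eps); b' = (b - mean) * g / sqrt(var + eps) + beta."""
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size,
                      conv.stride, conv.padding, conv.dilation, conv.groups, bias=True)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)  # per-output-channel gain
    fused.weight.data = conv.weight.data * scale.reshape(-1, 1, 1, 1)
    bias = conv.bias.data if conv.bias is not None else torch.zeros_like(bn.running_mean)
    fused.bias.data = (bias - bn.running_mean) * scale + bn.bias.data
    return fused

With the new ordering, this one-off fusion arithmetic runs where the weights already live, and only the fused module is transferred to the target device.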
ultralytics/nn/text_model.py CHANGED
@@ -1,8 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from abc import abstractmethod
 from pathlib import Path
-from typing import List, Union
 
 import torch
 import torch.nn as nn
@@ -91,7 +92,7 @@ class CLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts: Union[str, List[str]]) -> torch.Tensor:
+    def tokenize(self, texts: str | list[str]) -> torch.Tensor:
         """
         Convert input texts to CLIP tokens.
 
@@ -135,7 +136,7 @@ class CLIP(TextModel):
         return txt_feats
 
     @smart_inference_mode()
-    def encode_image(self, image: Union[Image.Image, torch.Tensor], dtype: torch.dtype = torch.float32) -> torch.Tensor:
+    def encode_image(self, image: Image.Image | torch.Tensor, dtype: torch.dtype = torch.float32) -> torch.Tensor:
         """
         Encode preprocessed images into normalized feature vectors.
 
@@ -234,7 +235,7 @@ class MobileCLIP(TextModel):
         self.device = device
         self.eval()
 
-    def tokenize(self, texts: List[str]) -> torch.Tensor:
+    def tokenize(self, texts: list[str]) -> torch.Tensor:
         """
         Convert input texts to MobileCLIP tokens.
 
@@ -319,7 +320,7 @@ class MobileCLIPTS(TextModel):
         self.tokenizer = clip.clip.tokenize
         self.device = device
 
-    def tokenize(self, texts: List[str]) -> torch.Tensor:
+    def tokenize(self, texts: list[str]) -> torch.Tensor:
         """
         Convert input texts to MobileCLIP tokens.
 
ultralytics/solutions/analytics.py CHANGED
@@ -1,7 +1,9 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 
+from __future__ import annotations
+
 from itertools import cycle
-from typing import Any, Dict, Optional
+from typing import Any
 
 import cv2
 import numpy as np
@@ -135,7 +137,7 @@ class Analytics(BaseSolution):
         return SolutionResults(plot_im=plot_im, total_tracks=len(self.track_ids), classwise_count=self.clswise_count)
 
     def update_graph(
-        self, frame_number: int, count_dict: Optional[Dict[str, int]] = None, plot: str = "line"
+        self, frame_number: int, count_dict: dict[str, int] | None = None, plot: str = "line"
     ) -> np.ndarray:
         """
         Update the graph with new data for single or multiple classes.
@@ -204,7 +206,7 @@ class Analytics(BaseSolution):
                     markersize=self.line_width * 5,
                     label=f"{key} Data Points",
                 )
-        if plot == "bar":
+        elif plot == "bar":
             self.ax.clear()  # clear bar data
             for label in labels:  # Map labels to colors
                 if label not in self.color_mapping:
@@ -224,12 +226,12 @@ class Analytics(BaseSolution):
             for bar, label in zip(bars, labels):
                 bar.set_label(label)  # Assign label to each bar
             self.ax.legend(loc="upper left", fontsize=13, facecolor=self.fg_color, edgecolor=self.fg_color)
-        if plot == "pie":
+        elif plot == "pie":
             total = sum(counts)
             percentages = [size / total * 100 for size in counts]
-            start_angle = 90
             self.ax.clear()
 
+            start_angle = 90
             # Create pie chart and create legend labels with percentages
             wedges, _ = self.ax.pie(
                 counts, labels=labels, startangle=start_angle, textprops={"color": self.fg_color}, autopct=None
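The update_graph edits turn the independent "if plot == ..." checks into a single if/elif chain and move start_angle = 90 to after ax.clear(). The plot types were already mutually exclusive, so this reads as a clarity and short-circuiting fix rather than a behavior change: once one branch matches, the remaining comparisons are skipped. A trimmed illustration of the shape of the fix (hypothetical branch bodies):

def update_graph(plot: str = "line") -> None:
    if plot == "line":
        ...  # draw line series
    elif plot == "bar":  # elif: evaluated only when no earlier branch matched
        ...  # clear axes, draw bars
    elif plot == "pie":
        ...  # clear axes, then set start_angle and draw wedges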