ultralytics-8.3.143-py3-none-any.whl → ultralytics-8.3.145-py3-none-any.whl

This diff compares the content of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
Files changed (148)
  1. tests/conftest.py +7 -24
  2. tests/test_cli.py +1 -1
  3. tests/test_cuda.py +7 -2
  4. tests/test_engine.py +7 -8
  5. tests/test_exports.py +16 -16
  6. tests/test_integrations.py +1 -1
  7. tests/test_solutions.py +11 -11
  8. ultralytics/__init__.py +1 -1
  9. ultralytics/cfg/__init__.py +16 -13
  10. ultralytics/data/annotator.py +6 -5
  11. ultralytics/data/augment.py +127 -126
  12. ultralytics/data/base.py +54 -51
  13. ultralytics/data/build.py +47 -23
  14. ultralytics/data/converter.py +47 -43
  15. ultralytics/data/dataset.py +51 -50
  16. ultralytics/data/loaders.py +77 -44
  17. ultralytics/data/split.py +22 -9
  18. ultralytics/data/split_dota.py +63 -39
  19. ultralytics/data/utils.py +59 -39
  20. ultralytics/engine/exporter.py +79 -27
  21. ultralytics/engine/model.py +52 -51
  22. ultralytics/engine/predictor.py +37 -28
  23. ultralytics/engine/results.py +191 -161
  24. ultralytics/engine/trainer.py +36 -19
  25. ultralytics/engine/tuner.py +12 -9
  26. ultralytics/engine/validator.py +7 -9
  27. ultralytics/hub/__init__.py +11 -13
  28. ultralytics/hub/auth.py +22 -2
  29. ultralytics/hub/google/__init__.py +19 -19
  30. ultralytics/hub/session.py +37 -51
  31. ultralytics/hub/utils.py +19 -5
  32. ultralytics/models/fastsam/model.py +30 -12
  33. ultralytics/models/fastsam/predict.py +5 -6
  34. ultralytics/models/fastsam/utils.py +3 -3
  35. ultralytics/models/fastsam/val.py +10 -6
  36. ultralytics/models/nas/model.py +9 -5
  37. ultralytics/models/nas/predict.py +6 -6
  38. ultralytics/models/nas/val.py +3 -3
  39. ultralytics/models/rtdetr/model.py +7 -6
  40. ultralytics/models/rtdetr/predict.py +14 -7
  41. ultralytics/models/rtdetr/train.py +10 -4
  42. ultralytics/models/rtdetr/val.py +36 -9
  43. ultralytics/models/sam/amg.py +30 -12
  44. ultralytics/models/sam/build.py +22 -22
  45. ultralytics/models/sam/model.py +10 -9
  46. ultralytics/models/sam/modules/blocks.py +76 -80
  47. ultralytics/models/sam/modules/decoders.py +6 -8
  48. ultralytics/models/sam/modules/encoders.py +23 -26
  49. ultralytics/models/sam/modules/memory_attention.py +13 -1
  50. ultralytics/models/sam/modules/sam.py +57 -26
  51. ultralytics/models/sam/modules/tiny_encoder.py +232 -237
  52. ultralytics/models/sam/modules/transformer.py +13 -13
  53. ultralytics/models/sam/modules/utils.py +11 -19
  54. ultralytics/models/sam/predict.py +114 -101
  55. ultralytics/models/utils/loss.py +98 -77
  56. ultralytics/models/utils/ops.py +116 -67
  57. ultralytics/models/yolo/classify/predict.py +5 -5
  58. ultralytics/models/yolo/classify/train.py +32 -28
  59. ultralytics/models/yolo/classify/val.py +7 -8
  60. ultralytics/models/yolo/detect/predict.py +1 -0
  61. ultralytics/models/yolo/detect/train.py +15 -14
  62. ultralytics/models/yolo/detect/val.py +37 -36
  63. ultralytics/models/yolo/model.py +106 -23
  64. ultralytics/models/yolo/obb/predict.py +3 -4
  65. ultralytics/models/yolo/obb/train.py +14 -6
  66. ultralytics/models/yolo/obb/val.py +29 -23
  67. ultralytics/models/yolo/pose/predict.py +9 -8
  68. ultralytics/models/yolo/pose/train.py +24 -16
  69. ultralytics/models/yolo/pose/val.py +44 -26
  70. ultralytics/models/yolo/segment/predict.py +5 -5
  71. ultralytics/models/yolo/segment/train.py +11 -7
  72. ultralytics/models/yolo/segment/val.py +2 -2
  73. ultralytics/models/yolo/world/train.py +33 -23
  74. ultralytics/models/yolo/world/train_world.py +11 -3
  75. ultralytics/models/yolo/yoloe/predict.py +11 -11
  76. ultralytics/models/yolo/yoloe/train.py +73 -21
  77. ultralytics/models/yolo/yoloe/train_seg.py +10 -7
  78. ultralytics/models/yolo/yoloe/val.py +42 -18
  79. ultralytics/nn/autobackend.py +59 -15
  80. ultralytics/nn/modules/__init__.py +4 -4
  81. ultralytics/nn/modules/activation.py +4 -1
  82. ultralytics/nn/modules/block.py +178 -111
  83. ultralytics/nn/modules/conv.py +6 -5
  84. ultralytics/nn/modules/head.py +469 -121
  85. ultralytics/nn/modules/transformer.py +147 -58
  86. ultralytics/nn/tasks.py +227 -20
  87. ultralytics/nn/text_model.py +30 -33
  88. ultralytics/solutions/ai_gym.py +4 -6
  89. ultralytics/solutions/analytics.py +7 -4
  90. ultralytics/solutions/config.py +10 -10
  91. ultralytics/solutions/distance_calculation.py +11 -10
  92. ultralytics/solutions/heatmap.py +2 -2
  93. ultralytics/solutions/instance_segmentation.py +7 -4
  94. ultralytics/solutions/object_blurrer.py +3 -3
  95. ultralytics/solutions/object_counter.py +15 -11
  96. ultralytics/solutions/object_cropper.py +3 -2
  97. ultralytics/solutions/parking_management.py +29 -28
  98. ultralytics/solutions/queue_management.py +6 -6
  99. ultralytics/solutions/region_counter.py +10 -3
  100. ultralytics/solutions/security_alarm.py +3 -3
  101. ultralytics/solutions/similarity_search.py +85 -24
  102. ultralytics/solutions/solutions.py +189 -79
  103. ultralytics/solutions/speed_estimation.py +28 -22
  104. ultralytics/solutions/streamlit_inference.py +17 -12
  105. ultralytics/solutions/trackzone.py +4 -4
  106. ultralytics/trackers/basetrack.py +16 -23
  107. ultralytics/trackers/bot_sort.py +30 -20
  108. ultralytics/trackers/byte_tracker.py +70 -64
  109. ultralytics/trackers/track.py +4 -8
  110. ultralytics/trackers/utils/gmc.py +31 -58
  111. ultralytics/trackers/utils/kalman_filter.py +37 -37
  112. ultralytics/trackers/utils/matching.py +1 -1
  113. ultralytics/utils/__init__.py +105 -89
  114. ultralytics/utils/autobatch.py +16 -3
  115. ultralytics/utils/autodevice.py +54 -24
  116. ultralytics/utils/benchmarks.py +45 -29
  117. ultralytics/utils/callbacks/base.py +3 -3
  118. ultralytics/utils/callbacks/clearml.py +9 -9
  119. ultralytics/utils/callbacks/comet.py +67 -25
  120. ultralytics/utils/callbacks/dvc.py +7 -10
  121. ultralytics/utils/callbacks/mlflow.py +2 -5
  122. ultralytics/utils/callbacks/neptune.py +7 -13
  123. ultralytics/utils/callbacks/raytune.py +1 -1
  124. ultralytics/utils/callbacks/tensorboard.py +5 -6
  125. ultralytics/utils/callbacks/wb.py +14 -14
  126. ultralytics/utils/checks.py +14 -13
  127. ultralytics/utils/dist.py +5 -5
  128. ultralytics/utils/downloads.py +94 -67
  129. ultralytics/utils/errors.py +5 -5
  130. ultralytics/utils/export.py +61 -47
  131. ultralytics/utils/files.py +23 -22
  132. ultralytics/utils/instance.py +48 -52
  133. ultralytics/utils/loss.py +78 -40
  134. ultralytics/utils/metrics.py +186 -130
  135. ultralytics/utils/ops.py +186 -190
  136. ultralytics/utils/patches.py +15 -17
  137. ultralytics/utils/plotting.py +71 -27
  138. ultralytics/utils/tal.py +21 -15
  139. ultralytics/utils/torch_utils.py +53 -50
  140. ultralytics/utils/triton.py +5 -4
  141. ultralytics/utils/tuner.py +5 -5
  142. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/METADATA +2 -2
  143. ultralytics-8.3.145.dist-info/RECORD +272 -0
  144. ultralytics-8.3.143.dist-info/RECORD +0 -272
  145. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/WHEEL +0 -0
  146. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/entry_points.txt +0 -0
  147. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/licenses/LICENSE +0 -0
  148. {ultralytics-8.3.143.dist-info → ultralytics-8.3.145.dist-info}/top_level.txt +0 -0
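The largest single change above is `ultralytics/nn/modules/block.py` (entry 82, +178 -111), whose diff follows below; it consists almost entirely of added type hints and expanded docstrings rather than behavioral changes. A minimal sketch to confirm which side of this diff is installed (assuming the package was installed from PyPI; the version string lives in `ultralytics/__init__.py`, the one-line change in entry 8):

```python
# Quick check, not part of the diff itself.
import ultralytics

print(ultralytics.__version__)  # "8.3.145" after upgrading, "8.3.143" before
```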
@@ -1,6 +1,8 @@
 # Ultralytics 🚀 AGPL-3.0 License - https://ultralytics.com/license
 """Block modules."""
 
+from typing import List, Optional, Tuple
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
@@ -60,15 +62,20 @@ class DFL(nn.Module):
     Proposed in Generalized Focal Loss https://ieeexplore.ieee.org/document/9792391
     """
 
-    def __init__(self, c1=16):
-        """Initialize a convolutional layer with a given number of input channels."""
+    def __init__(self, c1: int = 16):
+        """
+        Initialize a convolutional layer with a given number of input channels.
+
+        Args:
+            c1 (int): Number of input channels.
+        """
         super().__init__()
         self.conv = nn.Conv2d(c1, 1, 1, bias=False).requires_grad_(False)
         x = torch.arange(c1, dtype=torch.float)
         self.conv.weight.data[:] = nn.Parameter(x.view(1, c1, 1, 1))
         self.c1 = c1
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply the DFL module to input tensor and return transformed output."""
         b, _, a = x.shape  # batch, channels, anchors
         return self.conv(x.view(b, 4, self.c1, a).transpose(2, 1).softmax(1)).view(b, 4, a)
@@ -78,7 +85,7 @@ class DFL(nn.Module):
 class Proto(nn.Module):
     """Ultralytics YOLO models mask Proto module for segmentation models."""
 
-    def __init__(self, c1, c_=256, c2=32):
+    def __init__(self, c1: int, c_: int = 256, c2: int = 32):
         """
         Initialize the Ultralytics YOLO models mask Proto module with specified number of protos and masks.
 
@@ -93,7 +100,7 @@ class Proto(nn.Module):
         self.cv2 = Conv(c_, c_, k=3)
         self.cv3 = Conv(c_, c2)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Perform a forward pass through layers using an upsampled input image."""
         return self.cv3(self.cv2(self.upsample(self.cv1(x))))
 
@@ -105,7 +112,7 @@ class HGStem(nn.Module):
     https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
     """
 
-    def __init__(self, c1, cm, c2):
+    def __init__(self, c1: int, cm: int, c2: int):
         """
         Initialize the StemBlock of PPHGNetV2.
 
@@ -122,7 +129,7 @@ class HGStem(nn.Module):
         self.stem4 = Conv(cm, c2, 1, 1, act=nn.ReLU())
         self.pool = nn.MaxPool2d(kernel_size=2, stride=1, padding=0, ceil_mode=True)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass of a PPHGNetV2 backbone layer."""
         x = self.stem1(x)
         x = F.pad(x, [0, 1, 0, 1])
@@ -143,7 +150,17 @@ class HGBlock(nn.Module):
     https://github.com/PaddlePaddle/PaddleDetection/blob/develop/ppdet/modeling/backbones/hgnet_v2.py
     """
 
-    def __init__(self, c1, cm, c2, k=3, n=6, lightconv=False, shortcut=False, act=nn.ReLU()):
+    def __init__(
+        self,
+        c1: int,
+        cm: int,
+        c2: int,
+        k: int = 3,
+        n: int = 6,
+        lightconv: bool = False,
+        shortcut: bool = False,
+        act: nn.Module = nn.ReLU(),
+    ):
         """
         Initialize HGBlock with specified parameters.
 
@@ -164,7 +181,7 @@ class HGBlock(nn.Module):
         self.ec = Conv(c2 // 2, c2, 1, 1, act=act)  # excitation conv
         self.add = shortcut and c1 == c2
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass of a PPHGNetV2 backbone layer."""
         y = [x]
         y.extend(m(y[-1]) for m in self.m)
@@ -175,14 +192,14 @@ class HGBlock(nn.Module):
 class SPP(nn.Module):
     """Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729."""
 
-    def __init__(self, c1, c2, k=(5, 9, 13)):
+    def __init__(self, c1: int, c2: int, k: Tuple[int, ...] = (5, 9, 13)):
         """
         Initialize the SPP layer with input/output channels and pooling kernel sizes.
 
         Args:
             c1 (int): Input channels.
             c2 (int): Output channels.
-            k (Tuple[int, int, int]): Kernel sizes for max pooling.
+            k (tuple): Kernel sizes for max pooling.
         """
         super().__init__()
         c_ = c1 // 2  # hidden channels
@@ -190,7 +207,7 @@ class SPP(nn.Module):
         self.cv2 = Conv(c_ * (len(k) + 1), c2, 1, 1)
         self.m = nn.ModuleList([nn.MaxPool2d(kernel_size=x, stride=1, padding=x // 2) for x in k])
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass of the SPP layer, performing spatial pyramid pooling."""
         x = self.cv1(x)
         return self.cv2(torch.cat([x] + [m(x) for m in self.m], 1))
@@ -199,7 +216,7 @@ class SPP(nn.Module):
 class SPPF(nn.Module):
     """Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher."""
 
-    def __init__(self, c1, c2, k=5):
+    def __init__(self, c1: int, c2: int, k: int = 5):
         """
         Initialize the SPPF layer with given input/output channels and kernel size.
 
@@ -217,7 +234,7 @@ class SPPF(nn.Module):
         self.cv2 = Conv(c_ * 4, c2, 1, 1)
         self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply sequential pooling operations to input and return concatenated feature maps."""
         y = [self.cv1(x)]
         y.extend(self.m(y[-1]) for _ in range(3))
@@ -227,7 +244,7 @@ class SPPF(nn.Module):
 class C1(nn.Module):
     """CSP Bottleneck with 1 convolution."""
 
-    def __init__(self, c1, c2, n=1):
+    def __init__(self, c1: int, c2: int, n: int = 1):
         """
         Initialize the CSP Bottleneck with 1 convolution.
 
@@ -240,7 +257,7 @@ class C1(nn.Module):
         self.cv1 = Conv(c1, c2, 1, 1)
         self.m = nn.Sequential(*(Conv(c2, c2, 3) for _ in range(n)))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply convolution and residual connection to input tensor."""
         y = self.cv1(x)
         return self.m(y) + y
@@ -249,7 +266,7 @@ class C1(nn.Module):
 class C2(nn.Module):
     """CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
         """
         Initialize a CSP Bottleneck with 2 convolutions.
 
@@ -268,7 +285,7 @@ class C2(nn.Module):
         # self.attention = ChannelAttention(2 * self.c)  # or SpatialAttention()
         self.m = nn.Sequential(*(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n)))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through the CSP bottleneck with 2 convolutions."""
         a, b = self.cv1(x).chunk(2, 1)
         return self.cv2(torch.cat((self.m(a), b), 1))
@@ -277,7 +294,7 @@ class C2(nn.Module):
 class C2f(nn.Module):
     """Faster Implementation of CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
         """
         Initialize a CSP bottleneck with 2 convolutions.
 
@@ -295,13 +312,13 @@ class C2f(nn.Module):
         self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)
         self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through C2f layer."""
         y = list(self.cv1(x).chunk(2, 1))
         y.extend(m(y[-1]) for m in self.m)
         return self.cv2(torch.cat(y, 1))
 
-    def forward_split(self, x):
+    def forward_split(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass using split() instead of chunk()."""
         y = self.cv1(x).split((self.c, self.c), 1)
         y = [y[0], y[1]]
@@ -312,7 +329,7 @@ class C2f(nn.Module):
 class C3(nn.Module):
     """CSP Bottleneck with 3 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
         """
         Initialize the CSP Bottleneck with 3 convolutions.
 
@@ -331,7 +348,7 @@ class C3(nn.Module):
         self.cv3 = Conv(2 * c_, c2, 1)  # optional act=FReLU(c2)
         self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, k=((1, 1), (3, 3)), e=1.0) for _ in range(n)))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through the CSP bottleneck with 3 convolutions."""
         return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), 1))
 
@@ -339,7 +356,7 @@ class C3(nn.Module):
 class C3x(C3):
     """C3 module with cross-convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
         """
         Initialize C3 module with cross-convolutions.
 
@@ -359,7 +376,7 @@ class C3x(C3):
 class RepC3(nn.Module):
     """Rep C3."""
 
-    def __init__(self, c1, c2, n=3, e=1.0):
+    def __init__(self, c1: int, c2: int, n: int = 3, e: float = 1.0):
         """
         Initialize CSP Bottleneck with a single convolution.
 
@@ -376,7 +393,7 @@ class RepC3(nn.Module):
         self.m = nn.Sequential(*[RepConv(c_, c_) for _ in range(n)])
         self.cv3 = Conv(c_, c2, 1, 1) if c_ != c2 else nn.Identity()
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass of RepC3 module."""
         return self.cv3(self.m(self.cv1(x)) + self.cv2(x))
 
@@ -384,7 +401,7 @@ class RepC3(nn.Module):
 class C3TR(C3):
     """C3 module with TransformerBlock()."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
         """
         Initialize C3 module with TransformerBlock.
 
@@ -404,7 +421,7 @@ class C3TR(C3):
 class C3Ghost(C3):
     """C3 module with GhostBottleneck()."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
         """
         Initialize C3 module with GhostBottleneck.
 
@@ -424,7 +441,7 @@ class C3Ghost(C3):
 class GhostBottleneck(nn.Module):
     """Ghost Bottleneck https://github.com/huawei-noah/Efficient-AI-Backbones."""
 
-    def __init__(self, c1, c2, k=3, s=1):
+    def __init__(self, c1: int, c2: int, k: int = 3, s: int = 1):
         """
         Initialize Ghost Bottleneck module.
 
@@ -445,7 +462,7 @@ class GhostBottleneck(nn.Module):
             nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False)) if s == 2 else nn.Identity()
         )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply skip connection and concatenation to input tensor."""
         return self.conv(x) + self.shortcut(x)
 
@@ -453,7 +470,9 @@ class GhostBottleneck(nn.Module):
 class Bottleneck(nn.Module):
     """Standard bottleneck."""
 
-    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
+    def __init__(
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+    ):
         """
         Initialize a standard bottleneck module.
 
@@ -462,7 +481,7 @@ class Bottleneck(nn.Module):
             c2 (int): Output channels.
             shortcut (bool): Whether to use shortcut connection.
             g (int): Groups for convolutions.
-            k (Tuple[int, int]): Kernel sizes for convolutions.
+            k (tuple): Kernel sizes for convolutions.
             e (float): Expansion ratio.
         """
         super().__init__()
@@ -471,7 +490,7 @@ class Bottleneck(nn.Module):
         self.cv2 = Conv(c_, c2, k[1], 1, g=g)
         self.add = shortcut and c1 == c2
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply bottleneck with optional shortcut connection."""
         return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x))
 
@@ -479,7 +498,7 @@ class Bottleneck(nn.Module):
 class BottleneckCSP(nn.Module):
     """CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
         """
         Initialize CSP Bottleneck.
 
@@ -501,7 +520,7 @@ class BottleneckCSP(nn.Module):
         self.act = nn.SiLU()
         self.m = nn.Sequential(*(Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply CSP bottleneck with 3 convolutions."""
         y1 = self.cv3(self.m(self.cv1(x)))
         y2 = self.cv2(x)
@@ -511,7 +530,7 @@ class BottleneckCSP(nn.Module):
 class ResNetBlock(nn.Module):
     """ResNet block with standard convolution layers."""
 
-    def __init__(self, c1, c2, s=1, e=4):
+    def __init__(self, c1: int, c2: int, s: int = 1, e: int = 4):
         """
         Initialize ResNet block.
 
@@ -528,7 +547,7 @@ class ResNetBlock(nn.Module):
         self.cv3 = Conv(c2, c3, k=1, act=False)
         self.shortcut = nn.Sequential(Conv(c1, c3, k=1, s=s, act=False)) if s != 1 or c1 != c3 else nn.Identity()
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through the ResNet block."""
         return F.relu(self.cv3(self.cv2(self.cv1(x))) + self.shortcut(x))
 
@@ -536,7 +555,7 @@ class ResNetBlock(nn.Module):
 class ResNetLayer(nn.Module):
     """ResNet layer with multiple ResNet blocks."""
 
-    def __init__(self, c1, c2, s=1, is_first=False, n=1, e=4):
+    def __init__(self, c1: int, c2: int, s: int = 1, is_first: bool = False, n: int = 1, e: int = 4):
         """
         Initialize ResNet layer.
 
@@ -560,7 +579,7 @@ class ResNetLayer(nn.Module):
             blocks.extend([ResNetBlock(e * c2, c2, 1, e=e) for _ in range(n - 1)])
             self.layer = nn.Sequential(*blocks)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through the ResNet layer."""
         return self.layer(x)
 
@@ -568,7 +587,7 @@ class ResNetLayer(nn.Module):
 class MaxSigmoidAttnBlock(nn.Module):
     """Max Sigmoid attention block."""
 
-    def __init__(self, c1, c2, nh=1, ec=128, gc=512, scale=False):
+    def __init__(self, c1: int, c2: int, nh: int = 1, ec: int = 128, gc: int = 512, scale: bool = False):
         """
         Initialize MaxSigmoidAttnBlock.
 
@@ -589,7 +608,7 @@ class MaxSigmoidAttnBlock(nn.Module):
         self.proj_conv = Conv(c1, c2, k=3, s=1, act=False)
         self.scale = nn.Parameter(torch.ones(1, nh, 1, 1)) if scale else 1.0
 
-    def forward(self, x, guide):
+    def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
         """
         Forward pass of MaxSigmoidAttnBlock.
 
@@ -622,7 +641,18 @@ class MaxSigmoidAttnBlock(nn.Module):
 class C2fAttn(nn.Module):
     """C2f module with an additional attn module."""
 
-    def __init__(self, c1, c2, n=1, ec=128, nh=1, gc=512, shortcut=False, g=1, e=0.5):
+    def __init__(
+        self,
+        c1: int,
+        c2: int,
+        n: int = 1,
+        ec: int = 128,
+        nh: int = 1,
+        gc: int = 512,
+        shortcut: bool = False,
+        g: int = 1,
+        e: float = 0.5,
+    ):
         """
         Initialize C2f module with attention mechanism.
 
@@ -644,7 +674,7 @@ class C2fAttn(nn.Module):
         self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
         self.attn = MaxSigmoidAttnBlock(self.c, self.c, gc=gc, ec=ec, nh=nh)
 
-    def forward(self, x, guide):
+    def forward(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
         """
         Forward pass through C2f layer with attention.
 
@@ -660,7 +690,7 @@ class C2fAttn(nn.Module):
         y.append(self.attn(y[-1], guide))
         return self.cv2(torch.cat(y, 1))
 
-    def forward_split(self, x, guide):
+    def forward_split(self, x: torch.Tensor, guide: torch.Tensor) -> torch.Tensor:
         """
         Forward pass using split() instead of chunk().
 
@@ -680,7 +710,9 @@ class C2fAttn(nn.Module):
 class ImagePoolingAttn(nn.Module):
     """ImagePoolingAttn: Enhance the text embeddings with image-aware information."""
 
-    def __init__(self, ec=256, ch=(), ct=512, nh=8, k=3, scale=False):
+    def __init__(
+        self, ec: int = 256, ch: Tuple[int, ...] = (), ct: int = 512, nh: int = 8, k: int = 3, scale: bool = False
+    ):
         """
         Initialize ImagePoolingAttn module.
 
@@ -708,7 +740,7 @@ class ImagePoolingAttn(nn.Module):
         self.hc = ec // nh
         self.k = k
 
-    def forward(self, x, text):
+    def forward(self, x: List[torch.Tensor], text: torch.Tensor) -> torch.Tensor:
         """
         Forward pass of ImagePoolingAttn.
 
@@ -752,7 +784,7 @@ class ContrastiveHead(nn.Module):
         self.bias = nn.Parameter(torch.tensor([-10.0]))
         self.logit_scale = nn.Parameter(torch.ones([]) * torch.tensor(1 / 0.07).log())
 
-    def forward(self, x, w):
+    def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
         """
         Forward function of contrastive learning.
 
@@ -798,15 +830,11 @@ class BNContrastiveHead(nn.Module):
         del self.logit_scale
         self.forward = self.forward_fuse
 
-    def forward_fuse(self, x, w):
-        """
-        Passes input out unchanged.
-
-        TODO: Update or remove?
-        """
+    def forward_fuse(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
+        """Passes input out unchanged."""
         return x
 
-    def forward(self, x, w):
+    def forward(self, x: torch.Tensor, w: torch.Tensor) -> torch.Tensor:
         """
         Forward function of contrastive learning with batch normalization.
 
@@ -827,7 +855,9 @@ class BNContrastiveHead(nn.Module):
 class RepBottleneck(Bottleneck):
     """Rep bottleneck."""
 
-    def __init__(self, c1, c2, shortcut=True, g=1, k=(3, 3), e=0.5):
+    def __init__(
+        self, c1: int, c2: int, shortcut: bool = True, g: int = 1, k: Tuple[int, int] = (3, 3), e: float = 0.5
+    ):
         """
         Initialize RepBottleneck.
 
@@ -836,7 +866,7 @@ class RepBottleneck(Bottleneck):
             c2 (int): Output channels.
            shortcut (bool): Whether to use shortcut connection.
             g (int): Groups for convolutions.
-            k (Tuple[int, int]): Kernel sizes for convolutions.
+            k (tuple): Kernel sizes for convolutions.
             e (float): Expansion ratio.
         """
         super().__init__(c1, c2, shortcut, g, k, e)
@@ -847,7 +877,7 @@ class RepBottleneck(Bottleneck):
 class RepCSP(C3):
     """Repeatable Cross Stage Partial Network (RepCSP) module for efficient feature extraction."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5):
         """
         Initialize RepCSP layer.
 
@@ -867,7 +897,7 @@ class RepCSP(C3):
 class RepNCSPELAN4(nn.Module):
     """CSP-ELAN."""
 
-    def __init__(self, c1, c2, c3, c4, n=1):
+    def __init__(self, c1: int, c2: int, c3: int, c4: int, n: int = 1):
         """
         Initialize CSP-ELAN layer.
 
@@ -885,13 +915,13 @@ class RepNCSPELAN4(nn.Module):
         self.cv3 = nn.Sequential(RepCSP(c4, c4, n), Conv(c4, c4, 3, 1))
         self.cv4 = Conv(c3 + (2 * c4), c2, 1, 1)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through RepNCSPELAN4 layer."""
         y = list(self.cv1(x).chunk(2, 1))
         y.extend((m(y[-1])) for m in [self.cv2, self.cv3])
         return self.cv4(torch.cat(y, 1))
 
-    def forward_split(self, x):
+    def forward_split(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass using split() instead of chunk()."""
         y = list(self.cv1(x).split((self.c, self.c), 1))
         y.extend(m(y[-1]) for m in [self.cv2, self.cv3])
@@ -901,7 +931,7 @@ class RepNCSPELAN4(nn.Module):
 class ELAN1(RepNCSPELAN4):
     """ELAN1 module with 4 convolutions."""
 
-    def __init__(self, c1, c2, c3, c4):
+    def __init__(self, c1: int, c2: int, c3: int, c4: int):
         """
         Initialize ELAN1 layer.
 
@@ -922,7 +952,7 @@ class ELAN1(RepNCSPELAN4):
 class AConv(nn.Module):
     """AConv."""
 
-    def __init__(self, c1, c2):
+    def __init__(self, c1: int, c2: int):
         """
         Initialize AConv module.
 
@@ -933,7 +963,7 @@ class AConv(nn.Module):
         super().__init__()
         self.cv1 = Conv(c1, c2, 3, 2, 1)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through AConv layer."""
         x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
         return self.cv1(x)
@@ -942,7 +972,7 @@ class AConv(nn.Module):
 class ADown(nn.Module):
     """ADown."""
 
-    def __init__(self, c1, c2):
+    def __init__(self, c1: int, c2: int):
         """
         Initialize ADown module.
 
@@ -955,7 +985,7 @@ class ADown(nn.Module):
         self.cv1 = Conv(c1 // 2, self.c, 3, 2, 1)
         self.cv2 = Conv(c1 // 2, self.c, 1, 1, 0)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through ADown layer."""
         x = torch.nn.functional.avg_pool2d(x, 2, 1, 0, False, True)
         x1, x2 = x.chunk(2, 1)
@@ -968,7 +998,7 @@ class ADown(nn.Module):
 class SPPELAN(nn.Module):
     """SPP-ELAN."""
 
-    def __init__(self, c1, c2, c3, k=5):
+    def __init__(self, c1: int, c2: int, c3: int, k: int = 5):
         """
         Initialize SPP-ELAN block.
 
@@ -986,7 +1016,7 @@ class SPPELAN(nn.Module):
         self.cv4 = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)
         self.cv5 = Conv(4 * c3, c2, 1, 1)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through SPPELAN layer."""
         y = [self.cv1(x)]
         y.extend(m(y[-1]) for m in [self.cv2, self.cv3, self.cv4])
@@ -996,7 +1026,7 @@ class SPPELAN(nn.Module):
 class CBLinear(nn.Module):
     """CBLinear."""
 
-    def __init__(self, c1, c2s, k=1, s=1, p=None, g=1):
+    def __init__(self, c1: int, c2s: List[int], k: int = 1, s: int = 1, p: Optional[int] = None, g: int = 1):
         """
         Initialize CBLinear module.
 
@@ -1012,7 +1042,7 @@ class CBLinear(nn.Module):
         self.c2s = c2s
         self.conv = nn.Conv2d(c1, sum(c2s), k, s, autopad(k, p), groups=g, bias=True)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
         """Forward pass through CBLinear layer."""
         return self.conv(x).split(self.c2s, dim=1)
 
@@ -1020,7 +1050,7 @@ class CBLinear(nn.Module):
 class CBFuse(nn.Module):
     """CBFuse."""
 
-    def __init__(self, idx):
+    def __init__(self, idx: List[int]):
         """
         Initialize CBFuse module.
 
@@ -1030,7 +1060,7 @@ class CBFuse(nn.Module):
         super().__init__()
         self.idx = idx
 
-    def forward(self, xs):
+    def forward(self, xs: List[torch.Tensor]) -> torch.Tensor:
         """
         Forward pass through CBFuse layer.
 
@@ -1048,7 +1078,7 @@ class CBFuse(nn.Module):
 class C3f(nn.Module):
     """Faster Implementation of CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = False, g: int = 1, e: float = 0.5):
         """
         Initialize CSP bottleneck layer with two convolutions.
 
@@ -1067,7 +1097,7 @@ class C3f(nn.Module):
         self.cv3 = Conv((2 + n) * c_, c2, 1)  # optional act=FReLU(c2)
         self.m = nn.ModuleList(Bottleneck(c_, c_, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Forward pass through C3f layer."""
         y = [self.cv2(x), self.cv1(x)]
         y.extend(m(y[-1]) for m in self.m)
@@ -1077,7 +1107,9 @@ class C3f(nn.Module):
 class C3k2(C2f):
     """Faster Implementation of CSP Bottleneck with 2 convolutions."""
 
-    def __init__(self, c1, c2, n=1, c3k=False, e=0.5, g=1, shortcut=True):
+    def __init__(
+        self, c1: int, c2: int, n: int = 1, c3k: bool = False, e: float = 0.5, g: int = 1, shortcut: bool = True
+    ):
         """
         Initialize C3k2 module.
 
@@ -1099,7 +1131,7 @@ class C3k2(C2f):
 class C3k(C3):
     """C3k is a CSP bottleneck module with customizable kernel sizes for feature extraction in neural networks."""
 
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5, k=3):
+    def __init__(self, c1: int, c2: int, n: int = 1, shortcut: bool = True, g: int = 1, e: float = 0.5, k: int = 3):
         """
         Initialize C3k module.
 
@@ -1121,7 +1153,7 @@ class C3k(C3):
 class RepVGGDW(torch.nn.Module):
     """RepVGGDW is a class that represents a depth wise separable convolutional block in RepVGG architecture."""
 
-    def __init__(self, ed) -> None:
+    def __init__(self, ed: int) -> None:
         """
         Initialize RepVGGDW module.
 
@@ -1134,7 +1166,7 @@ class RepVGGDW(torch.nn.Module):
         self.dim = ed
         self.act = nn.SiLU()
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Perform a forward pass of the RepVGGDW block.
 
@@ -1146,7 +1178,7 @@ class RepVGGDW(torch.nn.Module):
         """
         return self.act(self.conv(x) + self.conv1(x))
 
-    def forward_fuse(self, x):
+    def forward_fuse(self, x: torch.Tensor) -> torch.Tensor:
         """
         Perform a forward pass of the RepVGGDW block without fusing the convolutions.
 
@@ -1197,7 +1229,7 @@ class CIB(nn.Module):
         lk (bool, optional): Whether to use RepVGGDW for the third convolutional layer. Defaults to False.
     """
 
-    def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):
+    def __init__(self, c1: int, c2: int, shortcut: bool = True, e: float = 0.5, lk: bool = False):
        """
         Initialize the CIB module.
 
@@ -1220,7 +1252,7 @@ class CIB(nn.Module):
 
         self.add = shortcut and c1 == c2
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass of the CIB module.
 
@@ -1247,7 +1279,9 @@ class C2fCIB(C2f):
         e (float, optional): Expansion ratio for CIB modules. Defaults to 0.5.
     """
 
-    def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):
+    def __init__(
+        self, c1: int, c2: int, n: int = 1, shortcut: bool = False, lk: bool = False, g: int = 1, e: float = 0.5
+    ):
         """
         Initialize C2fCIB module.
 
@@ -1283,7 +1317,7 @@ class Attention(nn.Module):
         pe (Conv): Convolutional layer for positional encoding.
     """
 
-    def __init__(self, dim, num_heads=8, attn_ratio=0.5):
+    def __init__(self, dim: int, num_heads: int = 8, attn_ratio: float = 0.5):
         """
         Initialize multi-head attention module.
 
@@ -1303,7 +1337,7 @@ class Attention(nn.Module):
         self.proj = Conv(dim, dim, 1, act=False)
         self.pe = Conv(dim, dim, 3, 1, g=dim, act=False)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass of the Attention module.
 
@@ -1349,7 +1383,7 @@ class PSABlock(nn.Module):
         >>> output_tensor = psablock(input_tensor)
     """
 
-    def __init__(self, c, attn_ratio=0.5, num_heads=4, shortcut=True) -> None:
+    def __init__(self, c: int, attn_ratio: float = 0.5, num_heads: int = 4, shortcut: bool = True) -> None:
         """
         Initialize the PSABlock.
 
@@ -1365,7 +1399,7 @@ class PSABlock(nn.Module):
         self.ffn = nn.Sequential(Conv(c, c * 2, 1), Conv(c * 2, c, 1, act=False))
         self.add = shortcut
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Execute a forward pass through PSABlock.
 
@@ -1404,7 +1438,7 @@ class PSA(nn.Module):
         >>> output_tensor = psa.forward(input_tensor)
     """
 
-    def __init__(self, c1, c2, e=0.5):
+    def __init__(self, c1: int, c2: int, e: float = 0.5):
         """
         Initialize PSA module.
 
@@ -1422,7 +1456,7 @@ class PSA(nn.Module):
         self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)
         self.ffn = nn.Sequential(Conv(self.c, self.c * 2, 1), Conv(self.c * 2, self.c, 1, act=False))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Execute forward pass in PSA module.
 
@@ -1463,7 +1497,7 @@ class C2PSA(nn.Module):
         >>> output_tensor = c2psa(input_tensor)
     """
 
-    def __init__(self, c1, c2, n=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
         """
         Initialize C2PSA module.
 
@@ -1481,7 +1515,7 @@ class C2PSA(nn.Module):
 
         self.m = nn.Sequential(*(PSABlock(self.c, attn_ratio=0.5, num_heads=self.c // 64) for _ in range(n)))
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Process the input tensor through a series of PSA blocks.
 
@@ -1521,7 +1555,7 @@ class C2fPSA(C2f):
         >>> print(output.shape)
     """
 
-    def __init__(self, c1, c2, n=1, e=0.5):
+    def __init__(self, c1: int, c2: int, n: int = 1, e: float = 0.5):
         """
         Initialize C2fPSA module.
 
@@ -1560,7 +1594,7 @@ class SCDown(nn.Module):
         torch.Size([1, 128, 64, 64])
     """

-    def __init__(self, c1, c2, k, s):
+    def __init__(self, c1: int, c2: int, k: int, s: int):
         """
         Initialize SCDown module.
 
@@ -1574,7 +1608,7 @@ class SCDown(nn.Module):
         self.cv1 = Conv(c1, c2, 1, 1)
         self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Apply convolution and downsampling to the input tensor.
 
@@ -1604,7 +1638,9 @@ class TorchVision(nn.Module):
         split (bool, optional): Returns output from intermediate child modules as list. Default is False.
     """
 
-    def __init__(self, model, weights="DEFAULT", unwrap=True, truncate=2, split=False):
+    def __init__(
+        self, model: str, weights: str = "DEFAULT", unwrap: bool = True, truncate: int = 2, split: bool = False
+    ):
         """
         Load the model and weights from torchvision.
 
@@ -1632,7 +1668,7 @@ class TorchVision(nn.Module):
             self.split = False
             self.m.head = self.m.heads = nn.Identity()
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass through the model.
 
@@ -1676,14 +1712,14 @@ class AAttn(nn.Module):
         torch.Size([1, 256, 32, 32])
     """
 
-    def __init__(self, dim, num_heads, area=1):
+    def __init__(self, dim: int, num_heads: int, area: int = 1):
         """
         Initialize an Area-attention module for YOLO models.
 
         Args:
             dim (int): Number of hidden channels.
            num_heads (int): Number of heads into which the attention mechanism is divided.
-            area (int): Number of areas the feature map is divided, default is 1.
+            area (int): Number of areas the feature map is divided.
         """
         super().__init__()
         self.area = area
@@ -1696,7 +1732,7 @@ class AAttn(nn.Module):
         self.proj = Conv(all_head_dim, dim, 1, act=False)
         self.pe = Conv(all_head_dim, dim, 7, 1, 3, g=dim, act=False)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Process the input tensor through the area-attention.
 
@@ -1760,7 +1796,7 @@ class ABlock(nn.Module):
         torch.Size([1, 256, 32, 32])
     """
 
-    def __init__(self, dim, num_heads, mlp_ratio=1.2, area=1):
+    def __init__(self, dim: int, num_heads: int, mlp_ratio: float = 1.2, area: int = 1):
         """
         Initialize an Area-attention block module.
 
@@ -1778,7 +1814,7 @@ class ABlock(nn.Module):
 
         self.apply(self._init_weights)
 
-    def _init_weights(self, m):
+    def _init_weights(self, m: nn.Module):
         """
         Initialize weights using a truncated normal distribution.
 
@@ -1790,7 +1826,7 @@ class ABlock(nn.Module):
             if m.bias is not None:
                 nn.init.constant_(m.bias, 0)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass through ABlock.
 
@@ -1828,7 +1864,19 @@ class A2C2f(nn.Module):
         torch.Size([1, 512, 32, 32])
     """
 
-    def __init__(self, c1, c2, n=1, a2=True, area=1, residual=False, mlp_ratio=2.0, e=0.5, g=1, shortcut=True):
+    def __init__(
+        self,
+        c1: int,
+        c2: int,
+        n: int = 1,
+        a2: bool = True,
+        area: int = 1,
+        residual: bool = False,
+        mlp_ratio: float = 2.0,
+        e: float = 0.5,
+        g: int = 1,
+        shortcut: bool = True,
+    ):
         """
         Initialize Area-Attention C2f module.
 
@@ -1859,7 +1907,7 @@ class A2C2f(nn.Module):
             for _ in range(n)
         )
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """
         Forward pass through A2C2f layer.
 
@@ -1880,13 +1928,20 @@ class A2C2f(nn.Module):
 class SwiGLUFFN(nn.Module):
     """SwiGLU Feed-Forward Network for transformer-based architectures."""
 
-    def __init__(self, gc, ec, e=4) -> None:
-        """Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor."""
+    def __init__(self, gc: int, ec: int, e: int = 4) -> None:
+        """
+        Initialize SwiGLU FFN with input dimension, output dimension, and expansion factor.
+
+        Args:
+            gc (int): Guide channels.
+            ec (int): Embedding channels.
+            e (int): Expansion factor.
+        """
         super().__init__()
         self.w12 = nn.Linear(gc, e * ec)
         self.w3 = nn.Linear(e * ec // 2, ec)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply SwiGLU transformation to input features."""
         x12 = self.w12(x)
         x1, x2 = x12.chunk(2, dim=-1)
@@ -1897,8 +1952,13 @@ class SwiGLUFFN(nn.Module):
 class Residual(nn.Module):
     """Residual connection wrapper for neural network modules."""
 
-    def __init__(self, m) -> None:
-        """Initialize residual module with the wrapped module."""
+    def __init__(self, m: nn.Module) -> None:
+        """
+        Initialize residual module with the wrapped module.
+
+        Args:
+            m (nn.Module): Module to wrap with residual connection.
+        """
         super().__init__()
         self.m = m
         nn.init.zeros_(self.m.w3.bias)
@@ -1906,7 +1966,7 @@ class Residual(nn.Module):
         # nn.init.constant_(self.m.w3.weight, 1e-6)
         nn.init.zeros_(self.m.w3.weight)
 
-    def forward(self, x):
+    def forward(self, x: torch.Tensor) -> torch.Tensor:
         """Apply residual connection to input features."""
         return x + self.m(x)
 
@@ -1914,8 +1974,15 @@ class Residual(nn.Module):
 class SAVPE(nn.Module):
     """Spatial-Aware Visual Prompt Embedding module for feature enhancement."""
 
-    def __init__(self, ch, c3, embed):
-        """Initialize SAVPE module with channels, intermediate channels, and embedding dimension."""
+    def __init__(self, ch: List[int], c3: int, embed: int):
+        """
+        Initialize SAVPE module with channels, intermediate channels, and embedding dimension.
+
+        Args:
+            ch (List[int]): List of input channel dimensions.
+            c3 (int): Intermediate channels.
+            embed (int): Embedding dimension.
+        """
         super().__init__()
         self.cv1 = nn.ModuleList(
             nn.Sequential(
@@ -1935,7 +2002,7 @@ class SAVPE(nn.Module):
         self.cv5 = nn.Conv2d(1, self.c, 3, padding=1)
         self.cv6 = nn.Sequential(Conv(2 * self.c, self.c, 3), nn.Conv2d(self.c, self.c, 3, padding=1))
 
-    def forward(self, x, vp):
+    def forward(self, x: List[torch.Tensor], vp: torch.Tensor) -> torch.Tensor:
         """Process input features and visual prompts to generate enhanced embeddings."""
         y = [self.cv2[i](xi) for i, xi in enumerate(x)]
         y = self.cv4(torch.cat(y, dim=1))
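The hunks above change only signatures and docstrings, so tensor shapes in and out are unaffected. A minimal sketch exercising two of the annotated blocks (assuming `torch` and `ultralytics` 8.3.145 are importable; the channel sizes below are arbitrary):

```python
import torch
from ultralytics.nn.modules.block import C2f, SPPF

x = torch.randn(1, 64, 32, 32)  # (batch, channels, height, width)

# C2f(c1, c2, n, shortcut, g, e) per the annotated signature above;
# all internal convs are stride 1, so H and W are preserved.
m = C2f(c1=64, c2=64, n=1, shortcut=False)
print(m(x).shape)  # torch.Size([1, 64, 32, 32])

# SPPF pools with stride 1 and padding k // 2, so spatial dims are
# preserved while channels become c2.
s = SPPF(c1=64, c2=128, k=5)
print(s(x).shape)  # torch.Size([1, 128, 32, 32])
```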