ultralytics 8.2.71__py3-none-any.whl → 8.2.73__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of ultralytics might be problematic.

Files changed (35)
  1. tests/test_cli.py +3 -0
  2. ultralytics/__init__.py +2 -3
  3. ultralytics/models/__init__.py +1 -2
  4. ultralytics/models/sam/__init__.py +2 -2
  5. ultralytics/models/sam/amg.py +27 -21
  6. ultralytics/models/sam/build.py +200 -9
  7. ultralytics/models/sam/model.py +86 -34
  8. ultralytics/models/sam/modules/blocks.py +1131 -0
  9. ultralytics/models/sam/modules/decoders.py +390 -23
  10. ultralytics/models/sam/modules/encoders.py +508 -323
  11. ultralytics/models/{sam2 → sam}/modules/memory_attention.py +73 -6
  12. ultralytics/models/sam/modules/sam.py +887 -16
  13. ultralytics/models/sam/modules/tiny_encoder.py +376 -126
  14. ultralytics/models/sam/modules/transformer.py +155 -54
  15. ultralytics/models/{sam2 → sam}/modules/utils.py +105 -3
  16. ultralytics/models/sam/predict.py +382 -92
  17. ultralytics/nn/modules/transformer.py +2 -2
  18. ultralytics/utils/downloads.py +2 -2
  19. ultralytics/utils/ops.py +2 -2
  20. ultralytics/utils/plotting.py +3 -3
  21. {ultralytics-8.2.71.dist-info → ultralytics-8.2.73.dist-info}/METADATA +44 -44
  22. {ultralytics-8.2.71.dist-info → ultralytics-8.2.73.dist-info}/RECORD +26 -34
  23. ultralytics/models/sam2/__init__.py +0 -6
  24. ultralytics/models/sam2/build.py +0 -156
  25. ultralytics/models/sam2/model.py +0 -97
  26. ultralytics/models/sam2/modules/__init__.py +0 -1
  27. ultralytics/models/sam2/modules/decoders.py +0 -305
  28. ultralytics/models/sam2/modules/encoders.py +0 -332
  29. ultralytics/models/sam2/modules/sam2.py +0 -804
  30. ultralytics/models/sam2/modules/sam2_blocks.py +0 -715
  31. ultralytics/models/sam2/predict.py +0 -182
  32. {ultralytics-8.2.71.dist-info → ultralytics-8.2.73.dist-info}/LICENSE +0 -0
  33. {ultralytics-8.2.71.dist-info → ultralytics-8.2.73.dist-info}/WHEEL +0 -0
  34. {ultralytics-8.2.71.dist-info → ultralytics-8.2.73.dist-info}/entry_points.txt +0 -0
  35. {ultralytics-8.2.71.dist-info → ultralytics-8.2.73.dist-info}/top_level.txt +0 -0
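
The listing above shows the standalone sam2 package (items 23-31) being removed and its modules folded into ultralytics/models/sam. A minimal usage sketch under that reading follows; the SAM entry point and the "sam2_b.pt" weights name are assumptions about the merged interface, not something this diff shows directly.

    from ultralytics import SAM

    # Assumption: after the sam2 -> sam merge, SAM 2 checkpoints load through the
    # unified SAM class; "sam2_b.pt" is used here as an illustrative weights name.
    model = SAM("sam2_b.pt")
    model.info()  # print a model summary
    # Box-prompted segmentation on a single image (the path is a placeholder).
    results = model("path/to/image.jpg", bboxes=[100, 100, 400, 400])
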
ultralytics/models/{sam2 → sam}/modules/memory_attention.py

@@ -6,11 +6,50 @@ from typing import Optional
 import torch
 from torch import Tensor, nn
 
-from .sam2_blocks import RoPEAttention
+from .blocks import RoPEAttention
 
 
 class MemoryAttentionLayer(nn.Module):
-    """Implements a memory attention layer with self-attention and cross-attention mechanisms for neural networks."""
+    """
+    Implements a memory attention layer with self-attention and cross-attention mechanisms for neural networks.
+
+    This class combines self-attention, cross-attention, and feedforward components to process input tensors and
+    generate memory-based attention outputs.
+
+    Attributes:
+        d_model (int): Dimensionality of the model.
+        dim_feedforward (int): Dimensionality of the feedforward network.
+        dropout_value (float): Dropout rate for regularization.
+        self_attn (RoPEAttention): Self-attention mechanism using RoPE (Rotary Position Embedding).
+        cross_attn_image (RoPEAttention): Cross-attention mechanism for image processing.
+        linear1 (nn.Linear): First linear layer of the feedforward network.
+        linear2 (nn.Linear): Second linear layer of the feedforward network.
+        norm1 (nn.LayerNorm): Layer normalization for self-attention output.
+        norm2 (nn.LayerNorm): Layer normalization for cross-attention output.
+        norm3 (nn.LayerNorm): Layer normalization for feedforward network output.
+        dropout1 (nn.Dropout): Dropout layer after self-attention.
+        dropout2 (nn.Dropout): Dropout layer after cross-attention.
+        dropout3 (nn.Dropout): Dropout layer after feedforward network.
+        activation (nn.ReLU): Activation function for the feedforward network.
+        pos_enc_at_attn (bool): Flag to add positional encoding at attention.
+        pos_enc_at_cross_attn_queries (bool): Flag to add positional encoding to cross-attention queries.
+        pos_enc_at_cross_attn_keys (bool): Flag to add positional encoding to cross-attention keys.
+
+    Methods:
+        forward: Performs the full memory attention operation on input tensors.
+        _forward_sa: Performs self-attention on input tensor.
+        _forward_ca: Performs cross-attention between target and memory tensors.
+
+    Examples:
+        >>> layer = MemoryAttentionLayer(d_model=256, dim_feedforward=2048, dropout=0.1)
+        >>> tgt = torch.randn(1, 100, 256)
+        >>> memory = torch.randn(1, 100, 64)
+        >>> pos = torch.randn(1, 100, 256)
+        >>> query_pos = torch.randn(1, 100, 256)
+        >>> output = layer(tgt, memory, pos, query_pos)
+        >>> print(output.shape)
+        torch.Size([1, 100, 256])
+    """
 
     def __init__(
         self,
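
For code that imported this module directly, the {sam2 → sam} rename and the import change in the hunk above imply an update along these lines (only the new path appears in this diff; the old one follows from the removed sam2 tree):

    # Old (8.2.71), removed in 8.2.73:
    #   from ultralytics.models.sam2.modules.memory_attention import MemoryAttention, MemoryAttentionLayer
    # New (8.2.73), matching the renamed module above:
    from ultralytics.models.sam.modules.memory_attention import MemoryAttention, MemoryAttentionLayer
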
@@ -21,7 +60,7 @@ class MemoryAttentionLayer(nn.Module):
         pos_enc_at_cross_attn_keys: bool = True,
         pos_enc_at_cross_attn_queries: bool = False,
     ):
-        """Initializes a MemoryAttentionLayer with self-attention, cross-attention, and feedforward components."""
+        """Initializes a memory attention layer with self-attention, cross-attention, and feedforward components."""
         super().__init__()
         self.d_model = d_model
         self.dim_feedforward = dim_feedforward
@@ -88,7 +127,7 @@ class MemoryAttentionLayer(nn.Module):
         query_pos: Optional[Tensor] = None,
         num_k_exclude_rope: int = 0,
     ) -> torch.Tensor:
-        """Performs self-attention, cross-attention, and MLP operations on input tensors for memory-based attention."""
+        """Processes input tensors using self-attention, cross-attention, and MLP for memory-based attention."""
         tgt = self._forward_sa(tgt, query_pos)
         tgt = self._forward_ca(tgt, memory, query_pos, pos, num_k_exclude_rope)
         # MLP
@@ -99,7 +138,35 @@ class MemoryAttentionLayer(nn.Module):
 
 
 class MemoryAttention(nn.Module):
-    """Memory attention module for processing sequential data with self and cross-attention mechanisms."""
+    """
+    Memory attention module for processing sequential data with self and cross-attention mechanisms.
+
+    This class implements a multi-layer attention mechanism that combines self-attention and cross-attention
+    for processing sequential data, particularly useful in transformer-like architectures.
+
+    Attributes:
+        d_model (int): The dimension of the model's hidden state.
+        layers (nn.ModuleList): A list of MemoryAttentionLayer modules.
+        num_layers (int): The number of attention layers.
+        norm (nn.LayerNorm): Layer normalization applied to the output.
+        pos_enc_at_input (bool): Whether to apply positional encoding at the input.
+        batch_first (bool): Whether the input tensors are in batch-first format.
+
+    Methods:
+        forward: Processes input tensors through the attention layers.
+
+    Examples:
+        >>> d_model = 256
+        >>> layer = MemoryAttentionLayer(d_model)
+        >>> attention = MemoryAttention(d_model, pos_enc_at_input=True, layer=layer, num_layers=3)
+        >>> curr = torch.randn(10, 32, d_model)  # (seq_len, batch_size, d_model)
+        >>> memory = torch.randn(20, 32, d_model)  # (mem_len, batch_size, d_model)
+        >>> curr_pos = torch.randn(10, 32, d_model)
+        >>> memory_pos = torch.randn(20, 32, d_model)
+        >>> output = attention(curr, memory, curr_pos, memory_pos)
+        >>> print(output.shape)
+        torch.Size([10, 32, 256])
+    """
 
     def __init__(
         self,
@@ -126,7 +193,7 @@ class MemoryAttention(nn.Module):
         memory_pos: Optional[Tensor] = None,  # pos_enc for cross-attention inputs
         num_obj_ptr_tokens: int = 0,  # number of object pointer *tokens*
     ):
-        """Applies self-attention and cross-attention to input tensors, processing through multiple layers."""
+        """Processes input tensors through multiple attention layers, applying self and cross-attention mechanisms."""
         if isinstance(curr, list):
             assert isinstance(curr_pos, list)
             assert len(curr) == len(curr_pos) == 1
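
Going by the constructor signatures in the new docstrings, wiring a MemoryAttentionLayer into MemoryAttention looks roughly like the sketch below. It only builds the modules and counts parameters; the num_layers value is illustrative rather than SAM 2's shipped configuration.

    from ultralytics.models.sam.modules.memory_attention import MemoryAttention, MemoryAttentionLayer

    # Build one layer and stack it, mirroring the signatures shown in the docstrings above.
    layer = MemoryAttentionLayer(d_model=256, dim_feedforward=2048, dropout=0.1)
    attention = MemoryAttention(256, pos_enc_at_input=True, layer=layer, num_layers=4)  # num_layers is illustrative
    print(f"MemoryAttention parameters: {sum(p.numel() for p in attention.parameters()):,}")
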