sopro 1.0.1__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sopro/__init__.py +1 -1
- sopro/cli.py +31 -46
- sopro/config.py +15 -20
- sopro/hub.py +2 -3
- sopro/model.py +265 -535
- sopro/nn/__init__.py +7 -3
- sopro/nn/blocks.py +78 -0
- sopro/nn/embeddings.py +16 -0
- sopro/nn/generator.py +130 -0
- sopro/nn/nar.py +116 -0
- sopro/nn/ref.py +160 -0
- sopro/nn/speaker.py +14 -17
- sopro/nn/text.py +132 -0
- sopro/sampling.py +3 -3
- sopro/streaming.py +25 -38
- {sopro-1.0.1.dist-info → sopro-1.5.0.dist-info}/METADATA +30 -7
- sopro-1.5.0.dist-info/RECORD +26 -0
- {sopro-1.0.1.dist-info → sopro-1.5.0.dist-info}/WHEEL +1 -1
- sopro/nn/xattn.py +0 -98
- sopro-1.0.1.dist-info/RECORD +0 -23
- {sopro-1.0.1.dist-info → sopro-1.5.0.dist-info}/entry_points.txt +0 -0
- {sopro-1.0.1.dist-info → sopro-1.5.0.dist-info}/licenses/LICENSE.txt +0 -0
- {sopro-1.0.1.dist-info → sopro-1.5.0.dist-info}/top_level.txt +0 -0
sopro/nn/__init__.py
CHANGED

@@ -1,7 +1,9 @@
 from .blocks import GLU, AttentiveStatsPool, DepthwiseConv1d, RMSNorm, SSMLiteBlock
 from .embeddings import CodebookEmbedding, SinusoidalPositionalEmbedding, TextEmbedding
+from .generator import ARRVQ1Generator
+from .ref import RefXAttnBlock, RefXAttnStack
 from .speaker import SpeakerFiLM, Token2SV
-from .
+from .text import TextEncoder, TextXAttnBlock
 
 __all__ = [
     "GLU",
@@ -14,7 +16,9 @@ __all__ = [
     "CodebookEmbedding",
     "Token2SV",
     "SpeakerFiLM",
-    "
-    "RefXAttnBlock",
+    "TextEncoder",
     "TextXAttnBlock",
+    "RefXAttnBlock",
+    "RefXAttnStack",
+    "ARRVQ1Generator",
 ]
sopro/nn/blocks.py
CHANGED

@@ -1,10 +1,18 @@
 from __future__ import annotations
 
+from dataclasses import dataclass
+from typing import Optional, Tuple
+
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
 
 
+@dataclass
+class DepthwiseConv1dState:
+    buf: torch.Tensor
+
+
 class GLU(nn.Module):
     def __init__(self, d: int):
         super().__init__()
@@ -39,6 +47,19 @@ class DepthwiseConv1d(nn.Module):
         self.kernel_size = kernel_size
         self.dw = nn.Conv1d(d, d, kernel_size, groups=d, padding=0, dilation=dilation)
 
+    def _ctx_len(self) -> int:
+        return (self.kernel_size - 1) * self.dilation + 1
+
+    def init_state(
+        self, batch_size: int, device: torch.device, dtype: torch.dtype
+    ) -> DepthwiseConv1dState:
+        if not self.causal:
+            raise ValueError("init_state is only valid for causal convs")
+        L = self._ctx_len()
+        D = int(self.dw.in_channels)
+        buf = torch.zeros((batch_size, L, D), device=device, dtype=dtype)
+        return DepthwiseConv1dState(buf=buf)
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         xt = x.transpose(1, 2)
         if self.causal:
@@ -52,6 +73,42 @@ class DepthwiseConv1d(nn.Module):
         y = self.dw(xt)
         return y.transpose(1, 2)
 
+    def forward_step(
+        self, x_bt_d: torch.Tensor, state: Optional[DepthwiseConv1dState]
+    ) -> Tuple[torch.Tensor, DepthwiseConv1dState]:
+        if not self.causal:
+            raise ValueError("forward_step is only valid for causal convs")
+
+        if x_bt_d.dim() == 2:
+            x_bt_d = x_bt_d.unsqueeze(1)
+
+        B, T, D = x_bt_d.shape
+        if T != 1:
+            raise ValueError("forward_step expects a single timestep [B,1,D]")
+
+        if state is None:
+            state = self.init_state(B, x_bt_d.device, x_bt_d.dtype)
+
+        buf = state.buf
+        if buf.size(1) > 1:
+            buf = torch.cat([buf[:, 1:, :], x_bt_d], dim=1)
+        else:
+            buf = x_bt_d
+
+        k = int(self.kernel_size)
+        d = int(self.dilation)
+        idx = torch.arange(0, k * d, d, device=x_bt_d.device)
+        x_bkd = buf.index_select(1, idx)  # [B,k,D]
+
+        w_dk = self.dw.weight.squeeze(1).to(dtype=x_bt_d.dtype)
+        y_bd = (x_bkd.transpose(1, 2) * w_dk.unsqueeze(0)).sum(dim=-1)
+        if self.dw.bias is not None:
+            y_bd = y_bd + self.dw.bias.to(dtype=y_bd.dtype).unsqueeze(0)
+
+        y_bt_d = y_bd.unsqueeze(1)
+        state.buf = buf
+        return y_bt_d, state
+
 
 class SSMLiteBlock(nn.Module):
     def __init__(
@@ -76,6 +133,13 @@ class SSMLiteBlock(nn.Module):
         )
         self.drop = nn.Dropout(dropout)
 
+    def init_state(
+        self, batch_size: int, device: torch.device, dtype: torch.dtype
+    ) -> dict:
+        if not self.dw.causal:
+            raise ValueError("SSMLiteBlock.init_state only valid for causal blocks")
+        return {"dw": self.dw.init_state(batch_size, device, dtype)}
+
     def forward(self, x: torch.Tensor) -> torch.Tensor:
         h = self.glu(self.norm(x))
         h = self.dw(h)
@@ -83,6 +147,20 @@ class SSMLiteBlock(nn.Module):
         x = x + self.drop(self.ff(x))
         return x
 
+    def forward_step(
+        self, x_bt_d: torch.Tensor, state: dict
+    ) -> Tuple[torch.Tensor, dict]:
+        if not self.dw.causal:
+            raise ValueError("forward_step only valid for causal blocks")
+
+        h = self.glu(self.norm(x_bt_d))
+        y, dw_state = self.dw.forward_step(h, state.get("dw", None))
+        state["dw"] = dw_state
+
+        x = x_bt_d + self.drop(y)
+        x = x + self.drop(self.ff(x))
+        return x, state
+
 
 class AttentiveStatsPool(nn.Module):
     def __init__(self, d: int):
sopro/nn/embeddings.py
CHANGED

@@ -79,6 +79,7 @@ class CodebookEmbedding(nn.Module):
         tokens_subset: Optional[torch.Tensor],
         cb_indices: Optional[List[int]],
         keep_mask: Optional[torch.Tensor] = None,
+        cb_weights: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         if tokens_subset is None or cb_indices is None or len(cb_indices) == 0:
             return 0.0
@@ -90,6 +91,21 @@
         idx = torch.cat(idx_list, dim=2)
         emb = self.emb(idx)
 
+        if cb_weights is not None:
+            w = cb_weights
+            if w.dim() != 1:
+                raise ValueError("cb_weights must be 1D")
+            if w.numel() == self.Q:
+                cb_t = torch.tensor(cb_indices, device=emb.device, dtype=torch.long)
+                w = w.to(emb.device).index_select(0, cb_t)
+            elif w.numel() != K:
+                raise ValueError(
+                    f"cb_weights must have len Q={self.Q} or K={K}, got {w.numel()}"
+                )
+
+            w = F.softmax(w.float(), dim=0).to(dtype=emb.dtype)
+            emb = emb * w.view(1, 1, K, 1)
+
         if keep_mask is not None:
             emb = emb * keep_mask.unsqueeze(-1).to(emb.dtype)
 
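The new `cb_weights` argument re-weights the per-codebook embeddings before masking: a 1-D tensor of length `Q` (indexed down to the selected codebooks via `cb_indices`) or length `K` (already the selected subset), passed through a softmax and broadcast over `[B, T, K, D]`. The weighting math in isolation, with toy shapes rather than the sopro API:

```python
import torch
import torch.nn.functional as F

B, T, K, D = 2, 5, 3, 8
emb = torch.randn(B, T, K, D)                # embeddings of the K selected codebooks
cb_weights = torch.tensor([0.0, 1.0, -1.0])  # one raw (unnormalized) weight per selected codebook

w = F.softmax(cb_weights.float(), dim=0).to(dtype=emb.dtype)
weighted = emb * w.view(1, 1, K, 1)          # same broadcast the diff applies before keep_mask

print(w)                # sums to 1 over the codebook axis
print(weighted.shape)   # torch.Size([2, 5, 3, 8])
```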
sopro/nn/generator.py
ADDED

@@ -0,0 +1,130 @@
+from typing import Dict, List, Optional, Tuple
+
+import torch
+import torch.nn as nn
+
+from sopro.config import SoproTTSConfig
+from sopro.nn.blocks import RMSNorm, SSMLiteBlock
+from sopro.nn.text import TextXAttnBlock
+
+
+class ARRVQ1Generator(nn.Module):
+    def __init__(self, cfg: SoproTTSConfig, d_model: int, vocab: int):
+        super().__init__()
+        ks = int(cfg.ar_kernel)
+
+        dils: List[int] = []
+        while len(dils) < int(cfg.n_layers_ar):
+            dils.extend(list(cfg.ar_dilation_cycle))
+        dils = dils[: int(cfg.n_layers_ar)]
+        self.dils = tuple(int(d) for d in dils)
+
+        self.blocks = nn.ModuleList(
+            [
+                SSMLiteBlock(
+                    d_model, cfg.dropout, causal=True, kernel_size=ks, dilation=d
+                )
+                for d in self.dils
+            ]
+        )
+
+        self.attn_freq = int(cfg.ar_text_attn_freq)
+        self.x_attns = nn.ModuleList()
+        for i in range(len(self.blocks)):
+            if (i + 1) % self.attn_freq == 0:
+                self.x_attns.append(
+                    TextXAttnBlock(d_model, heads=4, dropout=cfg.dropout)
+                )
+            else:
+                self.x_attns.append(nn.Identity())
+
+        self.norm = RMSNorm(d_model)
+        self.head = nn.Linear(d_model, vocab)
+
+    @torch.no_grad()
+    def init_stream_state(
+        self,
+        batch_size: int,
+        device: torch.device,
+        dtype: torch.dtype,
+        *,
+        text_emb: Optional[torch.Tensor] = None,
+        text_mask: Optional[torch.Tensor] = None,
+    ) -> Dict[str, object]:
+        layer_states = [
+            blk.init_state(batch_size, device, dtype) for blk in self.blocks
+        ]
+
+        kv_caches: List[Optional[Dict[str, torch.Tensor]]] = []
+        key_padding_mask = (~text_mask) if text_mask is not None else None
+        for xa in self.x_attns:
+            if isinstance(xa, nn.Identity) or (text_emb is None):
+                kv_caches.append(None)
+            else:
+                kv_caches.append(
+                    xa.build_kv_cache(text_emb, key_padding_mask=key_padding_mask)
+                )
+
+        return {"layer_states": layer_states, "kv_caches": kv_caches}
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        text_emb: Optional[torch.Tensor] = None,
+        text_mask: Optional[torch.Tensor] = None,
+    ) -> torch.Tensor:
+        key_padding_mask = ~text_mask if text_mask is not None else None
+
+        if key_padding_mask is not None:
+            bad_rows = key_padding_mask.all(dim=1)
+            if bad_rows.any():
+                key_padding_mask = key_padding_mask.clone()
+                idx = torch.nonzero(bad_rows, as_tuple=False).squeeze(1)
+                key_padding_mask[idx, 0] = False
+                if text_emb is not None:
+                    text_emb = text_emb.clone()
+                    text_emb[idx, 0, :] = 0
+
+        h = x
+        for i, lyr in enumerate(self.blocks):
+            h = lyr(h)
+            if not isinstance(self.x_attns[i], nn.Identity) and text_emb is not None:
+                h = self.x_attns[i](h, text_emb, key_padding_mask=key_padding_mask)
+
+        h = self.norm(h)
+
+        return self.head(h)
+
+    @torch.no_grad()
+    def step(
+        self,
+        x_bt_d: torch.Tensor,
+        state: Dict[str, object],
+        *,
+        text_emb: Optional[torch.Tensor] = None,
+        text_mask: Optional[torch.Tensor] = None,
+    ) -> Tuple[torch.Tensor, Dict[str, object]]:
+        h = x_bt_d
+        key_padding_mask = (~text_mask) if text_mask is not None else None
+
+        layer_states: List[dict] = state["layer_states"]
+        kv_caches: List[Optional[Dict[str, torch.Tensor]]] = state["kv_caches"]
+
+        for i, blk in enumerate(self.blocks):
+            h, layer_states[i] = blk.forward_step(h, layer_states[i])
+
+            xa = self.x_attns[i]
+            if (not isinstance(xa, nn.Identity)) and (text_emb is not None):
+                kv = kv_caches[i]
+                if kv is None:
+                    kv = xa.build_kv_cache(text_emb, key_padding_mask=key_padding_mask)
+                h, kv = xa(h, kv_cache=kv, use_cache=True)
+                kv_caches[i] = kv
+
+        state["layer_states"] = layer_states
+        state["kv_caches"] = kv_caches
+
+        h = self.norm(h)
+        logits = self.head(h)
+
+        return logits, state
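`ARRVQ1Generator` splits the training-style `forward` from an explicit streaming path: `init_stream_state` builds one conv state per causal `SSMLiteBlock` plus a text KV cache for each interleaved `TextXAttnBlock`, and `step` advances a single `[B, 1, D]` frame. A hedged sketch of a decode loop; the `embed_frame` and `sample` helpers are hypothetical caller-side plumbing, and only the `init_stream_state` / `step` calls follow the code above (`text_mask` uses True for valid positions, since the model negates it into a key padding mask):

```python
import torch

def decode(gen, embed_frame, sample, text_emb, text_mask, bos_ids, n_frames):
    # gen: ARRVQ1Generator; text_emb: [B, S, D]; text_mask: [B, S] bool (True = real token)
    B = text_emb.size(0)
    state = gen.init_stream_state(
        B, text_emb.device, text_emb.dtype,
        text_emb=text_emb, text_mask=text_mask,
    )

    token_ids = bos_ids  # [B, 1] starting RVQ-1 tokens (hypothetical convention)
    out = []
    for _ in range(n_frames):
        x_t = embed_frame(token_ids)          # [B, 1, D] input frame
        logits, state = gen.step(
            x_t, state, text_emb=text_emb, text_mask=text_mask
        )
        token_ids = sample(logits[:, -1, :])  # [B, 1] next tokens
        out.append(token_ids)
    return torch.cat(out, dim=1)              # [B, n_frames]
```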
sopro/nn/nar.py
ADDED

@@ -0,0 +1,116 @@
+from __future__ import annotations
+
+from typing import Dict, List
+
+import torch
+import torch.nn as nn
+
+from sopro.config import SoproTTSConfig
+
+from .blocks import RMSNorm, SSMLiteBlock
+
+
+class NARStageAdapter(nn.Module):
+    def __init__(self, d_model: int, hidden: int = 256):
+        super().__init__()
+        self.norm = RMSNorm(d_model)
+        self.mlp = nn.Sequential(
+            nn.Linear(d_model, hidden),
+            nn.GELU(),
+            nn.Linear(hidden, 2 * d_model),
+        )
+        nn.init.zeros_(self.mlp[-1].weight)
+        nn.init.zeros_(self.mlp[-1].bias)
+
+    def forward(self, x: torch.Tensor, stage_vec: torch.Tensor) -> torch.Tensor:
+        if stage_vec.dim() == 1:
+            stage_vec = stage_vec.unsqueeze(0).expand(x.size(0), -1)
+        g, b = self.mlp(stage_vec).chunk(2, dim=-1)
+        g = g.unsqueeze(1)
+        b = b.unsqueeze(1)
+        x = self.norm(x)
+        return x * (1 + torch.tanh(g)) + torch.tanh(b)
+
+
+class NARSinglePass(nn.Module):
+    def __init__(
+        self, cfg: SoproTTSConfig, d_model: int, stage_specs: Dict[str, List[int]]
+    ):
+        super().__init__()
+        self.cfg = cfg
+        self.stage_names = [
+            s for s in ["B", "C", "D", "E"] if len(stage_specs.get(s, [])) > 0
+        ]
+        self.stage_to_id = {s: i for i, s in enumerate(self.stage_names)}
+        self.stage_specs = {s: stage_specs[s] for s in self.stage_names}
+
+        ks = int(cfg.nar_kernel_size)
+        cycle = tuple(int(x) for x in cfg.nar_dilation_cycle) or (1,)
+        dils: List[int] = []
+        while len(dils) < int(cfg.n_layers_nar):
+            dils.extend(cycle)
+        dils = dils[: int(cfg.n_layers_nar)]
+
+        self.blocks = nn.ModuleList(
+            [
+                SSMLiteBlock(
+                    d_model, cfg.dropout, causal=False, kernel_size=ks, dilation=int(d)
+                )
+                for d in dils
+            ]
+        )
+        self.norm = RMSNorm(d_model)
+        self.pre = nn.Linear(d_model, int(cfg.nar_head_dim))
+
+        self.stage_emb = nn.Embedding(len(self.stage_names), d_model)
+        self.adapter = NARStageAdapter(d_model, hidden=256)
+
+        self.heads = nn.ModuleDict()
+        self.head_id_emb = nn.ModuleDict()
+        for s in self.stage_names:
+            n_heads = len(self.stage_specs[s])
+            self.heads[s] = nn.ModuleList(
+                [
+                    nn.Linear(int(cfg.nar_head_dim), int(cfg.codebook_size))
+                    for _ in range(n_heads)
+                ]
+            )
+            emb = nn.Embedding(n_heads, int(cfg.nar_head_dim))
+            nn.init.zeros_(emb.weight)
+            self.head_id_emb[s] = emb
+
+        self.mix = nn.ParameterDict(
+            {
+                s: nn.Parameter(torch.zeros(2, dtype=torch.float32))
+                for s in self.stage_names
+            }
+        )
+
+    def forward_stage(
+        self, stage: str, cond: torch.Tensor, prev_emb: torch.Tensor
+    ) -> List[torch.Tensor]:
+        if stage not in self.heads:
+            return []
+
+        w = torch.softmax(self.mix[stage], dim=0)
+        x = w[0] * cond + w[1] * prev_emb
+
+        sid = self.stage_to_id[stage]
+        stage_vec = self.stage_emb.weight[sid]
+        x = self.adapter(x, stage_vec)
+
+        for blk in self.blocks:
+            x = blk(x)
+        x = self.norm(x)
+
+        z = self.pre(x)
+        outs: List[torch.Tensor] = []
+        for i, head in enumerate(self.heads[stage]):
+            hb = (
+                self.head_id_emb[stage]
+                .weight[i]
+                .view(1, 1, -1)
+                .to(dtype=z.dtype, device=z.device)
+            )
+            outs.append(head(z + hb))
+        return outs
sopro/nn/ref.py
ADDED

@@ -0,0 +1,160 @@
+from __future__ import annotations
+
+from typing import Dict, List, Optional, Tuple
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+from .blocks import RMSNorm
+
+
+def _rms_per_token(x: torch.Tensor, eps: float = 1e-6) -> torch.Tensor:
+    return torch.sqrt(x.float().pow(2).mean(dim=-1, keepdim=True) + eps)
+
+
+class RefXAttnBlock(nn.Module):
+    def __init__(self, d_model: int, heads: int = 2, gmax: float = 0.35):
+        super().__init__()
+        assert d_model % heads == 0
+
+        self.d_model = int(d_model)
+        self.heads = int(heads)
+        self.head_dim = self.d_model // self.heads
+        self.gmax = float(gmax)
+
+        self.nq = RMSNorm(self.d_model)
+        self.nkv = RMSNorm(self.d_model)
+
+        self.q_proj = nn.Linear(self.d_model, self.d_model, bias=False)
+        self.k_proj = nn.Linear(self.d_model, self.d_model, bias=False)
+        self.v_proj = nn.Linear(self.d_model, self.d_model, bias=False)
+        self.out_proj = nn.Linear(self.d_model, self.d_model, bias=False)
+
+        self.gate = nn.Parameter(torch.tensor(0.0))
+
+    def _to_heads(self, t: torch.Tensor) -> torch.Tensor:
+        B, T, D = t.shape
+        return t.view(B, T, self.heads, self.head_dim).transpose(1, 2)
+
+    def _from_heads(self, t: torch.Tensor) -> torch.Tensor:
+        B, H, T, Hd = t.shape
+        return t.transpose(1, 2).contiguous().view(B, T, H * Hd)
+
+    def build_kv_cache(
+        self,
+        context: torch.Tensor,
+        key_padding_mask: Optional[torch.Tensor] = None,
+    ) -> Dict[str, torch.Tensor]:
+        kv = self.nkv(context)
+        k = self._to_heads(self.k_proj(kv))
+        v = self._to_heads(self.v_proj(kv))
+        return {"k": k, "v": v, "key_padding_mask": key_padding_mask}
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        *,
+        context: Optional[torch.Tensor] = None,
+        key_padding_mask: Optional[torch.Tensor] = None,
+        kv_cache: Optional[Dict[str, torch.Tensor]] = None,
+        use_cache: bool = False,
+    ):
+        q = self.nq(x)
+        q = self._to_heads(self.q_proj(q))
+
+        if kv_cache is None:
+            if context is None:
+                raise ValueError("context must be provided when kv_cache is None")
+            kv_cache = self.build_kv_cache(context, key_padding_mask=key_padding_mask)
+
+        k = kv_cache["k"]
+        v = kv_cache["v"]
+        kpm = kv_cache.get("key_padding_mask", None)
+
+        attn_bias = None
+        if kpm is not None:
+            kpm = kpm.to(torch.bool)
+            B = q.size(0)
+            S = k.size(-2)
+            attn_bias = torch.zeros((B, 1, 1, S), device=q.device, dtype=torch.float32)
+            attn_bias = attn_bias.masked_fill(kpm[:, None, None, :], float("-inf"))
+
+            bad = kpm.all(dim=1)
+            if bad.any():
+                attn_bias = attn_bias.clone()
+                attn_bias[bad, :, :, 0] = 0.0
+
+        with torch.autocast(device_type=x.device.type, enabled=False):
+            a = F.scaled_dot_product_attention(
+                q.float(),
+                k.float(),
+                v.float(),
+                attn_mask=attn_bias,
+                dropout_p=0.0,
+                is_causal=False,
+            )
+
+        a = torch.nan_to_num(a, nan=0.0, posinf=0.0, neginf=0.0)
+        a = self._from_heads(a)
+
+        scale = (_rms_per_token(x) / _rms_per_token(a)).clamp(0.0, 10.0)
+        a = (a * scale).to(x.dtype)
+
+        a = self.out_proj(a)
+
+        gate_eff = (self.gmax * torch.tanh(self.gate)).to(x.dtype)
+        y = x + gate_eff * a
+        return (y, kv_cache) if use_cache else y
+
+
+class RefXAttnStack(nn.Module):
+    def __init__(
+        self, d_model: int, heads: int = 2, layers: int = 3, gmax: float = 0.35
+    ):
+        super().__init__()
+        self.blocks = nn.ModuleList(
+            [RefXAttnBlock(d_model, heads=heads, gmax=gmax) for _ in range(int(layers))]
+        )
+
+    def build_kv_caches(
+        self,
+        context: torch.Tensor,
+        key_padding_mask: Optional[torch.Tensor] = None,
+    ) -> List[Dict[str, torch.Tensor]]:
+        return [
+            blk.build_kv_cache(context, key_padding_mask=key_padding_mask)
+            for blk in self.blocks
+        ]
+
+    def forward(
+        self,
+        x: torch.Tensor,
+        *,
+        context: Optional[torch.Tensor] = None,
+        key_padding_mask: Optional[torch.Tensor] = None,
+        kv_caches: Optional[List[Dict[str, torch.Tensor]]] = None,
+        use_cache: bool = False,
+    ):
+        if use_cache:
+            if kv_caches is None:
+                if context is None:
+                    raise ValueError("context must be provided when kv_caches is None")
+                kv_caches = self.build_kv_caches(
+                    context, key_padding_mask=key_padding_mask
+                )
+
+            assert kv_caches is not None and len(kv_caches) == len(self.blocks)
+            new_caches: List[Dict[str, torch.Tensor]] = []
+            h = x
+            for blk, cache in zip(self.blocks, kv_caches):
+                h, cache2 = blk(h, kv_cache=cache, use_cache=True)
+                new_caches.append(cache2)
+            return h, new_caches
+
+        if context is None:
+            raise ValueError("context must be provided when use_cache=False")
+        h = x
+        for blk in self.blocks:
+            h = blk(h, context=context, key_padding_mask=key_padding_mask)
+        return h
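`RefXAttnStack` attends a running stream to a fixed reference context through gated, RMS-rescaled cross-attention; because the reference never changes during decoding, the K/V projections can be computed once with `build_kv_caches` and reused on every step. A small usage sketch with toy tensors (module arguments follow the constructors above; `key_padding_mask` marks padded reference positions with True):

```python
import torch
from sopro.nn.ref import RefXAttnStack

stack = RefXAttnStack(d_model=64, heads=2, layers=3, gmax=0.35).eval()

ref = torch.randn(2, 40, 64)                # reference context [B, S, D]
kpm = torch.zeros(2, 40, dtype=torch.bool)  # True = padded reference frame
kpm[1, 30:] = True

with torch.no_grad():
    # Whole-sequence path: K/V are built internally from `context`.
    x = torch.randn(2, 12, 64)
    y = stack(x, context=ref, key_padding_mask=kpm)

    # Streaming path: build the per-layer caches once, then reuse them per frame.
    caches = stack.build_kv_caches(ref, key_padding_mask=kpm)
    for _ in range(3):
        frame = torch.randn(2, 1, 64)
        out, caches = stack(frame, kv_caches=caches, use_cache=True)
```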
sopro/nn/speaker.py
CHANGED

@@ -11,7 +11,7 @@ from .blocks import AttentiveStatsPool, DepthwiseConv1d
 
 class Token2SV(nn.Module):
     def __init__(
-        self, Q: int, V: int, d: int = 192, out_dim: int =
+        self, Q: int, V: int, d: int = 192, out_dim: int = 192, dropout: float = 0.05
     ):
         super().__init__()
         self.Q, self.V = int(Q), int(V)
@@ -27,7 +27,6 @@ class Token2SV(nn.Module):
             DepthwiseConv1d(d, 7, causal=False),
             nn.GELU(),
         )
-
        self.pool = AttentiveStatsPool(d)
        self.proj = nn.Linear(2 * d, out_dim)
 
@@ -39,26 +38,24 @@
         self, tokens_btq: torch.Tensor, lengths: Optional[torch.Tensor] = None
     ) -> torch.Tensor:
         B, T, Q = tokens_btq.shape
-
-
-
+        device = tokens_btq.device
+
+        if lengths is not None:
+            valid = torch.arange(T, device=device)[None, :] < lengths[:, None]
+        else:
+            valid = torch.ones(B, T, device=device, dtype=torch.bool)
+
+        q_idx = torch.arange(Q, device=device, dtype=torch.long).view(1, 1, Q)
         idx = q_idx * self.V + tokens_btq.long()
         raw_emb = self.emb(idx)
-
-        if self.training:
-            keep_prob = 0.95
-            mask = torch.rand(B, T, device=tokens_btq.device) < keep_prob
-            bad = mask.sum(dim=1) == 0
-            if bad.any():
-                bad_idx = bad.nonzero(as_tuple=False).squeeze(1)
-                rand_pos = torch.randint(
-                    0, T, (bad_idx.numel(),), device=tokens_btq.device
-                )
-                mask[bad_idx, rand_pos] = True
-            raw_emb = raw_emb * mask.float().unsqueeze(-1).unsqueeze(-1)
+        raw_emb = raw_emb * valid[:, :, None, None].to(raw_emb.dtype)
 
         x = self._get_mixed_embedding(raw_emb)
+        x = x * valid[:, :, None].to(x.dtype)
+
         h = self.enc(x)
+        h = h * valid[:, :, None].to(h.dtype)
+
         pooled = self.pool(h, lengths=lengths)
         e = self.proj(pooled)
         return F.normalize(e, dim=-1, eps=1e-6)
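The Token2SV change drops the old training-time random token dropout in favor of deterministic length masking: a `[B, T]` validity mask derived from `lengths` zeroes padded frames before mixing, encoding, and pooling, so padding can no longer leak into the speaker embedding. The mask construction in isolation (toy shapes):

```python
import torch

B, T = 3, 10
lengths = torch.tensor([10, 7, 4])

# True where the frame is real, False where it is padding (same comparison as the diff).
valid = torch.arange(T)[None, :] < lengths[:, None]  # [B, T] bool

raw_emb = torch.randn(B, T, 4, 8)                    # [B, T, Q, d] token embeddings
raw_emb = raw_emb * valid[:, :, None, None].float()  # padded frames contribute nothing downstream
print(valid.int())
```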