PyPI - torch-rechub - Versions diffs - 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl - Mend

torch-rechub 0.1.0 (py3-none-any.whl) → 0.3.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12)

torch_rechub/basic/layers.py +15 -9
torch_rechub/basic/loss_func.py +10 -4
torch_rechub/models/matching/narm.py +43 -20
torch_rechub/models/matching/sasrec.py +55 -5
torch_rechub/models/matching/stamp.py +43 -15
torch_rechub/trainers/match_trainer.py +54 -6
torch_rechub/utils/data.py +28 -12
torch_rechub/utils/match.py +61 -1
{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/METADATA +31 -18
{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/RECORD +12 -12
{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/WHEEL +0 -0
{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/licenses/LICENSE +0 -0

torch_rechub/basic/layers.py CHANGED

@@ -846,7 +846,7 @@ class HSTULayer(nn.Module):
         self.dropout = nn.Dropout(dropout)
         # Scaling factor for attention scores
-        self.scale = 1.0 / (dqk**0.5)
+        # self.scale = 1.0 / (dqk**0.5)  # Removed in favor of L2 norm + SiLU
     def forward(self, x, rel_pos_bias=None):
         """Forward pass of a single HSTU layer.
@@ -878,6 +878,10 @@ class HSTULayer(nn.Module):
         u = proj_out[..., 2 * self.n_heads * self.dqk:2 * self.n_heads * self.dqk + self.n_heads * self.dv].reshape(batch_size, seq_len, self.n_heads, self.dv)
         v = proj_out[..., 2 * self.n_heads * self.dqk + self.n_heads * self.dv:].reshape(batch_size, seq_len, self.n_heads, self.dv)
+        # Apply L2 normalization to Q and K (HSTU specific)
+        q = F.normalize(q, p=2, dim=-1)
+        k = F.normalize(k, p=2, dim=-1)
         # Transpose to (B, H, L, dqk/dv)
         q = q.transpose(1, 2)  # (B, H, L, dqk)
         k = k.transpose(1, 2)  # (B, H, L, dqk)
@@ -885,20 +889,22 @@ class HSTULayer(nn.Module):
         v = v.transpose(1, 2)  # (B, H, L, dv)
         # Compute attention scores: (B, H, L, L)
-        scores = torch.matmul(q, k.transpose(-2, -1)) * self.scale
+        # Note: No scaling factor here as we use L2 norm + SiLU
+        scores = torch.matmul(q, k.transpose(-2, -1))
+        # Add relative position bias if provided (before masking/activation)
+        if rel_pos_bias is not None:
+            scores = scores + rel_pos_bias
         # Add causal mask (prevent attending to future positions)
         # For generative models this is required so that position i only attends
         # to positions <= i.
         causal_mask = torch.tril(torch.ones(seq_len, seq_len, device=x.device, dtype=torch.bool))
-        scores = scores.masked_fill(~causal_mask.unsqueeze(0).unsqueeze(0), float('-inf'))
-        # Add relative position bias if provided
-        if rel_pos_bias is not None:
-            scores = scores + rel_pos_bias
+        # Use a large negative number for masking compatible with SiLU
+        scores = scores.masked_fill(~causal_mask.unsqueeze(0).unsqueeze(0), -1e4)
-        # Softmax over attention scores
-        attn_weights = F.softmax(scores, dim=-1)
+        # SiLU activation over attention scores (HSTU specific)
+        attn_weights = F.silu(scores)
         attn_weights = self.dropout(attn_weights)
         # Attention output: (B, H, L, dv)

torch_rechub/basic/loss_func.py CHANGED

@@ -81,7 +81,8 @@ class HingeLoss(torch.nn.Module):
         self.margin = margin
         self.n_items = num_items
-    def forward(self, pos_score, neg_score):
+    def forward(self, pos_score, neg_score, in_batch_neg=False):
+        pos_score = pos_score.view(-1)
         loss = torch.maximum(torch.max(neg_score, dim=-1).values - pos_score + self.margin, torch.tensor([0]).type_as(pos_score))
         if self.n_items is not None:
             impostors = neg_score - pos_score.view(-1, 1) + self.margin > 0
@@ -96,9 +97,14 @@ class BPRLoss(torch.nn.Module):
     def __init__(self):
         super().__init__()
-    def forward(self, pos_score, neg_score):
-        loss = torch.mean(-(pos_score - neg_score).sigmoid().log(), dim=-1)
-        return loss
+    def forward(self, pos_score, neg_score, in_batch_neg=False):
+        pos_score = pos_score.view(-1)
+        if neg_score.dim() == 1:
+            diff = pos_score - neg_score
+        else:
+            diff = pos_score.view(-1, 1) - neg_score
+        loss = -diff.sigmoid().log()
+        return loss.mean()
 class NCELoss(torch.nn.Module):

torch_rechub/models/matching/narm.py CHANGED

@@ -17,12 +17,14 @@ from torch.nn import GRU, Dropout, Embedding, Parameter
 class NARM(nn.Module):
-    def __init__(self, item_history_feature, hidden_dim, emb_dropout_p, session_rep_dropout_p):
+    def __init__(self, item_history_feature, hidden_dim, emb_dropout_p, session_rep_dropout_p, item_feature=None):
         super(NARM, self).__init__()
         # item embedding layer
         self.item_history_feature = item_history_feature
+        self.item_feature = item_feature  # Optional: for in-batch negative sampling
         self.item_emb = Embedding(item_history_feature.vocab_size, item_history_feature.embed_dim, padding_idx=0)
+        self.mode = None  # For inference: "user" or "item"
         # embedding dropout layer
         self.emb_dropout = Dropout(emb_dropout_p)
@@ -42,41 +44,62 @@ class NARM(nn.Module):
         # bilinear projection matrix
         self.b = Parameter(torch.randn(item_history_feature.embed_dim, hidden_dim * 2))
-    def forward(self, input_dict):
-        # Eq. 1-4, index item embeddings and pass through gru
-        # # Fetch the embeddings for items in the session
+    def _compute_session_repr(self, input_dict):
+        """Compute session representation (user embedding before bilinear transform)."""
         input = input_dict[self.item_history_feature.name]
         value_mask = (input != 0)
         value_counts = value_mask.sum(dim=1, keepdim=False).to("cpu").detach()
         embs = rnn_utils.pack_padded_sequence(self.emb_dropout(self.item_emb(input)), value_counts, batch_first=True, enforce_sorted=False)
-        # # compute hidden states at each time step
         h, h_t = self.gru(embs)
         h_t = h_t.permute(1, 0, 2)
         h, _ = rnn_utils.pad_packed_sequence(h, batch_first=True)
-        # Eq. 5, set last hidden state of gru as the output of the global
-        # encoder
         c_g = h_t.squeeze(1)
-        # Eq. 8, compute similarity between final hidden state and previous
-        # hidden states
         q = sigmoid(h_t @ self.a_1.T + h @ self.a_2.T) @ self.v
-        # Eq. 7, compute attention
         alpha = torch.exp(q) * value_mask.unsqueeze(-1)
         alpha /= alpha.sum(dim=1, keepdim=True)
-        # Eq. 6, compute the output of the local encoder
         c_l = (alpha * h).sum(1)
-        # Eq. 9, compute session representation by concatenating user
-        # sequential behavior (global) and main purpose in the current session
-        # (local)
         c = self.session_rep_dropout(torch.hstack((c_g, c_l)))
+        return c
+    def user_tower(self, x):
+        """Compute user embedding for in-batch negative sampling."""
+        if self.mode == "item":
+            return None
+        c = self._compute_session_repr(x)
+        user_emb = c @ self.b.T  # [batch_size, embed_dim]
+        if self.mode == "user":
+            return user_emb
+        return user_emb.unsqueeze(1)  # [batch_size, 1, embed_dim]
+    def item_tower(self, x):
+        """Compute item embedding for in-batch negative sampling."""
+        if self.mode == "user":
+            return None
+        if self.item_feature is not None:
+            item_ids = x[self.item_feature.name]
+            item_emb = self.item_emb(item_ids)  # [batch_size, embed_dim]
+            if self.mode == "item":
+                return item_emb
+            return item_emb.unsqueeze(1)  # [batch_size, 1, embed_dim]
+        return None
-        # Eq. 10, compute bilinear similarity between current session and each
-        # candidate items
+    def forward(self, input_dict):
+        # Support inference mode
+        if self.mode == "user":
+            return self.user_tower(input_dict)
+        if self.mode == "item":
+            return self.item_tower(input_dict)
+        # In-batch negative sampling mode
+        if self.item_feature is not None:
+            user_emb = self.user_tower(input_dict)  # [batch_size, 1, embed_dim]
+            item_emb = self.item_tower(input_dict)  # [batch_size, 1, embed_dim]
+            return torch.mul(user_emb, item_emb).sum(dim=-1).squeeze()
+        # Original behavior: compute scores for all items
+        c = self._compute_session_repr(input_dict)
         s = c @ self.b.T @ self.item_emb.weight.T
         return s

torch_rechub/models/matching/sasrec.py CHANGED

@@ -21,6 +21,7 @@ class SASRec(torch.nn.Module):
         max_len: The length of the sequence feature.
         num_blocks: The number of stacks of attention modules.
         num_heads: The number of heads in MultiheadAttention.
+        item_feature: Optional item feature for in-batch negative sampling mode.
     """
@@ -31,9 +32,15 @@ class SASRec(torch.nn.Module):
         dropout_rate=0.5,
         num_blocks=2,
         num_heads=1,
+        item_feature=None,
     ):
         super(SASRec, self).__init__()
+        self.features = features
+        self.item_feature = item_feature  # Optional: for in-batch negative sampling
+        self.mode = None  # For inference: "user" or "item"
+        self.max_len = max_len
         self.features = features
         self.item_num = self.features[0].vocab_size
@@ -94,17 +101,60 @@ class SASRec(torch.nn.Module):
         return seq_output
+    def user_tower(self, x):
+        """Compute user embedding for in-batch negative sampling.
+        Takes the last valid position's output as user representation.
+        """
+        if self.mode == "item":
+            return None
+        # Get sequence embedding
+        seq_embed = self.item_emb(x, self.features[:1])[:, 0]  # Only use seq feature
+        seq_output = self.seq_forward(x, seq_embed)  # [batch_size, max_len, embed_dim]
+        # Get the last valid position for each sequence
+        seq = x['seq']
+        seq_lens = (seq != 0).sum(dim=1) - 1  # Last valid index
+        seq_lens = seq_lens.clamp(min=0)
+        batch_idx = torch.arange(seq_output.size(0), device=seq_output.device)
+        user_emb = seq_output[batch_idx, seq_lens]  # [batch_size, embed_dim]
+        if self.mode == "user":
+            return user_emb
+        return user_emb.unsqueeze(1)  # [batch_size, 1, embed_dim]
+    def item_tower(self, x):
+        """Compute item embedding for in-batch negative sampling."""
+        if self.mode == "user":
+            return None
+        if self.item_feature is not None:
+            item_ids = x[self.item_feature.name]
+            # Use the embedding layer to get item embeddings
+            item_emb = self.item_emb.embedding[self.features[0].name](item_ids)
+            if self.mode == "item":
+                return item_emb
+            return item_emb.unsqueeze(1)  # [batch_size, 1, embed_dim]
+        return None
     def forward(self, x):
-        # (batch_size, 3, max_len, embed_dim)
+        # Support inference mode
+        if self.mode == "user":
+            return self.user_tower(x)
+        if self.mode == "item":
+            return self.item_tower(x)
+        # In-batch negative sampling mode
+        if self.item_feature is not None:
+            user_emb = self.user_tower(x)  # [batch_size, 1, embed_dim]
+            item_emb = self.item_tower(x)  # [batch_size, 1, embed_dim]
+            return torch.mul(user_emb, item_emb).sum(dim=-1).squeeze()
+        # Original behavior: pairwise loss with pos/neg sequences
         embedding = self.item_emb(x, self.features)
-        # (batch_size, max_len, embed_dim)
         seq_embed, pos_embed, neg_embed = embedding[:, 0], embedding[:, 1], embedding[:, 2]
-        # (batch_size, max_len, embed_dim)
         seq_output = self.seq_forward(x, seq_embed)
         pos_logits = (seq_output * pos_embed).sum(dim=-1)
-        neg_logits = (seq_output * neg_embed).sum(dim=-1)  # (batch_size, max_len)
+        neg_logits = (seq_output * neg_embed).sum(dim=-1)
         return pos_logits, neg_logits

torch_rechub/models/matching/stamp.py CHANGED

@@ -14,13 +14,15 @@ import torch.nn.functional as F
 class STAMP(nn.Module):
-    def __init__(self, item_history_feature, weight_std, emb_std):
+    def __init__(self, item_history_feature, weight_std, emb_std, item_feature=None):
         super(STAMP, self).__init__()
         # item embedding layer
         self.item_history_feature = item_history_feature
+        self.item_feature = item_feature  # Optional: for in-batch negative sampling
         n_items, item_emb_dim, = item_history_feature.vocab_size, item_history_feature.embed_dim
         self.item_emb = nn.Embedding(n_items, item_emb_dim, padding_idx=0)
+        self.mode = None  # For inference: "user" or "item"
         # weights and biases for attention computation
         self.w_0 = nn.Parameter(torch.zeros(item_emb_dim, 1))
@@ -50,32 +52,58 @@ class STAMP(nn.Module):
         elif isinstance(module, nn.Embedding):
             module.weight.data.normal_(std=self.emb_std)
-    def forward(self, input_dict):
-        # Index the embeddings for the items in the session
+    def _compute_user_repr(self, input_dict):
+        """Compute user representation (h_s * h_t)."""
         input = input_dict[self.item_history_feature.name]
         value_mask = (input != 0).unsqueeze(-1)
         value_counts = value_mask.sum(dim=1, keepdim=True).squeeze(-1)
         item_emb_batch = self.item_emb(input) * value_mask
-        # Index the embeddings of the latest clicked items
         x_t = self.item_emb(torch.gather(input, 1, value_counts - 1))
-        # Eq. 2, user's general interest in the current session
         m_s = ((item_emb_batch).sum(1) / value_counts).unsqueeze(1)
-        # Eq. 7, compute attention coefficient
         a = F.normalize(torch.exp(torch.sigmoid(item_emb_batch @ self.w_1_t + x_t @ self.w_2_t + m_s @ self.w_3_t + self.b_a) @ self.w_0) * value_mask, p=1, dim=1)
-        # Eq. 8, compute user's attention-based interests
         m_a = (a * item_emb_batch).sum(1) + m_s.squeeze(1)
-        # Eq. 3, compute the output state of the general interest
         h_s = self.f_s(m_a)
-        # Eq. 9, compute the output state of the short-term interest
         h_t = self.f_t(x_t).squeeze(1)
+        return h_s * h_t  # [batch_size, embed_dim]
+    def user_tower(self, x):
+        """Compute user embedding for in-batch negative sampling."""
+        if self.mode == "item":
+            return None
+        user_emb = self._compute_user_repr(x)
+        if self.mode == "user":
+            return user_emb
+        return user_emb.unsqueeze(1)  # [batch_size, 1, embed_dim]
+    def item_tower(self, x):
+        """Compute item embedding for in-batch negative sampling."""
+        if self.mode == "user":
+            return None
+        if self.item_feature is not None:
+            item_ids = x[self.item_feature.name]
+            item_emb = self.item_emb(item_ids)  # [batch_size, embed_dim]
+            if self.mode == "item":
+                return item_emb
+            return item_emb.unsqueeze(1)  # [batch_size, 1, embed_dim]
+        return None
-        # Eq. 4, compute candidate scores
-        z = h_s * h_t @ self.item_emb.weight.T
+    def forward(self, input_dict):
+        # Support inference mode
+        if self.mode == "user":
+            return self.user_tower(input_dict)
+        if self.mode == "item":
+            return self.item_tower(input_dict)
+        # In-batch negative sampling mode
+        if self.item_feature is not None:
+            user_emb = self.user_tower(input_dict)  # [batch_size, 1, embed_dim]
+            item_emb = self.item_tower(input_dict)  # [batch_size, 1, embed_dim]
+            return torch.mul(user_emb, item_emb).sum(dim=-1).squeeze()
+        # Original behavior: compute scores for all items
+        user_repr = self._compute_user_repr(input_dict)
+        z = user_repr @ self.item_emb.weight.T
         return z

torch_rechub/trainers/match_trainer.py CHANGED

@@ -6,6 +6,7 @@ from sklearn.metrics import roc_auc_score
 from ..basic.callback import EarlyStopper
 from ..basic.loss_func import BPRLoss, RegularizationLoss
+from ..utils.match import gather_inbatch_logits, inbatch_negative_sampling
 class MatchTrainer(object):
@@ -23,12 +24,20 @@ class MatchTrainer(object):
         device (str): `"cpu"` or `"cuda:0"`
         gpus (list): id of multi gpu (default=[]). If the length >=1, then the model will wrapped by nn.DataParallel.
         model_path (str): the path you want to save the model (default="./"). Note only save the best weight in the validation data.
+        in_batch_neg (bool): whether to use in-batch negative sampling instead of global negatives.
+        in_batch_neg_ratio (int): number of negatives to draw from the batch per positive sample when in_batch_neg is True.
+        hard_negative (bool): whether to choose hardest negatives within batch (top-k by score) instead of uniform random.
+        sampler_seed (int): optional random seed for in-batch sampler to ease reproducibility/testing.
     """
     def __init__(
         self,
         model,
         mode=0,
+        in_batch_neg=False,
+        in_batch_neg_ratio=None,
+        hard_negative=False,
+        sampler_seed=None,
         optimizer_fn=torch.optim.Adam,
         optimizer_params=None,
         regularization_params=None,
@@ -51,13 +60,30 @@ class MatchTrainer(object):
         # torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
         self.device = torch.device(device)
         self.model.to(self.device)
+        self.in_batch_neg = in_batch_neg
+        self.in_batch_neg_ratio = in_batch_neg_ratio
+        self.hard_negative = hard_negative
+        self._sampler_generator = None
+        if sampler_seed is not None:
+            self._sampler_generator = torch.Generator(device=self.device)
+            self._sampler_generator.manual_seed(sampler_seed)
+        # Check model compatibility for in-batch negative sampling
+        if in_batch_neg:
+            base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
+            if not hasattr(base_model, 'user_tower') or not hasattr(base_model, 'item_tower'):
+                raise ValueError(
+                    f"Model {type(base_model).__name__} does not support in-batch negative sampling. "
+                    "Only two-tower models with user_tower() and item_tower() methods are supported, "
+                    "such as DSSM, YoutubeDNN, MIND, GRU4Rec, SINE, ComiRec, SASRec, NARM, STAMP, etc."
+                )
         if optimizer_params is None:
             optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5}
         if regularization_params is None:
             regularization_params = {"embedding_l1": 0.0, "embedding_l2": 0.0, "dense_l1": 0.0, "dense_l2": 0.0}
         self.mode = mode
         if mode == 0:  # point-wise loss, binary cross_entropy
-            self.criterion = torch.nn.BCELoss()  # default loss binary cross_entropy
+            # With in-batch negatives we treat it as list-wise classification over sampled negatives
+            self.criterion = torch.nn.CrossEntropyLoss() if in_batch_neg else torch.nn.BCELoss()
         elif mode == 1:  # pair-wise loss
             self.criterion = BPRLoss()
         elif mode == 2:  # list-wise loss, softmax
@@ -89,12 +115,34 @@ class MatchTrainer(object):
                 y = y.float()  # torch._C._nn.binary_cross_entropy expected Float
             else:
                 y = y.long()  #
-            if self.mode == 1:  # pair_wise
-                pos_score, neg_score = self.model(x_dict)
-                loss = self.criterion(pos_score, neg_score)
+            if self.in_batch_neg:
+                base_model = self.model.module if isinstance(self.model, torch.nn.DataParallel) else self.model
+                user_embedding = base_model.user_tower(x_dict)
+                item_embedding = base_model.item_tower(x_dict)
+                if user_embedding is None or item_embedding is None:
+                    raise ValueError("Model must return user/item embeddings when in_batch_neg is True.")
+                if user_embedding.dim() > 2 and user_embedding.size(1) == 1:
+                    user_embedding = user_embedding.squeeze(1)
+                if item_embedding.dim() > 2 and item_embedding.size(1) == 1:
+                    item_embedding = item_embedding.squeeze(1)
+                if user_embedding.dim() != 2 or item_embedding.dim() != 2:
+                    raise ValueError(f"In-batch negative sampling requires 2D embeddings, got shapes {user_embedding.shape} and {item_embedding.shape}")
+                scores = torch.matmul(user_embedding, item_embedding.t())  # bs x bs
+                neg_indices = inbatch_negative_sampling(scores, neg_ratio=self.in_batch_neg_ratio, hard_negative=self.hard_negative, generator=self._sampler_generator)
+                logits = gather_inbatch_logits(scores, neg_indices)
+                if self.mode == 1:  # pair_wise
+                    loss = self.criterion(logits[:, 0], logits[:, 1:], in_batch_neg=True)
+                else:  # point-wise/list-wise -> cross entropy on sampled logits
+                    targets = torch.zeros(logits.size(0), dtype=torch.long, device=self.device)
+                    loss = self.criterion(logits, targets)
             else:
-                y_pred = self.model(x_dict)
-                loss = self.criterion(y_pred, y)
+                if self.mode == 1:  # pair_wise
+                    pos_score, neg_score = self.model(x_dict)
+                    loss = self.criterion(pos_score, neg_score)
+                else:
+                    y_pred = self.model(x_dict)
+                    loss = self.criterion(y_pred, y)
             # Add regularization loss
             reg_loss = self.reg_loss_fn(self.model)

torch_rechub/utils/data.py CHANGED

@@ -482,41 +482,57 @@ class SequenceDataGenerator(object):
         # Underlying dataset
         self.dataset = SeqDataset(seq_tokens, seq_positions, targets, seq_time_diffs)
-    def generate_dataloader(self, batch_size=32, num_workers=0, split_ratio=None):
-        """Generate train/val/test dataloaders.
+    def generate_dataloader(self, batch_size=32, num_workers=0, split_ratio=None, shuffle=True):
+        """Generate dataloader(s) from the dataset.
         Parameters
         ----------
         batch_size : int, default=32
+            Batch size for DataLoader.
         num_workers : int, default=0
-        split_ratio : tuple, default (0.7, 0.1, 0.2)
-            Train/val/test split.
+            Number of workers for DataLoader.
+        split_ratio : tuple or None, default=None
+            If None, returns a single DataLoader without splitting the data.
+            If tuple (e.g., (0.7, 0.1, 0.2)), splits dataset and returns
+            (train_loader, val_loader, test_loader).
+        shuffle : bool, default=True
+            Whether to shuffle data. Only applies when split_ratio is None.
+            When split_ratio is provided, train data is always shuffled.
         Returns
         -------
         tuple
-            (train_loader, val_loader, test_loader)
+            If split_ratio is None: returns (dataloader,)
+            If split_ratio is provided: returns (train_loader, val_loader, test_loader)
+        Examples
+        --------
+        # Case 1: Data already split, just create loader
+        >>> train_gen = SequenceDataGenerator(train_data['seq_tokens'], ...)
+        >>> train_loader = train_gen.generate_dataloader(batch_size=32)[0]
+        # Case 2: Auto-split data into train/val/test
+        >>> all_gen = SequenceDataGenerator(all_data['seq_tokens'], ...)
+        >>> train_loader, val_loader, test_loader = all_gen.generate_dataloader(
+        ...     batch_size=32, split_ratio=(0.7, 0.1, 0.2))
         """
         if split_ratio is None:
-            split_ratio = (0.7, 0.1, 0.2)
+            # No split - data is already divided, just create a single DataLoader
+            dataloader = DataLoader(self.dataset, batch_size=batch_size, shuffle=shuffle, num_workers=num_workers)
+            return (dataloader,)
-        # 验证分割比例
+        # Split data into train/val/test
         assert abs(sum(split_ratio) - 1.0) < 1e-6, "split_ratio must sum to 1.0"
-        # 计算分割大小
         total_size = len(self.dataset)
         train_size = int(total_size * split_ratio[0])
         val_size = int(total_size * split_ratio[1])
         test_size = total_size - train_size - val_size
-        # 分割数据集
         train_dataset, val_dataset, test_dataset = random_split(self.dataset, [train_size, val_size, test_size])
-        # 创建数据加载器
         train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=num_workers)
         val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
         test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=num_workers)
         return train_loader, val_loader, test_loader

torch_rechub/utils/match.py CHANGED

@@ -4,6 +4,7 @@ from collections import Counter, OrderedDict
 import numpy as np
 import pandas as pd
+import torch
 import tqdm
 from .data import df_to_dict, pad_sequences
@@ -16,7 +17,6 @@ except ImportError:
     ANNOY_AVAILABLE = False
 try:
-    import torch
     from pymilvus import Collection, CollectionSchema, DataType, FieldSchema, connections, utility
     MILVUS_AVAILABLE = True
 except ImportError:
@@ -101,6 +101,66 @@ def negative_sample(items_cnt_order, ratio, method_id=0):
     return neg_items
+def inbatch_negative_sampling(scores, neg_ratio=None, hard_negative=False, generator=None):
+    """Generate in-batch negative indices from a similarity matrix.
+    This mirrors the offline ``negative_sample`` API by only returning sampled
+    indices; score gathering is handled separately to keep responsibilities clear.
+    Args:
+        scores (torch.Tensor): similarity matrix with shape (batch_size, batch_size).
+        neg_ratio (int, optional): number of negatives for each positive sample.
+            Defaults to batch_size-1 when omitted or out of range.
+        hard_negative (bool, optional): whether to pick top-k highest scores as negatives
+            instead of uniform random sampling. Defaults to False.
+        generator (torch.Generator, optional): generator to control randomness for tests/reproducibility.
+    Returns:
+        torch.Tensor: sampled negative indices with shape (batch_size, neg_ratio).
+    """
+    if scores.dim() != 2:  # must be batch_size x batch_size
+        raise ValueError(f"inbatch_negative_sampling expects 2D scores, got shape {tuple(scores.shape)}")
+    batch_size = scores.size(0)
+    if batch_size <= 1:
+        raise ValueError("In-batch negative sampling requires batch_size > 1")
+    max_neg = batch_size - 1  # each col can provide at most batch_size-1 negatives
+    if neg_ratio is None or neg_ratio <= 0 or neg_ratio > max_neg:
+        neg_ratio = max_neg
+    device = scores.device
+    index_range = torch.arange(batch_size, device=device)
+    neg_indices = torch.empty((batch_size, neg_ratio), dtype=torch.long, device=device)
+    # for each sample, pick neg_ratio negatives
+    for i in range(batch_size):
+        if hard_negative:
+            row_scores = scores[i].clone()
+            row_scores[i] = float("-inf")  # mask positive
+            topk = torch.topk(row_scores, k=neg_ratio).indices
+            neg_indices[i] = topk
+        else:
+            candidates = torch.cat([index_range[:i], index_range[i + 1:]])  # all except i
+            perm = torch.randperm(candidates.size(0), device=device, generator=generator)  # random negative sampling
+            neg_indices[i] = candidates[perm[:neg_ratio]]
+    return neg_indices
+def gather_inbatch_logits(scores, neg_indices):
+    """
+    scores: (B, B)
+        scores[i][j] = user_i ⋅ item_j
+    neg_indices: (B, K)
+        neg_indices[i] = the K negative items for user_i
+    """
+    # positive: scores[i][i]
+    positive_logits = torch.diagonal(scores).reshape(-1, 1)  # (B,1)
+    # negatives: scores[i][neg_indices[i, j]]
+    negative_logits = scores[torch.arange(scores.size(0)).unsqueeze(1), neg_indices]  # (B,K)
+    return torch.cat([positive_logits, negative_logits], dim=1)
 def generate_seq_feature_match(data, user_col, item_col, time_col, item_attribute_cols=None, sample_method=0, mode=0, neg_ratio=0, min_item=0):
     """Generate sequence feature and negative sample for match.

{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/METADATA RENAMED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: torch-rechub
-Version: 0.1.0
+Version: 0.3.0
 Summary: A Pytorch Toolbox for Recommendation Models, Easy-to-use and Easy-to-extend.
 Project-URL: Homepage, https://github.com/datawhalechina/torch-rechub
 Project-URL: Documentation, https://www.torch-rechub.com
@@ -31,7 +31,7 @@ Requires-Dist: transformers>=4.46.3
 Provides-Extra: annoy
 Requires-Dist: annoy>=1.17.2; extra == 'annoy'
 Provides-Extra: bigdata
-Requires-Dist: pyarrow~=21.0; extra == 'bigdata'
+Requires-Dist: pyarrow<23,>=21; extra == 'bigdata'
 Provides-Extra: dev
 Requires-Dist: bandit>=1.7.0; extra == 'dev'
 Requires-Dist: flake8>=3.8.0; extra == 'dev'
@@ -60,9 +60,13 @@ Requires-Dist: graphviz>=0.20; extra == 'visualization'
 Requires-Dist: torchview>=0.2.6; extra == 'visualization'
 Description-Content-Type: text/markdown
-# 🔥 Torch-RecHub - 轻量、高效、易用的 PyTorch 推荐系统框架
+<div align="center">
-> 🚀 **30+ 主流推荐模型** | 🎯 **开箱即用** | 📦 **一键部署 ONNX** | 🤖 **支持生成式推荐 (HSTU/HLLM)**
+![Torch-RecHub 横幅](docs/public/img/banner.png)
+# Torch-RecHub: 轻量、高效、易用的 PyTorch 推荐系统框架
+【⚠️ Alpha内测版本警告：此为早期内部构建版本，尚不完整且可能存在错误，欢迎大家提Issue反馈问题或建议。】
 [![许可证](https://img.shields.io/badge/license-MIT-blue?style=for-the-badge)](LICENSE)
 ![GitHub Repo stars](https://img.shields.io/github/stars/datawhalechina/torch-rechub?style=for-the-badge)
@@ -75,27 +79,21 @@ Description-Content-Type: text/markdown
 [![numpy 版本](https://img.shields.io/badge/numpy-1.19%2B-orange?style=for-the-badge)](https://numpy.org/)
 [![scikit-learn 版本](https://img.shields.io/badge/scikit_learn-0.23%2B-orange?style=for-the-badge)](https://scikit-learn.org/)
 [![torch-rechub 版本](https://img.shields.io/badge/torch_rechub-0.0.3%2B-orange?style=for-the-badge)](https://pypi.org/project/torch-rechub/)
+[![torchview](https://img.shields.io/badge/torchview-0.2%2B-green?style=for-the-badge)](https://github.com/mert-kurttutan/torchview)
 [English](README_en.md) | 简体中文
-**在线文档：** https://datawhalechina.github.io/torch-rechub/ （英文）| https://datawhalechina.github.io/torch-rechub/zh/ （简体中文）
+![架构图](docs/public/img/project_framework.png)
-**Torch-RecHub** —— **10 行代码实现工业级推荐系统**。30+ 主流模型开箱即用，支持一键 ONNX 部署，让你专注于业务而非工程。
+</div>
-![Torch-RecHub 横幅](docs/public/img/banner.png)
-## 🎯 为什么选择 Torch-RecHub？
+**在线文档：** https://datawhalechina.github.io/torch-rechub/zh/
-| 特性          | Torch-RecHub                | 其他框架   |
-| ------------- | --------------------------- | ---------- |
-| 代码行数      | **10行** 完成训练+评估+部署 | 100+ 行    |
-| 模型覆盖      | **30+** 主流模型            | 有限       |
-| 生成式推荐    | ✅ HSTU/HLLM (Meta 2024)     | ❌          |
-| ONNX 一键导出 | ✅ 内置支持                  | 需手动适配 |
-| 学习曲线      | 极低                        | 陡峭       |
+**Torch-RecHub** —— **10 行代码实现工业级推荐系统**。30+ 主流模型开箱即用，支持一键 ONNX 部署，让你专注于业务而非工程。
 ## ✨ 特性
+* **生成式推荐模型:** LLM时代下，可以复现部分生成式推荐模型
 * **模块化设计:** 易于添加新的模型、数据集和评估指标。
 * **基于 PyTorch:** 利用 PyTorch 的动态图和 GPU 加速能力。
 * **丰富的模型库:** 涵盖 **30+** 经典和前沿推荐算法（召回、排序、多任务、生成式推荐等）。
@@ -109,7 +107,6 @@ Description-Content-Type: text/markdown
 ## 📖 目录
 - [🔥 Torch-RecHub - 轻量、高效、易用的 PyTorch 推荐系统框架](#-torch-rechub---轻量高效易用的-pytorch-推荐系统框架)
-  - [🎯 为什么选择 Torch-RecHub？](#-为什么选择-torch-rechub)
   - [✨ 特性](#-特性)
   - [📖 目录](#-目录)
   - [🔧 安装](#-安装)
@@ -221,6 +218,8 @@ torch-rechub/             # 根目录
 本框架目前支持 **30+** 主流推荐模型：
+<details>
 ### 排序模型 (Ranking Models) - 13个
 | 模型          | 论文                                             | 简介                    |
@@ -236,7 +235,11 @@ torch-rechub/             # 根目录
 | **AutoInt**   | [CIKM 2019](https://arxiv.org/abs/1810.11921)    | 自动特征交互学习        |
 | **FiBiNET**   | [RecSys 2019](https://arxiv.org/abs/1905.09433)  | 特征重要性 + 双线性交互 |
 | **DeepFFM**   | [RecSys 2019](https://arxiv.org/abs/1611.00144)  | 场感知因子分解机        |
-| **EDCN**      | [KDD 2021](https://arxiv.org/abs/2106.03032)     | 增强型交叉网络          |
+| **EDCN**      | [KDD 2021](https://arxiv.org/abs/2106.03032)     | 增强型交叉网络          |
+</details>
+<details>
 ### 召回模型 (Matching Models) - 12个
@@ -253,6 +256,10 @@ torch-rechub/             # 根目录
 | **STAMP**      | [KDD 2018](https://dl.acm.org/doi/10.1145/3219819.3219895)                     | 短期注意力记忆优先 |
 | **ComiRec**    | [KDD 2020](https://arxiv.org/abs/2005.09347)                                   | 可控多兴趣推荐     |
+</details>
+<details>
 ### 多任务模型 (Multi-Task Models) - 5个
 | 模型             | 论文                                                          | 简介               |
@@ -263,6 +270,10 @@ torch-rechub/             # 根目录
 | **AITM**         | [KDD 2021](https://arxiv.org/abs/2105.08489)                  | 自适应信息迁移     |
 | **SharedBottom** | -                                                             | 经典多任务共享底层 |
+</details>
+<details>
 ### 生成式推荐 (Generative Recommendation) - 2个
 | 模型     | 论文                                          | 简介                                         |
@@ -270,6 +281,8 @@ torch-rechub/             # 根目录
 | **HSTU** | [Meta 2024](https://arxiv.org/abs/2402.17152) | 层级序列转换单元，支撑 Meta 万亿参数推荐系统 |
 | **HLLM** | [2024](https://arxiv.org/abs/2409.12740)      | 层级大语言模型推荐，融合 LLM 语义理解能力    |
+</details>
 ## 📊 支持的数据集
 框架内置了对以下常见数据集格式的支持或提供了处理脚本：

{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/RECORD RENAMED Viewed

@@ -5,8 +5,8 @@ torch_rechub/basic/activation.py,sha256=hIZDCe7cAgV3bX2UnvUrkO8pQs4iXxkQGD0J4Gej
 torch_rechub/basic/callback.py,sha256=ZeiDSDQAZUKmyK1AyGJCnqEJ66vwfwlX5lOyu6-h2G0,946
 torch_rechub/basic/features.py,sha256=TLHR5EaNvIbKyKd730Qt8OlLpV0Km91nv2TMnq0HObk,3562
 torch_rechub/basic/initializers.py,sha256=V6hprXvRexcw3vrYsf8Qp-F52fp8uzPMpa1CvkHofy8,3196
-torch_rechub/basic/layers.py,sha256=sLntNogvBu0QHm7riwyuJp_FbpbmPG26XeOyLs83Yu0,38813
-torch_rechub/basic/loss_func.py,sha256=a-j1gan4eYUk5zstWwKeaPZ99eJkZPGWS82LNhT6Jbc,7756
+torch_rechub/basic/layers.py,sha256=0qNeoIzgcSfmlVoQkyjT6yEnLklcKmQG44wBypAn2rY,39148
+torch_rechub/basic/loss_func.py,sha256=P3FbJ-eXviHostvwgsBdv75QB_GXbVJC_XpQA5jL628,7983
 torch_rechub/basic/metaoptimizer.py,sha256=y-oT4MV3vXnSQ5Zd_ZEHP1KClITEi3kbZa6RKjlkYw8,3093
 torch_rechub/basic/metric.py,sha256=9JsaJJGvT6VRvsLoM2Y171CZxESsjYTofD3qnMI-bPM,8443
 torch_rechub/basic/tracking.py,sha256=7-aoyKJxyqb8GobpjRjFsgPYWsBDOV44BYOC_vMoCto,6608
@@ -24,10 +24,10 @@ torch_rechub/models/matching/dssm_facebook.py,sha256=n3MS7FT_kyJSDnVTlPCv_nPJ0MH
 torch_rechub/models/matching/dssm_senet.py,sha256=_E-xEh44XvOaBHP8XdSRkFsTvajhovxlYyCt3H9P61c,4052
 torch_rechub/models/matching/gru4rec.py,sha256=cJtYCkFyg3cPYkOy_YeXRAsTev0cBPiicrj68xJup9k,3932
 torch_rechub/models/matching/mind.py,sha256=NIUeqWhrnZeiFDMNFvXfMx1GMBMaCZnc6nxNZCJpwSE,4933
-torch_rechub/models/matching/narm.py,sha256=2dlTuan9AFrku53WJlBbTwgLlfOHsas3-JBFGxEz7oE,3167
-torch_rechub/models/matching/sasrec.py,sha256=QDfKrFl-aduWg6rY3R13RrdpMiApVugDmtEsWJulgzg,5534
+torch_rechub/models/matching/narm.py,sha256=IjUq0dVRwo4cMnQ35DIKk9PkSGxlHx8NNJMqoHpNUmk,4235
+torch_rechub/models/matching/sasrec.py,sha256=FFHXsUsaJ_tRR51W2ihuLcxXRqg7sgsqVe5CXOlC4to,7693
 torch_rechub/models/matching/sine.py,sha256=sUTUHbnewdSBd51epDIp9j-B1guKkhm6eM-KkZ3oS3Q,6746
-torch_rechub/models/matching/stamp.py,sha256=DBVM3iCoQTBKwO7oKHg5SCCDXqTuRJ4Ko1n7StgEovA,3308
+torch_rechub/models/matching/stamp.py,sha256=rbuTrh-5klXTCCWtNkVE9BczeEDPa7Yjogaz9ROa1_U,4587
 torch_rechub/models/matching/youtube_dnn.py,sha256=EQV_GoEs2Hxwg1U3Dj7-lWkEejEqGmtZ7D9CgfknQdA,3368
 torch_rechub/models/matching/youtube_sbc.py,sha256=paw9uRnbNw_-EaFpRogy7rB4vhw4KN0Qf8BfQylTj4I,4757
 torch_rechub/models/multi_task/__init__.py,sha256=5N8aJ32fzxniDm4d-AeNSi81CFWyBhjoSaK3OC-XCkY,189
@@ -56,19 +56,19 @@ torch_rechub/serving/faiss.py,sha256=kroqICeIxfZg8hPZiWZXmFtUpQSj9JLheFxorzdV3aw
 torch_rechub/serving/milvus.py,sha256=EnhD-zbtmp3KAS-lkZYFCQjXeKe7J2-LM3-iIUhLg0Y,6529
 torch_rechub/trainers/__init__.py,sha256=NSa2DqgfE1HGDyj40YgrbtUrfBHBxNBpw57XtaAB_jE,148
 torch_rechub/trainers/ctr_trainer.py,sha256=6vU2_-HCY1MBHwmT8p68rkoYFjbdFZgZ3zTyHxPIcGs,14407
-torch_rechub/trainers/match_trainer.py,sha256=oASggXTvFd-93ltvt2uhB1TFPSYP_H-EGdA8Zurw64A,16648
+torch_rechub/trainers/match_trainer.py,sha256=SAywtmQ3E4HCXyNaWhExCH_uXORp0XwtnAtKUdZSONk,20087
 torch_rechub/trainers/mtl_trainer.py,sha256=J8ztmZN-4f2ELruN2lAGLlC1quo9Y-yH9Yu30MXBqJE,18562
 torch_rechub/trainers/seq_trainer.py,sha256=48s8YfY0PN5HETm0Dj09xDKrCT9S8wqykK4q1OtMTRo,20358
 torch_rechub/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-torch_rechub/utils/data.py,sha256=TALy-nP9tqfz0DG2nMjBae5UZyBRvZIDX7zjGMnRqZ8,18542
+torch_rechub/utils/data.py,sha256=Qt_HpwiU6n4wikJizRflAS5acr33YJN-t1Ar86U8UIQ,19715
 torch_rechub/utils/hstu_utils.py,sha256=QKX2V6dmbK6kwNEETSE0oEpbHz-FbIhB4PvbQC9Lx5w,5656
-torch_rechub/utils/match.py,sha256=l9qDwJGHPP9gOQTMYoqGVdWrlhDx1F1-8UnQwDWrEyk,18143
+torch_rechub/utils/match.py,sha256=v12K4DbJcpyIrsKQw_D69w-fbRbBCO1qhJ6QuSgcUKA,20853
 torch_rechub/utils/model_utils.py,sha256=f8dx9uVCN8kfwYSJm_Mg5jZ2_gNMItPzTyccpVf_zA4,8219
 torch_rechub/utils/mtl.py,sha256=AxU05ezizCuLdbPuCg1ZXE0WAStzuxaS5Sc3nwMCBpI,5737
 torch_rechub/utils/onnx_export.py,sha256=02-UI4C0ACccP4nP5moVn6tPr4SSFaKdym0aczJs_jI,10739
 torch_rechub/utils/quantization.py,sha256=ett0VpmQz6c14-zvRuoOwctQurmQFLfF7Dj565L7iqE,4847
 torch_rechub/utils/visualization.py,sha256=cfaq3_ZYcqxb4R7V_be-RebPAqKDedAJSwjYoUm55AU,9201
-torch_rechub-0.1.0.dist-info/METADATA,sha256=r7xaaxaN7MYx2BJu96WGU72nHvOpwFE9CQmZSKBnRrk,18746
-torch_rechub-0.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-torch_rechub-0.1.0.dist-info/licenses/LICENSE,sha256=V7ietiX9G_84HtgEbxDgxClniqXGm2t5q8WM4AHGTu0,1066
-torch_rechub-0.1.0.dist-info/RECORD,,
+torch_rechub-0.3.0.dist-info/METADATA,sha256=IKznFWom9Ngmr1jAHFXbT_8jnOJx16oeTxcMm5TuASw,18469
+torch_rechub-0.3.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+torch_rechub-0.3.0.dist-info/licenses/LICENSE,sha256=V7ietiX9G_84HtgEbxDgxClniqXGm2t5q8WM4AHGTu0,1066
+torch_rechub-0.3.0.dist-info/RECORD,,

{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{torch_rechub-0.1.0.dist-info → torch_rechub-0.3.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

torch-rechub 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

torch-rechub 0.1.0py3-none-any.whl → 0.3.0py3-none-any.whl