torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torch_rechub/__init__.py +14 -0
- torch_rechub/basic/activation.py +54 -54
- torch_rechub/basic/callback.py +33 -33
- torch_rechub/basic/features.py +87 -94
- torch_rechub/basic/initializers.py +92 -92
- torch_rechub/basic/layers.py +994 -720
- torch_rechub/basic/loss_func.py +223 -34
- torch_rechub/basic/metaoptimizer.py +76 -72
- torch_rechub/basic/metric.py +251 -250
- torch_rechub/models/generative/__init__.py +6 -0
- torch_rechub/models/generative/hllm.py +249 -0
- torch_rechub/models/generative/hstu.py +189 -0
- torch_rechub/models/matching/__init__.py +13 -11
- torch_rechub/models/matching/comirec.py +193 -188
- torch_rechub/models/matching/dssm.py +72 -66
- torch_rechub/models/matching/dssm_facebook.py +77 -79
- torch_rechub/models/matching/dssm_senet.py +28 -16
- torch_rechub/models/matching/gru4rec.py +85 -87
- torch_rechub/models/matching/mind.py +103 -101
- torch_rechub/models/matching/narm.py +82 -76
- torch_rechub/models/matching/sasrec.py +143 -140
- torch_rechub/models/matching/sine.py +148 -151
- torch_rechub/models/matching/stamp.py +81 -83
- torch_rechub/models/matching/youtube_dnn.py +75 -71
- torch_rechub/models/matching/youtube_sbc.py +98 -98
- torch_rechub/models/multi_task/__init__.py +7 -5
- torch_rechub/models/multi_task/aitm.py +83 -84
- torch_rechub/models/multi_task/esmm.py +56 -55
- torch_rechub/models/multi_task/mmoe.py +58 -58
- torch_rechub/models/multi_task/ple.py +116 -130
- torch_rechub/models/multi_task/shared_bottom.py +45 -45
- torch_rechub/models/ranking/__init__.py +14 -11
- torch_rechub/models/ranking/afm.py +65 -63
- torch_rechub/models/ranking/autoint.py +102 -0
- torch_rechub/models/ranking/bst.py +61 -63
- torch_rechub/models/ranking/dcn.py +38 -38
- torch_rechub/models/ranking/dcn_v2.py +59 -69
- torch_rechub/models/ranking/deepffm.py +131 -123
- torch_rechub/models/ranking/deepfm.py +43 -42
- torch_rechub/models/ranking/dien.py +191 -191
- torch_rechub/models/ranking/din.py +93 -91
- torch_rechub/models/ranking/edcn.py +101 -117
- torch_rechub/models/ranking/fibinet.py +42 -50
- torch_rechub/models/ranking/widedeep.py +41 -41
- torch_rechub/trainers/__init__.py +4 -3
- torch_rechub/trainers/ctr_trainer.py +288 -128
- torch_rechub/trainers/match_trainer.py +336 -170
- torch_rechub/trainers/matching.md +3 -0
- torch_rechub/trainers/mtl_trainer.py +356 -207
- torch_rechub/trainers/seq_trainer.py +427 -0
- torch_rechub/utils/data.py +492 -360
- torch_rechub/utils/hstu_utils.py +198 -0
- torch_rechub/utils/match.py +457 -274
- torch_rechub/utils/model_utils.py +233 -0
- torch_rechub/utils/mtl.py +136 -126
- torch_rechub/utils/onnx_export.py +220 -0
- torch_rechub/utils/visualization.py +271 -0
- torch_rechub-0.0.5.dist-info/METADATA +402 -0
- torch_rechub-0.0.5.dist-info/RECORD +64 -0
- {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info}/WHEEL +1 -2
- {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info/licenses}/LICENSE +21 -21
- torch_rechub-0.0.3.dist-info/METADATA +0 -177
- torch_rechub-0.0.3.dist-info/RECORD +0 -55
- torch_rechub-0.0.3.dist-info/top_level.txt +0 -1
torch_rechub/__init__.py
CHANGED
torch_rechub/basic/activation.py
CHANGED
@@ -1,54 +1,54 @@

All 54 lines are removed and re-added; the old (0.0.3) and new (0.0.5) sides of this hunk carry identical code:

import torch
import torch.nn as nn


class Dice(nn.Module):
    """The Dice activation function mentioned in the `DIN paper
    https://arxiv.org/abs/1706.06978`
    """

    def __init__(self, epsilon=1e-3):
        super(Dice, self).__init__()
        self.epsilon = epsilon
        self.alpha = nn.Parameter(torch.randn(1))

    def forward(self, x: torch.Tensor):
        # x: N * num_neurons
        avg = x.mean(dim=1)  # N
        avg = avg.unsqueeze(dim=1)  # N * 1
        var = torch.pow(x - avg, 2) + self.epsilon  # N * num_neurons
        var = var.sum(dim=1).unsqueeze(dim=1)  # N * 1

        ps = (x - avg) / torch.sqrt(var)  # N * 1

        ps = nn.Sigmoid()(ps)  # N * 1
        return ps * x + (1 - ps) * self.alpha * x


def activation_layer(act_name):
    """Construct activation layers

    Args:
        act_name: str or nn.Module, name of activation function

    Returns:
        act_layer: activation layer
    """
    if isinstance(act_name, str):
        if act_name.lower() == 'sigmoid':
            act_layer = nn.Sigmoid()
        elif act_name.lower() == 'relu':
            act_layer = nn.ReLU(inplace=True)
        elif act_name.lower() == 'dice':
            act_layer = Dice()
        elif act_name.lower() == 'prelu':
            act_layer = nn.PReLU()
        elif act_name.lower() == "softmax":
            act_layer = nn.Softmax(dim=1)
        elif act_name.lower() == 'leakyrelu':
            act_layer = nn.LeakyReLU()
    elif issubclass(act_name, nn.Module):
        act_layer = act_name()
    else:
        raise NotImplementedError
    return act_layer
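For orientation, a minimal usage sketch of the activation factory above (not part of the package diff; the input tensor and its shape are made up for illustration):

import torch
import torch.nn as nn
from torch_rechub.basic.activation import activation_layer

x = torch.randn(8, 16)  # batch of 8 samples, 16 neurons

relu = activation_layer("relu")    # string name -> nn.ReLU(inplace=True)
dice = activation_layer("dice")    # -> Dice() with a learnable alpha
tanh = activation_layer(nn.Tanh)   # an nn.Module subclass is instantiated

print(relu(x).shape, dice(x).shape, tanh(x).shape)  # each torch.Size([8, 16])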
torch_rechub/basic/callback.py
CHANGED
@@ -1,33 +1,33 @@

All 33 lines are removed and re-added; the old (0.0.3) and new (0.0.5) sides of this hunk carry identical code:

import copy


class EarlyStopper(object):
    """Early stops the training if validation loss doesn't improve after a given patience.

    Args:
        patience (int): How long to wait after last time validation auc improved.
    """

    def __init__(self, patience):
        self.patience = patience
        self.trial_counter = 0
        self.best_auc = 0
        self.best_weights = None

    def stop_training(self, val_auc, weights):
        """whether to stop training.

        Args:
            val_auc (float): auc score in val data.
            weights (tensor): the weights of model
        """
        if val_auc > self.best_auc:
            self.best_auc = val_auc
            self.trial_counter = 0
            self.best_weights = copy.deepcopy(weights)
            return False
        elif self.trial_counter + 1 < self.patience:
            self.trial_counter += 1
            return False
        else:
            return True
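A brief sketch of how EarlyStopper is typically driven from a training loop (not part of the diff; the stand-in model and hard-coded validation AUC values are hypothetical):

import torch.nn as nn
from torch_rechub.basic.callback import EarlyStopper

model = nn.Linear(10, 1)             # stand-in model
stopper = EarlyStopper(patience=3)

for epoch, val_auc in enumerate([0.70, 0.72, 0.71, 0.71, 0.71]):
    if stopper.stop_training(val_auc, model.state_dict()):
        model.load_state_dict(stopper.best_weights)  # restore the best checkpoint
        print(f"early stop at epoch {epoch}, best AUC {stopper.best_auc}")
        break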
torch_rechub/basic/features.py
CHANGED
@@ -1,94 +1,87 @@

New file content (0.0.5):

from ..utils.data import get_auto_embedding_dim
from .initializers import RandomNormal


class SequenceFeature(object):
    """The Feature Class for Sequence feature or multi-hot feature.
    In recommendation, there are many user behaviour features which we want to take the sequence model
    and tag featurs (multi hot) which we want to pooling. Note that if you use this feature, you must padding
    the feature value before training.

    Args:
        name (str): feature's name.
        vocab_size (int): vocabulary size of embedding table.
        embed_dim (int): embedding vector's length
        pooling (str): pooling method, support `["mean", "sum", "concat"]` (default=`"mean"`)
        shared_with (str): the another feature name which this feature will shared with embedding.
        padding_idx (int, optional): If specified, the entries at padding_idx will be masked 0 in InputMask Layer.
        initializer(Initializer): Initializer the embedding layer weight.
    """

    def __init__(self, name, vocab_size, embed_dim=None, pooling="mean", shared_with=None, padding_idx=None, initializer=RandomNormal(0, 0.0001)):
        self.name = name
        self.vocab_size = vocab_size
        if embed_dim is None:
            self.embed_dim = get_auto_embedding_dim(vocab_size)
        else:
            self.embed_dim = embed_dim
        self.pooling = pooling
        self.shared_with = shared_with
        self.padding_idx = padding_idx
        self.initializer = initializer

    def __repr__(self):
        return f'<SequenceFeature {self.name} with Embedding shape ({self.vocab_size}, {self.embed_dim})>'

    def get_embedding_layer(self):
        if not hasattr(self, 'embed'):
            self.embed = self.initializer(self.vocab_size, self.embed_dim)
        return self.embed


class SparseFeature(object):
    """The Feature Class for Sparse feature.

    Args:
        name (str): feature's name.
        vocab_size (int): vocabulary size of embedding table.
        embed_dim (int): embedding vector's length
        shared_with (str): the another feature name which this feature will shared with embedding.
        padding_idx (int, optional): If specified, the entries at padding_idx will be masked 0 in InputMask Layer.
        initializer(Initializer): Initializer the embedding layer weight.
    """

    def __init__(self, name, vocab_size, embed_dim=None, shared_with=None, padding_idx=None, initializer=RandomNormal(0, 0.0001)):
        self.name = name
        self.vocab_size = vocab_size
        if embed_dim is None:
            self.embed_dim = get_auto_embedding_dim(vocab_size)
        else:
            self.embed_dim = embed_dim
        self.shared_with = shared_with
        self.padding_idx = padding_idx
        self.initializer = initializer

    def __repr__(self):
        return f'<SparseFeature {self.name} with Embedding shape ({self.vocab_size}, {self.embed_dim})>'

    def get_embedding_layer(self):
        if not hasattr(self, 'embed'):
            self.embed = self.initializer(self.vocab_size, self.embed_dim)
        return self.embed


class DenseFeature(object):
    """The Feature Class for Dense feature.

    Args:
        name (str): feature's name.
        embed_dim (int): embedding vector's length, the value fixed `1`. If you put a vector (torch.tensor) , replace the embed_dim with your vector dimension.
    """

    def __init__(self, name, embed_dim=1):
        self.name = name
        self.embed_dim = embed_dim

    def __repr__(self):
        return f'<DenseFeature {self.name}>'
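A hypothetical set of feature definitions showing how the three classes compose (feature names, vocabulary sizes and dimensions are made up for illustration):

from torch_rechub.basic.features import DenseFeature, SparseFeature, SequenceFeature

user_features = [
    SparseFeature("user_id", vocab_size=10000, embed_dim=16),
    SparseFeature("city", vocab_size=300),  # embed_dim=None -> auto dimension via get_auto_embedding_dim
    DenseFeature("age"),                    # scalar dense input, embed_dim defaults to 1
]
history_feature = SequenceFeature("hist_item_id", vocab_size=50000, embed_dim=16,
                                  pooling="mean", shared_with="item_id", padding_idx=0)

for f in user_features + [history_feature]:
    print(f)  # e.g. <SparseFeature user_id with Embedding shape (10000, 16)>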
torch_rechub/basic/initializers.py
CHANGED

@@ -1,92 +1,92 @@

All 92 lines are removed and re-added; the old (0.0.3) and new (0.0.5) sides of this hunk carry identical code:

import torch


class RandomNormal(object):
    """Returns an embedding initialized with a normal distribution.

    Args:
        mean (float): the mean of the normal distribution
        std (float): the standard deviation of the normal distribution
    """

    def __init__(self, mean=0.0, std=1.0):
        self.mean = mean
        self.std = std

    def __call__(self, vocab_size, embed_dim):
        embed = torch.nn.Embedding(vocab_size, embed_dim)
        torch.nn.init.normal_(embed.weight, self.mean, self.std)
        return embed


class RandomUniform(object):
    """Returns an embedding initialized with a uniform distribution.

    Args:
        minval (float): Lower bound of the range of random values of the uniform distribution.
        maxval (float): Upper bound of the range of random values of the uniform distribution.
    """

    def __init__(self, minval=0.0, maxval=1.0):
        self.minval = minval
        self.maxval = maxval

    def __call__(self, vocab_size, embed_dim):
        embed = torch.nn.Embedding(vocab_size, embed_dim)
        torch.nn.init.uniform_(embed.weight, self.minval, self.maxval)
        return embed


class XavierNormal(object):
    """Returns an embedding initialized with the method described in
    `Understanding the difficulty of training deep feedforward neural networks`
    - Glorot, X. & Bengio, Y. (2010), using a uniform distribution.

    Args:
        gain (float): stddev = gain*sqrt(2 / (fan_in + fan_out))
    """

    def __init__(self, gain=1.0):
        self.gain = gain

    def __call__(self, vocab_size, embed_dim):
        embed = torch.nn.Embedding(vocab_size, embed_dim)
        torch.nn.init.xavier_normal_(embed.weight, self.gain)
        return embed


class XavierUniform(object):
    """Returns an embedding initialized with the method described in
    `Understanding the difficulty of training deep feedforward neural networks`
    - Glorot, X. & Bengio, Y. (2010), using a uniform distribution.

    Args:
        gain (float): stddev = gain*sqrt(6 / (fan_in + fan_out))
    """

    def __init__(self, gain=1.0):
        self.gain = gain

    def __call__(self, vocab_size, embed_dim):
        embed = torch.nn.Embedding(vocab_size, embed_dim)
        torch.nn.init.xavier_uniform_(embed.weight, self.gain)
        return embed


class Pretrained(object):
    """Creates Embedding instance from given 2-dimensional FloatTensor.

    Args:
        embedding_weight(Tensor or ndarray or List[List[int]]): FloatTensor containing weights for the Embedding.
            First dimension is being passed to Embedding as ``num_embeddings``, second as ``embedding_dim``.
        freeze (boolean, optional): If ``True``, the tensor does not get updated in the learning process.
    """

    def __init__(self, embedding_weight, freeze=True):
        self.embedding_weight = torch.FloatTensor(embedding_weight)
        self.freeze = freeze

    def __call__(self, vocab_size, embed_dim):
        assert vocab_size == self.embedding_weight.shape[0] and embed_dim == self.embedding_weight.shape[1]
        embed = torch.nn.Embedding.from_pretrained(self.embedding_weight, freeze=self.freeze)
        return embed
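A short sketch of swapping the default embedding initializer on a feature and building a frozen pre-trained table (the feature name, sizes and weight matrix are illustrative, not from the package):

import torch
from torch_rechub.basic.features import SparseFeature
from torch_rechub.basic.initializers import Pretrained, XavierUniform

# Xavier-uniform initialised embedding table, built lazily by the feature
item_id = SparseFeature("item_id", vocab_size=1000, embed_dim=8,
                        initializer=XavierUniform(gain=1.0))
emb = item_id.get_embedding_layer()           # torch.nn.Embedding(1000, 8)

# Frozen table loaded from an existing weight matrix
weights = torch.randn(1000, 8)
pretrained_emb = Pretrained(weights, freeze=True)(1000, 8)
print(emb.weight.shape, pretrained_emb.weight.requires_grad)  # torch.Size([1000, 8]) False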