PyPI - torch-rechub - Versions diffs - 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl - Mend

torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64)

torch_rechub/__init__.py +14 -0
torch_rechub/basic/activation.py +54 -54
torch_rechub/basic/callback.py +33 -33
torch_rechub/basic/features.py +87 -94
torch_rechub/basic/initializers.py +92 -92
torch_rechub/basic/layers.py +994 -720
torch_rechub/basic/loss_func.py +223 -34
torch_rechub/basic/metaoptimizer.py +76 -72
torch_rechub/basic/metric.py +251 -250
torch_rechub/models/generative/__init__.py +6 -0
torch_rechub/models/generative/hllm.py +249 -0
torch_rechub/models/generative/hstu.py +189 -0
torch_rechub/models/matching/__init__.py +13 -11
torch_rechub/models/matching/comirec.py +193 -188
torch_rechub/models/matching/dssm.py +72 -66
torch_rechub/models/matching/dssm_facebook.py +77 -79
torch_rechub/models/matching/dssm_senet.py +28 -16
torch_rechub/models/matching/gru4rec.py +85 -87
torch_rechub/models/matching/mind.py +103 -101
torch_rechub/models/matching/narm.py +82 -76
torch_rechub/models/matching/sasrec.py +143 -140
torch_rechub/models/matching/sine.py +148 -151
torch_rechub/models/matching/stamp.py +81 -83
torch_rechub/models/matching/youtube_dnn.py +75 -71
torch_rechub/models/matching/youtube_sbc.py +98 -98
torch_rechub/models/multi_task/__init__.py +7 -5
torch_rechub/models/multi_task/aitm.py +83 -84
torch_rechub/models/multi_task/esmm.py +56 -55
torch_rechub/models/multi_task/mmoe.py +58 -58
torch_rechub/models/multi_task/ple.py +116 -130
torch_rechub/models/multi_task/shared_bottom.py +45 -45
torch_rechub/models/ranking/__init__.py +14 -11
torch_rechub/models/ranking/afm.py +65 -63
torch_rechub/models/ranking/autoint.py +102 -0
torch_rechub/models/ranking/bst.py +61 -63
torch_rechub/models/ranking/dcn.py +38 -38
torch_rechub/models/ranking/dcn_v2.py +59 -69
torch_rechub/models/ranking/deepffm.py +131 -123
torch_rechub/models/ranking/deepfm.py +43 -42
torch_rechub/models/ranking/dien.py +191 -191
torch_rechub/models/ranking/din.py +93 -91
torch_rechub/models/ranking/edcn.py +101 -117
torch_rechub/models/ranking/fibinet.py +42 -50
torch_rechub/models/ranking/widedeep.py +41 -41
torch_rechub/trainers/__init__.py +4 -3
torch_rechub/trainers/ctr_trainer.py +288 -128
torch_rechub/trainers/match_trainer.py +336 -170
torch_rechub/trainers/matching.md +3 -0
torch_rechub/trainers/mtl_trainer.py +356 -207
torch_rechub/trainers/seq_trainer.py +427 -0
torch_rechub/utils/data.py +492 -360
torch_rechub/utils/hstu_utils.py +198 -0
torch_rechub/utils/match.py +457 -274
torch_rechub/utils/model_utils.py +233 -0
torch_rechub/utils/mtl.py +136 -126
torch_rechub/utils/onnx_export.py +220 -0
torch_rechub/utils/visualization.py +271 -0
torch_rechub-0.0.5.dist-info/METADATA +402 -0
torch_rechub-0.0.5.dist-info/RECORD +64 -0
{torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info}/WHEEL +1 -2
{torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info/licenses}/LICENSE +21 -21
torch_rechub-0.0.3.dist-info/METADATA +0 -177
torch_rechub-0.0.3.dist-info/RECORD +0 -55
torch_rechub-0.0.3.dist-info/top_level.txt +0 -1

torch_rechub/models/ranking/bst.py CHANGED Viewed

@@ -1,63 +1,61 @@
-"""
-Date: create on 26/02/2024, update on 30/04/2022
-References:
-    paper: Behavior Sequence Transformer for E-commerce Recommendation in Alibaba
-    url: https://arxiv.org/pdf/1905.06874
-    code: https://github.com/jiwidi/Behavior-Sequence-Transformer-Pytorch/blob/master/pytorch_bst.ipynb
-Authors: Tao Fan, thisisevy@foxmail.com
-"""
-import torch
-import torch.nn as nn
-from ...basic.layers import EmbeddingLayer, MLP
-class BST(nn.Module):
-    """Behavior Sequence Transformer
-    Args:
-        features (list): the list of `Feature Class`. training by MLP. It means the user profile features and context features in origin paper, exclude history and target features.
-        history_features (list): the list of `Feature Class`,training by ActivationUnit. It means the user behaviour sequence features, eg.item id sequence, shop id sequence.
-        target_features (list): the list of `Feature Class`, training by ActivationUnit. It means the target feature which will execute target-attention with history feature.
-        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
-        nhead (int): the number of heads in the multi-head-attention models.
-        dropout (float): the dropout value in the multi-head-attention models.
-        num_layers (Any): the number of sub-encoder-layers in the encoder.
-    """
-    def __init__(self, features, history_features, target_features, mlp_params, nhead=8, dropout=0.2, num_layers=1):
-        super().__init__()
-        self.features = features
-        self.history_features = history_features
-        self.target_features = target_features
-        self.num_history_features = len(history_features)
-        self.embed_dim = target_features[0].embed_dim
-        self.seq_len = 50
-        # TODO 在 'torch_rechub.basic.features.SequenceFeature' 中加入seq_len属性
-        self.all_dims = (len(features) + len(history_features) * (self.seq_len + len(target_features))) * self.embed_dim
-        self.embedding = EmbeddingLayer(features + history_features + target_features)
-        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=nhead, dropout=dropout)
-        self.transformer_layers = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
-        self.mlp = MLP(self.all_dims, activation="leakyrelu",
-                       **mlp_params)  # # 定义模型，模型的参数需要我们之前的feature类，用于构建模型的输入层，mlp指定模型后续DNN的结构
-    def forward(self, x):
-        embed_x_features = self.embedding(x, self.features)  # (batch_size, num_features, emb_dim)
-        embed_x_history = self.embedding(x,
-                                         self.history_features)  # (batch_size, num_history_features, seq_length, emb_dim)
-        embed_x_target = self.embedding(x, self.target_features)  # (batch_size, num_target_features, emb_dim)
-        attention_pooling = []
-        for i in range(self.num_history_features):
-            attention_seq = self.transformer_layers(
-                torch.cat([torch.squeeze(embed_x_history[:, i, :, :], 1), embed_x_target], dim=1))
-            attention_pooling.append(attention_seq)  # (batch_size, seq_length + num_target_features, emb_dim)
-        attention_pooling = torch.cat(attention_pooling,
-                                      dim=1)  # (batch_size, num_history_features * (seq_length + num_target_features), emb_dim)
-        mlp_in = torch.cat([
-            attention_pooling.flatten(start_dim=1),
-            embed_x_features.flatten(start_dim=1)
-        ],
-            dim=1)  # (batch_size, N)
-        y = self.mlp(mlp_in)
-        return torch.sigmoid(y.squeeze(1))
+"""
+Date: create on 26/02/2024, update on 30/04/2022
+References:
+    paper: Behavior Sequence Transformer for E-commerce Recommendation in Alibaba
+    url: https://arxiv.org/pdf/1905.06874
+    code: https://github.com/jiwidi/Behavior-Sequence-Transformer-Pytorch/blob/master/pytorch_bst.ipynb
+Authors: Tao Fan, thisisevy@foxmail.com
+"""
+import torch
+import torch.nn as nn
+from ...basic.layers import MLP, EmbeddingLayer
+class BST(nn.Module):
+    """Behavior Sequence Transformer
+    Args:
+        features (list): the list of `Feature Class`. training by MLP. It means the user profile features and context features in origin paper, exclude history and target features.
+        history_features (list): the list of `Feature Class`,training by ActivationUnit. It means the user behaviour sequence features, eg.item id sequence, shop id sequence.
+        target_features (list): the list of `Feature Class`, training by ActivationUnit. It means the target feature which will execute target-attention with history feature.
+        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
+        nhead (int): the number of heads in the multi-head-attention models.
+        dropout (float): the dropout value in the multi-head-attention models.
+        num_layers (Any): the number of sub-encoder-layers in the encoder.
+    """
+    def __init__(self, features, history_features, target_features, mlp_params, nhead=8, dropout=0.2, num_layers=1):
+        super().__init__()
+        self.features = features
+        self.history_features = history_features
+        self.target_features = target_features
+        self.num_history_features = len(history_features)
+        self.embed_dim = target_features[0].embed_dim
+        self.seq_len = 50
+        # TODO 在 'torch_rechub.basic.features.SequenceFeature' 中加入seq_len属性
+        self.all_dims = (len(features) + len(history_features) * (self.seq_len + len(target_features))) * self.embed_dim
+        self.embedding = EmbeddingLayer(features + history_features + target_features)
+        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=nhead, dropout=dropout)
+        self.transformer_layers = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
+        # # 定义模型，模型的参数需要我们之前的feature类，用于构建模型的输入层，mlp指定模型后续DNN的结构
+        self.mlp = MLP(self.all_dims, activation="leakyrelu", **mlp_params)
+    def forward(self, x):
+        # (batch_size, num_features, emb_dim)
+        embed_x_features = self.embedding(x, self.features)
+        # (batch_size, num_history_features, seq_length, emb_dim)
+        embed_x_history = self.embedding(x, self.history_features)
+        # (batch_size, num_target_features, emb_dim)
+        embed_x_target = self.embedding(x, self.target_features)
+        attention_pooling = []
+        for i in range(self.num_history_features):
+            attention_seq = self.transformer_layers(torch.cat([torch.squeeze(embed_x_history[:, i, :, :], 1), embed_x_target], dim=1))
+            # (batch_size, seq_length + num_target_features, emb_dim)
+            attention_pooling.append(attention_seq)
+        # (batch_size, num_history_features * (seq_length + num_target_features), emb_dim)
+        attention_pooling = torch.cat(attention_pooling, dim=1)
+        mlp_in = torch.cat([attention_pooling.flatten(start_dim=1), embed_x_features.flatten(start_dim=1)], dim=1)  # (batch_size, N)
+        y = self.mlp(mlp_in)
+        return torch.sigmoid(y.squeeze(1))

torch_rechub/models/ranking/dcn.py CHANGED Viewed

@@ -1,38 +1,38 @@
-"""
-Date: create on 12/05/2022
-References:
-    paper: (AKDD'2017) Deep & Cross Network for Ad Click Predictions
-    url: https://arxiv.org/abs/1708.05123
-Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
-"""
-import torch
-from ...basic.layers import LR, MLP, CrossNetwork, EmbeddingLayer
-class DCN(torch.nn.Module):
-    """Deep & Cross Network
-    Args:
-        features (list[Feature Class]): training by the whole module.
-        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
-    """
-    def __init__(self, features, n_cross_layers, mlp_params):
-        super().__init__()
-        self.features = features
-        self.dims = sum([fea.embed_dim for fea in features])
-        self.embedding = EmbeddingLayer(features)
-        self.cn = CrossNetwork(self.dims, n_cross_layers)
-        self.mlp = MLP(self.dims, output_layer=False, **mlp_params)
-        self.linear = LR(self.dims + mlp_params["dims"][-1])
-    def forward(self, x):
-        embed_x = self.embedding(x, self.features, squeeze_dim=True)
-        cn_out = self.cn(embed_x)
-        mlp_out = self.mlp(embed_x)
-        x_stack = torch.cat([cn_out, mlp_out], dim=1)
-        y = self.linear(x_stack)
-        return torch.sigmoid(y.squeeze(1))
+"""
+Date: create on 12/05/2022
+References:
+    paper: (AKDD'2017) Deep & Cross Network for Ad Click Predictions
+    url: https://arxiv.org/abs/1708.05123
+Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+"""
+import torch
+from ...basic.layers import LR, MLP, CrossNetwork, EmbeddingLayer
+class DCN(torch.nn.Module):
+    """Deep & Cross Network
+    Args:
+        features (list[Feature Class]): training by the whole module.
+        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
+    """
+    def __init__(self, features, n_cross_layers, mlp_params):
+        super().__init__()
+        self.features = features
+        self.dims = sum([fea.embed_dim for fea in features])
+        self.embedding = EmbeddingLayer(features)
+        self.cn = CrossNetwork(self.dims, n_cross_layers)
+        self.mlp = MLP(self.dims, output_layer=False, **mlp_params)
+        self.linear = LR(self.dims + mlp_params["dims"][-1])
+    def forward(self, x):
+        embed_x = self.embedding(x, self.features, squeeze_dim=True)
+        cn_out = self.cn(embed_x)
+        mlp_out = self.mlp(embed_x)
+        x_stack = torch.cat([cn_out, mlp_out], dim=1)
+        y = self.linear(x_stack)
+        return torch.sigmoid(y.squeeze(1))

torch_rechub/models/ranking/dcn_v2.py CHANGED Viewed

@@ -1,69 +1,59 @@
-"""
-Date: create on 09/01/2022
-References:
-    paper: (WWW'21) Dcn v2: Improved deep & cross network and practical lessons for web-scale learning to rank systems
-    url: https://arxiv.org/abs/2008.13535
-Authors: lailai, lailai_zxy@tju.edu.cn
-"""
-import torch
-from ...basic.layers import LR, MLP,CrossNetV2, CrossNetMix, EmbeddingLayer
-class DCNv2(torch.nn.Module):
-    """Deep & Cross Network with a mixture of low-rank architecture
-    Args:
-        features (list[Feature Class]): training by the whole module.
-        n_cross_layers (int) : the number of layers of feature intersection layers
-        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
-        use_low_rank_mixture (bool): True, whether to use a mixture of low-rank architecture
-        low_rank (int): the rank size of low-rank matrices
-        num_experts (int): the number of expert networks
-    """
-    def __init__(self,
-                 features,
-                 n_cross_layers,
-                 mlp_params,
-                 model_structure="parallel",
-                 use_low_rank_mixture=True,
-                 low_rank=32,
-                 num_experts=4,
-                 **kwargs):
-        super(DCNv2, self).__init__()
-        self.features = features
-        self.dims = sum([fea.embed_dim for fea in features])
-        self.embedding = EmbeddingLayer(features)
-        if use_low_rank_mixture:
-            self.crossnet = CrossNetMix(self.dims, n_cross_layers, low_rank=low_rank, num_experts=num_experts)
-        else:
-            self.crossnet = CrossNetV2(self.dims, n_cross_layers)
-        self.model_structure = model_structure
-        assert self.model_structure in ["crossnet_only", "stacked", "parallel"], \
-               "model_structure={} not supported!".format(self.model_structure)
-        if self.model_structure == "stacked":
-            self.stacked_dnn = MLP(self.dims,
-                                   output_layer=False,
-                                   ** mlp_params)
-            final_dim = mlp_params["dims"][-1]
-        if self.model_structure == "parallel":
-            self.parallel_dnn =  MLP(self.dims,
-                                     output_layer = False,
-                                   ** mlp_params)
-            final_dim = mlp_params["dims"][-1] + self.dims
-        if self.model_structure == "crossnet_only": # only CrossNet
-            final_dim = self.dims
-        self.linear = LR(final_dim)
-    def forward(self, x):
-        embed_x = self.embedding(x, self.features, squeeze_dim=True)
-        cross_out = self.crossnet(embed_x)
-        if self.model_structure == "crossnet_only":
-            final_out = cross_out
-        elif self.model_structure == "stacked":
-            final_out = self.stacked_dnn(cross_out)
-        elif self.model_structure == "parallel":
-            dnn_out = self.parallel_dnn(embed_x)
-            final_out = torch.cat([cross_out, dnn_out], dim=1)
-        y_pred = self.linear(final_out)
-        y_pred =  torch.sigmoid(y_pred.squeeze(1))
-        return y_pred
+"""
+Date: create on 09/01/2022
+References:
+    paper: (WWW'21) Dcn v2: Improved deep & cross network and practical lessons for web-scale learning to rank systems
+    url: https://arxiv.org/abs/2008.13535
+Authors: lailai, lailai_zxy@tju.edu.cn
+"""
+import torch
+from ...basic.layers import LR, MLP, CrossNetMix, CrossNetV2, EmbeddingLayer
+class DCNv2(torch.nn.Module):
+    """Deep & Cross Network with a mixture of low-rank architecture
+    Args:
+        features (list[Feature Class]): training by the whole module.
+        n_cross_layers (int) : the number of layers of feature intersection layers
+        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
+        use_low_rank_mixture (bool): True, whether to use a mixture of low-rank architecture
+        low_rank (int): the rank size of low-rank matrices
+        num_experts (int): the number of expert networks
+    """
+    def __init__(self, features, n_cross_layers, mlp_params, model_structure="parallel", use_low_rank_mixture=True, low_rank=32, num_experts=4, **kwargs):
+        super(DCNv2, self).__init__()
+        self.features = features
+        self.dims = sum([fea.embed_dim for fea in features])
+        self.embedding = EmbeddingLayer(features)
+        if use_low_rank_mixture:
+            self.crossnet = CrossNetMix(self.dims, n_cross_layers, low_rank=low_rank, num_experts=num_experts)
+        else:
+            self.crossnet = CrossNetV2(self.dims, n_cross_layers)
+        self.model_structure = model_structure
+        assert self.model_structure in ["crossnet_only", "stacked", "parallel"], \
+            "model_structure={} not supported!".format(self.model_structure)
+        if self.model_structure == "stacked":
+            self.stacked_dnn = MLP(self.dims, output_layer=False, **mlp_params)
+            final_dim = mlp_params["dims"][-1]
+        if self.model_structure == "parallel":
+            self.parallel_dnn = MLP(self.dims, output_layer=False, **mlp_params)
+            final_dim = mlp_params["dims"][-1] + self.dims
+        if self.model_structure == "crossnet_only":  # only CrossNet
+            final_dim = self.dims
+        self.linear = LR(final_dim)
+    def forward(self, x):
+        embed_x = self.embedding(x, self.features, squeeze_dim=True)
+        cross_out = self.crossnet(embed_x)
+        if self.model_structure == "crossnet_only":
+            final_out = cross_out
+        elif self.model_structure == "stacked":
+            final_out = self.stacked_dnn(cross_out)
+        elif self.model_structure == "parallel":
+            dnn_out = self.parallel_dnn(embed_x)
+            final_out = torch.cat([cross_out, dnn_out], dim=1)
+        y_pred = self.linear(final_out)
+        y_pred = torch.sigmoid(y_pred.squeeze(1))
+        return y_pred

torch_rechub/models/ranking/deepffm.py CHANGED Viewed

@@ -1,123 +1,131 @@
-"""
-Date: created on 31/07/2022
-References:
-    paper: FAT-DeepFFM: Field Attentive Deep Field-aware Factorization Machine
-    url: https://arxiv.org/abs/1905.06336
-Authors: Bo Kang, klinux@live.com
-"""
-import torch
-import torch.nn as nn
-from ...basic.layers import CEN, EmbeddingLayer, FFM, MLP
-class DeepFFM(nn.Module):
-    """The DeepFFM model, mentioned on the `webpage
-    <https://cs.nju.edu.cn/31/60/c1654a209248/page.htm>` which is the first
-    work that introduces FFM model into neural CTR system. It is also described
-    in the `FAT-DeepFFM paper <https://arxiv.org/abs/1905.06336>`.
-    Args:
-        linear_features (list): the list of `Feature Class`, fed to the linear module.
-        cross_features (list): the list of `Feature Class`, fed to the ffm module.
-        embed_dim (int): the dimensionality of categorical value embedding.
-        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
-    """
-    def __init__(self, linear_features, cross_features, embed_dim, mlp_params):
-        super().__init__()
-        self.linear_features = linear_features
-        self.cross_features = cross_features
-        self.num_fields = len(cross_features)
-        self.num_field_cross = self.num_fields * (self.num_fields - 1) // 2
-        self.ffm = FFM(num_fields=self.num_fields, reduce_sum=False)
-        self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)
-        self.linear_embedding = EmbeddingLayer(linear_features)
-        self.ffm_embedding = EmbeddingLayer(cross_features)
-        self.b =torch.nn.Parameter(torch.zeros(1))
-        # This keeping constant value in module on correct device
-        # url: https://discuss.pytorch.org/t/keeping-constant-value-in-module-on-correct-device/10129
-        fields_offset = torch.arange(0, self.num_fields, dtype=torch.long)
-        self.register_buffer('fields_offset', fields_offset)
-    def forward(self, x):
-        # compute scores from the linear part of the model, where input is the raw features (Eq. 5, FAT-DeepFFM)
-        y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True) #[batch_size, 1]
-        # gather the embeddings. Each feature value corresponds to multiple embeddings, with multiplicity equal to number of features/fields.
-        # output shape [batch_size, num_field, num_field, emb_dim]
-        x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields  + self.fields_offset for fea in self.cross_features}
-        input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)
-        # compute second order field-aware feature crossings, output shape [batch_size, num_field_cross, emb_dim]
-        em = self.ffm(input_ffm)
-        # compute scores from the ffm part of the model, output shape [batch_size, 1]
-        y_ffm = self.mlp_out(em.flatten(start_dim=1))
-        # compute final prediction
-        y = y_linear + y_ffm
-        return torch.sigmoid(y.squeeze(1) + self.b)
-class FatDeepFFM(nn.Module):
-    """The FAT-DeepFFM model, mentioned in the `FAT-DeepFFM paper
-    <https://arxiv.org/abs/1905.06336>`. It combines DeepFFM with
-    Compose-Excitation Network (CENet) field attention mechanism
-    to highlight the importance of second-order feature crosses.
-    Args:
-        linear_features (list): the list of `Feature Class`, fed to the linear module.
-        cross_features (list): the list of `Feature Class`, fed to the ffm module.
-        embed_dim (int): the dimensionality of categorical value embedding.
-        reduction_ratio (int): the between the dimensions of input layer and hidden layer of the CEN MLP module.
-        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
-    """
-    def __init__(self, linear_features, cross_features, embed_dim, reduction_ratio, mlp_params):
-        super().__init__()
-        self.linear_features = linear_features
-        self.cross_features = cross_features
-        self.num_fields = len(cross_features)
-        self.num_field_cross = self.num_fields * (self.num_fields - 1) // 2
-        self.ffm = FFM(num_fields=self.num_fields, reduce_sum=False)
-        self.cen = CEN(embed_dim, self.num_field_cross, reduction_ratio)
-        self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)
-        self.linear_embedding = EmbeddingLayer(linear_features)
-        self.ffm_embedding = EmbeddingLayer(cross_features)
-        self.b =torch.nn.Parameter(torch.zeros(1))
-        fields_offset = torch.arange(0, self.num_fields, dtype=torch.long)
-        self.register_buffer('fields_offset', fields_offset)
-    def forward(self, x):
-        # compute scores from the linear part of the model, where input is the raw features (Eq. 5, FAT-DeepFFM)
-        y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True) #[batch_size, 1]
-        # gather the embeddings. Each feature value corresponds to multiple embeddings, with multiplicity is equal to the number of features/fields.
-        # output shape [batch_size, num_field, num_field, emb_dim]
-        x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields + self.fields_offset for fea in self.cross_features}
-        input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)
-        # compute second order field-aware feature crossings, output shape [batch_size, num_field_cross, emb_dim]
-        em = self.ffm(input_ffm)
-        # rescale FFM embeddings with field attention (Eq.10), output shape [batch_size, num_field_cross * emb_dim]
-        aem = self.cen(em)
-        # compute scores from the ffm part of the model, output shape [batch_size, 1]
-        y_ffm = self.mlp_out(aem)
-        # compute final prediction
-        y = y_linear + y_ffm
-        return torch.sigmoid(y.squeeze(1) + self.b)
+"""
+Date: created on 31/07/2022
+References:
+    paper: FAT-DeepFFM: Field Attentive Deep Field-aware Factorization Machine
+    url: https://arxiv.org/abs/1905.06336
+Authors: Bo Kang, klinux@live.com
+"""
+import torch
+import torch.nn as nn
+from ...basic.layers import CEN, FFM, MLP, EmbeddingLayer
+class DeepFFM(nn.Module):
+    """The DeepFFM model, mentioned on the `webpage
+    <https://cs.nju.edu.cn/31/60/c1654a209248/page.htm>` which is the first
+    work that introduces FFM model into neural CTR system. It is also described
+    in the `FAT-DeepFFM paper <https://arxiv.org/abs/1905.06336>`.
+    Args:
+        linear_features (list): the list of `Feature Class`, fed to the linear module.
+        cross_features (list): the list of `Feature Class`, fed to the ffm module.
+        embed_dim (int): the dimensionality of categorical value embedding.
+        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
+    """
+    def __init__(self, linear_features, cross_features, embed_dim, mlp_params):
+        super().__init__()
+        self.linear_features = linear_features
+        self.cross_features = cross_features
+        self.num_fields = len(cross_features)
+        self.num_field_cross = self.num_fields * (self.num_fields - 1) // 2
+        self.ffm = FFM(num_fields=self.num_fields, reduce_sum=False)
+        self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)
+        self.linear_embedding = EmbeddingLayer(linear_features)
+        self.ffm_embedding = EmbeddingLayer(cross_features)
+        self.b = torch.nn.Parameter(torch.zeros(1))
+        # This keeping constant value in module on correct device
+        # url:
+        # https://discuss.pytorch.org/t/keeping-constant-value-in-module-on-correct-device/10129
+        fields_offset = torch.arange(0, self.num_fields, dtype=torch.long)
+        self.register_buffer('fields_offset', fields_offset)
+    def forward(self, x):
+        # compute scores from the linear part of the model, where input is the
+        # raw features (Eq. 5, FAT-DeepFFM)
+        y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True)  # [batch_size, 1]
+        # gather the embeddings. Each feature value corresponds to multiple embeddings, with multiplicity equal to number of features/fields.
+        # output shape [batch_size, num_field, num_field, emb_dim]
+        x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields + self.fields_offset for fea in self.cross_features}
+        input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)
+        # compute second order field-aware feature crossings, output shape
+        # [batch_size, num_field_cross, emb_dim]
+        em = self.ffm(input_ffm)
+        # compute scores from the ffm part of the model, output shape
+        # [batch_size, 1]
+        y_ffm = self.mlp_out(em.flatten(start_dim=1))
+        # compute final prediction
+        y = y_linear + y_ffm
+        return torch.sigmoid(y.squeeze(1) + self.b)
+class FatDeepFFM(nn.Module):
+    """The FAT-DeepFFM model, mentioned in the `FAT-DeepFFM paper
+    <https://arxiv.org/abs/1905.06336>`. It combines DeepFFM with
+    Compose-Excitation Network (CENet) field attention mechanism
+    to highlight the importance of second-order feature crosses.
+    Args:
+        linear_features (list): the list of `Feature Class`, fed to the linear module.
+        cross_features (list): the list of `Feature Class`, fed to the ffm module.
+        embed_dim (int): the dimensionality of categorical value embedding.
+        reduction_ratio (int): the between the dimensions of input layer and hidden layer of the CEN MLP module.
+        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
+    """
+    def __init__(self, linear_features, cross_features, embed_dim, reduction_ratio, mlp_params):
+        super().__init__()
+        self.linear_features = linear_features
+        self.cross_features = cross_features
+        self.num_fields = len(cross_features)
+        self.num_field_cross = self.num_fields * (self.num_fields - 1) // 2
+        self.ffm = FFM(num_fields=self.num_fields, reduce_sum=False)
+        self.cen = CEN(embed_dim, self.num_field_cross, reduction_ratio)
+        self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)
+        self.linear_embedding = EmbeddingLayer(linear_features)
+        self.ffm_embedding = EmbeddingLayer(cross_features)
+        self.b = torch.nn.Parameter(torch.zeros(1))
+        fields_offset = torch.arange(0, self.num_fields, dtype=torch.long)
+        self.register_buffer('fields_offset', fields_offset)
+    def forward(self, x):
+        # compute scores from the linear part of the model, where input is the
+        # raw features (Eq. 5, FAT-DeepFFM)
+        y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True)  # [batch_size, 1]
+        # gather the embeddings. Each feature value corresponds to multiple embeddings, with multiplicity is equal to the number of features/fields.
+        # output shape [batch_size, num_field, num_field, emb_dim]
+        x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields + self.fields_offset for fea in self.cross_features}
+        input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)
+        # compute second order field-aware feature crossings, output shape
+        # [batch_size, num_field_cross, emb_dim]
+        em = self.ffm(input_ffm)
+        # rescale FFM embeddings with field attention (Eq.10), output shape
+        # [batch_size, num_field_cross * emb_dim]
+        aem = self.cen(em)
+        # compute scores from the ffm part of the model, output shape
+        # [batch_size, 1]
+        y_ffm = self.mlp_out(aem)
+        # compute final prediction
+        y = y_linear + y_ffm
+        return torch.sigmoid(y.squeeze(1) + self.b)

torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl