torch-rechub 0.0.1__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. torch_rechub/__init__.py +14 -0
  2. torch_rechub/basic/activation.py +3 -1
  3. torch_rechub/basic/callback.py +2 -2
  4. torch_rechub/basic/features.py +38 -8
  5. torch_rechub/basic/initializers.py +92 -0
  6. torch_rechub/basic/layers.py +800 -46
  7. torch_rechub/basic/loss_func.py +223 -0
  8. torch_rechub/basic/metaoptimizer.py +76 -0
  9. torch_rechub/basic/metric.py +251 -0
  10. torch_rechub/models/generative/__init__.py +6 -0
  11. torch_rechub/models/generative/hllm.py +249 -0
  12. torch_rechub/models/generative/hstu.py +189 -0
  13. torch_rechub/models/matching/__init__.py +13 -0
  14. torch_rechub/models/matching/comirec.py +193 -0
  15. torch_rechub/models/matching/dssm.py +72 -0
  16. torch_rechub/models/matching/dssm_facebook.py +77 -0
  17. torch_rechub/models/matching/dssm_senet.py +87 -0
  18. torch_rechub/models/matching/gru4rec.py +85 -0
  19. torch_rechub/models/matching/mind.py +103 -0
  20. torch_rechub/models/matching/narm.py +82 -0
  21. torch_rechub/models/matching/sasrec.py +143 -0
  22. torch_rechub/models/matching/sine.py +148 -0
  23. torch_rechub/models/matching/stamp.py +81 -0
  24. torch_rechub/models/matching/youtube_dnn.py +75 -0
  25. torch_rechub/models/matching/youtube_sbc.py +98 -0
  26. torch_rechub/models/multi_task/__init__.py +5 -2
  27. torch_rechub/models/multi_task/aitm.py +83 -0
  28. torch_rechub/models/multi_task/esmm.py +19 -8
  29. torch_rechub/models/multi_task/mmoe.py +18 -12
  30. torch_rechub/models/multi_task/ple.py +41 -29
  31. torch_rechub/models/multi_task/shared_bottom.py +3 -2
  32. torch_rechub/models/ranking/__init__.py +13 -2
  33. torch_rechub/models/ranking/afm.py +65 -0
  34. torch_rechub/models/ranking/autoint.py +102 -0
  35. torch_rechub/models/ranking/bst.py +61 -0
  36. torch_rechub/models/ranking/dcn.py +38 -0
  37. torch_rechub/models/ranking/dcn_v2.py +59 -0
  38. torch_rechub/models/ranking/deepffm.py +131 -0
  39. torch_rechub/models/ranking/deepfm.py +8 -7
  40. torch_rechub/models/ranking/dien.py +191 -0
  41. torch_rechub/models/ranking/din.py +31 -19
  42. torch_rechub/models/ranking/edcn.py +101 -0
  43. torch_rechub/models/ranking/fibinet.py +42 -0
  44. torch_rechub/models/ranking/widedeep.py +6 -6
  45. torch_rechub/trainers/__init__.py +4 -2
  46. torch_rechub/trainers/ctr_trainer.py +191 -0
  47. torch_rechub/trainers/match_trainer.py +239 -0
  48. torch_rechub/trainers/matching.md +3 -0
  49. torch_rechub/trainers/mtl_trainer.py +137 -23
  50. torch_rechub/trainers/seq_trainer.py +293 -0
  51. torch_rechub/utils/__init__.py +0 -0
  52. torch_rechub/utils/data.py +492 -0
  53. torch_rechub/utils/hstu_utils.py +198 -0
  54. torch_rechub/utils/match.py +457 -0
  55. torch_rechub/utils/mtl.py +136 -0
  56. torch_rechub/utils/onnx_export.py +353 -0
  57. torch_rechub-0.0.4.dist-info/METADATA +391 -0
  58. torch_rechub-0.0.4.dist-info/RECORD +62 -0
  59. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info}/WHEEL +1 -2
  60. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info/licenses}/LICENSE +1 -1
  61. torch_rechub/basic/utils.py +0 -168
  62. torch_rechub/trainers/trainer.py +0 -111
  63. torch_rechub-0.0.1.dist-info/METADATA +0 -105
  64. torch_rechub-0.0.1.dist-info/RECORD +0 -26
  65. torch_rechub-0.0.1.dist-info/top_level.txt +0 -1
@@ -1,6 +1,6 @@
  """
  Date: create on 05/05/2022
- References:
+ References:
  paper: (RecSys'2020) Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations
  url: https://dl.acm.org/doi/abs/10.1145/3383313.3412236
  Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
@@ -21,11 +21,11 @@ class PLE(nn.Module):
  n_level (int): the number of CGC layers.
  n_expert_specific (int): the number of task-specific expert nets.
  n_expert_shared (int): the number of task-shared expert nets.
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
+ expert_params (dict): the params of all the expert modules, keys include: `{"dims":list, "activation":str, "dropout":float}`.
  tower_params_list (list): the list of tower params dict, the keys same as expert_params.
  """

- def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params={"dims": [32, 16], "output_layer": False}, tower_params_list=[{"dims": [32, 16]}, {"dims": [32, 16]}]):
+ def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params, tower_params_list):
  super().__init__()
  self.features = features
  self.n_task = len(task_types)
@@ -34,21 +34,25 @@ class PLE(nn.Module):
  self.input_dims = sum([fea.embed_dim for fea in features])
  self.embedding = EmbeddingLayer(features)
  self.cgc_layers = nn.ModuleList(CGC(i + 1, n_level, self.n_task, n_expert_specific, n_expert_shared, self.input_dims, expert_params) for i in range(n_level))
- self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
+ self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], output_layer=False, **tower_params_list[i]) for i in range(self.n_task))
  self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)

  def forward(self, x):
- embed_x = self.embedding(x, self.features, squeeze_dim=True) #[batch_size, input_dims]
+ # [batch_size, input_dims]
+ embed_x = self.embedding(x, self.features, squeeze_dim=True)
  ple_inputs = [embed_x] * (self.n_task + 1)
  ple_outs = []
  for i in range(self.n_level):
- ple_outs = self.cgc_layers[i](ple_inputs) #ple_outs[i]: [batch_size, expert_dims[-1]]
+ # ple_outs[i]: [batch_size, expert_dims[-1]]
+ ple_outs = self.cgc_layers[i](ple_inputs)
  ple_inputs = ple_outs
- #predict
+
+
+ # predict
  ys = []
  for ple_out, tower, predict_layer in zip(ple_outs, self.towers, self.predict_layers):
- tower_out = tower(ple_out) #[batch_size, 1]
- y = predict_layer(tower_out) #logit -> proba
+ tower_out = tower(ple_out) # [batch_size, 1]
+ y = predict_layer(tower_out) # logit -> proba
  ys.append(y)
  return torch.cat(ys, dim=1)
@@ -63,10 +67,10 @@ class CGC(nn.Module):
  n_expert_specific (int): the number of task-specific expert nets.
  n_expert_shared (int): the number of task-shared expert nets.
  input_dims (int): the input dims of the expert module in the current CGC layer.
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
+ expert_params (dict): the params of all the expert modules, keys include: `{"dims":list, "activation":str, "dropout":float}`.
  """

- def __init__(self, cur_level, n_level, n_task, n_expert_specific, n_expert_shared, input_dims, expert_params={"dims": [32, 16], "output_layer": False}):
+ def __init__(self, cur_level, n_level, n_task, n_expert_specific, n_expert_shared, input_dims, expert_params):
  super().__init__()
  self.cur_level = cur_level # the CGC level of PLE
  self.n_level = n_level
@@ -74,31 +78,39 @@ class CGC(nn.Module):
  self.n_expert_specific = n_expert_specific
  self.n_expert_shared = n_expert_shared
  self.n_expert_all = n_expert_specific * self.n_task + n_expert_shared
- input_dims = input_dims if cur_level == 1 else expert_params["dims"][-1] #the first layer expert dim is the input data dim other expert dim
- self.experts_specific = nn.ModuleList(MLP(input_dims, **expert_params) for _ in range(self.n_task * self.n_expert_specific))
- self.experts_shared = nn.ModuleList(MLP(input_dims, **expert_params) for _ in range(self.n_expert_shared))
- self.gates_specific = nn.ModuleList(MLP(input_dims, **{"dims": [self.n_expert_specific + self.n_expert_shared], "activation": "softmax", "output_layer": False}) for _ in range(self.n_task)) #n_gate_specific = n_task
+ # the first layer's expert dim is the input data dim; later layers use the previous expert dim
+ input_dims = input_dims if cur_level == 1 else expert_params["dims"][-1]
+ self.experts_specific = nn.ModuleList(MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_task * self.n_expert_specific))
+ self.experts_shared = nn.ModuleList(MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_expert_shared))
+ self.gates_specific = nn.ModuleList(MLP(input_dims, **{"dims": [self.n_expert_specific + self.n_expert_shared], "activation": "softmax", "output_layer": False}) for _ in range(self.n_task)) # n_gate_specific = n_task
  if cur_level < n_level:
- self.gate_shared = MLP(input_dims, **{"dims": [self.n_expert_all], "activation": "softmax", "output_layer": False}) #n_gate_specific = n_task
+ self.gate_shared = MLP(input_dims, **{"dims": [self.n_expert_all], "activation": "softmax", "output_layer": False}) # n_gate_specific = n_task

  def forward(self, x_list):
- expert_specific_outs = [] #expert_out[i]: [batch_size, 1, expert_dims[-1]]
+ expert_specific_outs = [] # expert_out[i]: [batch_size, 1, expert_dims[-1]]
  for i in range(self.n_task):
  expert_specific_outs.extend([expert(x_list[i]).unsqueeze(1) for expert in self.experts_specific[i * self.n_expert_specific:(i + 1) * self.n_expert_specific]])
- expert_shared_outs = [expert(x_list[-1]).unsqueeze(1) for expert in self.experts_shared] #x_list[-1]: the input for shared experts
- gate_specific_outs = [gate(x_list[i]).unsqueeze(-1) for i, gate in enumerate(self.gates_specific)] #gate_out[i]: [batch_size, n_expert_specific+n_expert_shared, 1]
+ # x_list[-1]: the input for shared experts
+ expert_shared_outs = [expert(x_list[-1]).unsqueeze(1) for expert in self.experts_shared]
+ # gate_out[i]: [batch_size, n_expert_specific+n_expert_shared, 1]
+ gate_specific_outs = [gate(x_list[i]).unsqueeze(-1) for i, gate in enumerate(self.gates_specific)]
  cgc_outs = []
  for i, gate_out in enumerate(gate_specific_outs):
  cur_expert_list = expert_specific_outs[i * self.n_expert_specific:(i + 1) * self.n_expert_specific] + expert_shared_outs
- expert_concat = torch.cat(cur_expert_list, dim=1) #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
- expert_weight = torch.mul(gate_out, expert_concat) #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
- cgc_outs.append(expert_pooling) #length: n_task
- if self.cur_level < self.n_level: #not the last layer
- gate_shared_out = self.gate_shared(x_list[-1]).unsqueeze(-1) #[batch_size, n_expert_all, 1]
- expert_concat = torch.cat(expert_specific_outs + expert_shared_outs, dim=1) #[batch_size, n_expert_all, expert_dims[-1]]
- expert_weight = torch.mul(gate_shared_out, expert_concat) #[batch_size, n_expert_all, expert_dims[-1]]
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
- cgc_outs.append(expert_pooling) #length: n_task+1
+ # [batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
+ expert_concat = torch.cat(cur_expert_list, dim=1)
+ # [batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
+ expert_weight = torch.mul(gate_out, expert_concat)
+ # [batch_size, expert_dims[-1]]
+ expert_pooling = torch.sum(expert_weight, dim=1)
+ cgc_outs.append(expert_pooling) # length: n_task
+ if self.cur_level < self.n_level: # not the last layer
+ gate_shared_out = self.gate_shared(x_list[-1]).unsqueeze(-1) # [batch_size, n_expert_all, 1]
+ expert_concat = torch.cat(expert_specific_outs + expert_shared_outs, dim=1) # [batch_size, n_expert_all, expert_dims[-1]]
+ # [batch_size, n_expert_all, expert_dims[-1]]
+ expert_weight = torch.mul(gate_shared_out, expert_concat)
+ # [batch_size, expert_dims[-1]]
+ expert_pooling = torch.sum(expert_weight, dim=1)
+ cgc_outs.append(expert_pooling) # length: n_task+1

  return cgc_outs
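
For reference, a minimal construction sketch under the new 0.0.4 signature: `expert_params` and `tower_params_list` are now required (the mutable defaults were dropped), and `output_layer=False` is injected internally, so the dicts only need `dims`/`activation`/`dropout` keys. The feature definitions are hypothetical and assume `SparseFeature(name, vocab_size, embed_dim)` from `torch_rechub.basic.features`:

    from torch_rechub.basic.features import SparseFeature
    from torch_rechub.models.multi_task import PLE

    # two hypothetical categorical fields, each embedded into 16 dims
    features = [SparseFeature("user_id", vocab_size=1000, embed_dim=16),
                SparseFeature("item_id", vocab_size=5000, embed_dim=16)]

    model = PLE(
        features,
        task_types=["classification", "classification"],  # one entry per task
        n_level=2,                         # number of stacked CGC layers
        n_expert_specific=2,               # task-specific experts per task
        n_expert_shared=1,                 # experts shared across tasks
        expert_params={"dims": [32, 16]},  # no "output_layer" key needed anymore
        tower_params_list=[{"dims": [8]}, {"dims": [8]}],
    )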
@@ -1,6 +1,6 @@
  """
  Date: create on 04/05/2022
- Reference:
+ Reference:
  paper: Caruana, R. (1997). Multitask learning. Machine learning, 28(1), 41-75.
  Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
  """
@@ -39,6 +39,7 @@ class SharedBottom(nn.Module):
  ys = []
  for tower, predict_layer in zip(self.towers, self.predict_layers):
  tower_out = tower(x)
- y = predict_layer(tower_out) #regression->keep, binary classification->sigmoid
+ # regression->keep, binary classification->sigmoid
+ y = predict_layer(tower_out)
  ys.append(y)
  return torch.cat(ys, dim=1)
@@ -1,3 +1,14 @@
- from .widedeep import WideDeep
+ __all__ = ['WideDeep', 'DeepFM', 'DCN', 'DCNv2', 'EDCN', 'AFM', 'FiBiNet', 'DeepFFM', 'BST', 'DIN', 'DIEN', 'FatDeepFFM', 'AutoInt']
+
+ from .afm import AFM
+ from .autoint import AutoInt
+ from .bst import BST
+ from .dcn import DCN
+ from .dcn_v2 import DCNv2
+ from .deepffm import DeepFFM, FatDeepFFM
  from .deepfm import DeepFM
- from .din import DIN
+ from .dien import DIEN
+ from .din import DIN
+ from .edcn import EDCN
+ from .fibinet import FiBiNet
+ from .widedeep import WideDeep
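
With the expanded `__all__`, the new ranking models resolve directly from the subpackage, for example:

    from torch_rechub.models.ranking import AFM, AutoInt, BST, DCN, DCNv2, DeepFFM, FatDeepFFM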
@@ -0,0 +1,65 @@
+ """
+ Date: create on 23/04/2024
+ References:
+ paper: (IJCAI'2017) Attentional Factorization Machines: Learning the Weight of Feature Interactions via Attention Networks
+ url: https://arxiv.org/abs/1708.04617
+ Authors: Tao Fan, thisisevy@foxmail.com
+ """
+
+ import torch
+ from torch import nn
+ from torch.nn import Parameter, init
+
+ from ...basic.layers import FM, LR, MLP, EmbeddingLayer
+
+
+ class AFM(nn.Module):
+ """Attentional Factorization Machine Model
+
+ Args:
+ fm_features (list): the list of `Feature Class`, training by the fm part module.
+ embed_dim (int): the dimension of input embedding.
+ t (int): the size of the hidden layer in the attention network.
+ """
+
+ def __init__(self, fm_features, embed_dim, t=64):
+ super(AFM, self).__init__()
+ self.fm_features = fm_features
+ self.embed_dim = embed_dim
+ self.fm_dims = sum([fea.embed_dim for fea in fm_features])
+ self.linear = LR(self.fm_dims) # 1st-order interaction
+ self.fm = FM(reduce_sum=False) # 2nd-order interaction
+ self.embedding = EmbeddingLayer(fm_features)
+
+ # linear layer of the attention network
+ self.attention_liner = nn.Linear(self.embed_dim, t)
+ # h in the AFM formula
+ self.h = init.xavier_uniform_(Parameter(torch.empty(t, 1)))
+ # p in the AFM formula
+ self.p = init.xavier_uniform_(Parameter(torch.empty(self.embed_dim, 1)))
+
+ def attention(self, y_fm):
+ # embs: [ batch_size, k ]
+ # [ batch_size, t ]
+ y_fm = self.attention_liner(y_fm)
+ # [ batch_size, t ]
+ y_fm = torch.relu(y_fm)
+ # [ batch_size, 1 ]
+ y_fm = torch.matmul(y_fm, self.h)
+ # [ batch_size, 1 ]
+ atts = torch.softmax(y_fm, dim=1)
+ return atts
+
+ def forward(self, x):
+ # [batch_size, num_fields, embed_dim]
+ input_fm = self.embedding(x, self.fm_features, squeeze_dim=False)
+
+ y_linear = self.linear(input_fm.flatten(start_dim=1))
+ y_fm = self.fm(input_fm)
+ # compute the attention weights
+ atts = self.attention(y_fm)
+ # [ batch_size, 1 ]
+ outs = torch.matmul(atts * y_fm, self.p)
+ # print(y_linear.size(), outs.size())
+ y = y_linear + outs
+ return torch.sigmoid(y.squeeze(1))
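
A construction sketch for the new AFM model (hypothetical features; assumes `SparseFeature(name, vocab_size, embed_dim)` from `torch_rechub.basic.features`, with every fm feature sharing the same embed_dim so that the shapes of `h` and `p` line up):

    from torch_rechub.basic.features import SparseFeature
    from torch_rechub.models.ranking import AFM

    embed_dim = 16
    fm_features = [SparseFeature("user_id", vocab_size=1000, embed_dim=embed_dim),
                   SparseFeature("item_id", vocab_size=5000, embed_dim=embed_dim)]

    # t is the hidden size of the attention network (default 64)
    model = AFM(fm_features, embed_dim=embed_dim, t=64)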
@@ -0,0 +1,102 @@
+ """
+ Date: create on 14/11/2025
+ References:
+ paper: (CIKM'2019) AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks
+ url: https://arxiv.org/abs/1810.11921
+ Authors: Yang Zhou, zyaztec@gmail.com
+ """
+
+ import torch
+ import torch.nn as nn
+
+ from ...basic.layers import LR, MLP, EmbeddingLayer, InteractingLayer
+
+
+ class AutoInt(torch.nn.Module):
+ """AutoInt Model
+
+ Args:
+ sparse_features (list): the list of `SparseFeature` Class
+ dense_features (list): the list of `DenseFeature` Class
+ num_layers (int): number of interacting layers
+ num_heads (int): number of attention heads
+ dropout (float): dropout rate for attention
+ mlp_params (dict): parameters for MLP, keys: {"dims":list, "activation":str,
+ "dropout":float, "output_layer":bool}
+ """
+
+ def __init__(self, sparse_features, dense_features, num_layers=3, num_heads=2, dropout=0.0, mlp_params=None):
+ super(AutoInt, self).__init__()
+ self.sparse_features = sparse_features
+
+ self.dense_features = dense_features if dense_features is not None else []
+ if len(self.sparse_features) == 0:
+ raise ValueError("AutoInt requires at least one sparse feature to determine embed_dim.")
+ embed_dims = [fea.embed_dim for fea in self.sparse_features]
+ self.embed_dim = embed_dims[0]
+
+ # field nums = sparse + dense
+ self.num_sparse = len(self.sparse_features)
+ self.num_dense = len(self.dense_features)
+ self.num_fields = self.num_sparse + self.num_dense
+
+ # total dims = num_fields * embed_dim
+ self.dims = self.num_fields * self.embed_dim
+ self.num_layers = num_layers
+
+ self.sparse_embedding = EmbeddingLayer(self.sparse_features)
+
+ # dense feature embedding
+ self.dense_embeddings = nn.ModuleDict()
+ for fea in self.dense_features:
+ self.dense_embeddings[fea.name] = nn.Linear(1, self.embed_dim, bias=False)
+
+ self.interacting_layers = torch.nn.ModuleList([InteractingLayer(self.embed_dim, num_heads=num_heads, dropout=dropout, residual=True) for _ in range(num_layers)])
+
+ self.linear = LR(self.dims)
+
+ self.attn_linear = nn.Linear(self.dims, 1)
+
+ if mlp_params is not None:
+ self.use_mlp = True
+ self.mlp = MLP(self.dims, **mlp_params)
+ else:
+ self.use_mlp = False
+
+ def forward(self, x):
+ # sparse feature embedding: [B, num_sparse, embed_dim]
+ sparse_emb = self.sparse_embedding(x, self.sparse_features, squeeze_dim=False)
+
+ dense_emb_list = []
+ for fea in self.dense_features:
+ v = x[fea.name].float().view(-1, 1, 1)
+ dense_emb = self.dense_embeddings[fea.name](v) # [B, 1, embed_dim]
+ dense_emb_list.append(dense_emb)
+
+ if len(dense_emb_list) > 0:
+ dense_emb = torch.cat(dense_emb_list, dim=1) # [B, num_dense, d]
+ embed_x = torch.cat([sparse_emb, dense_emb], dim=1) # [B, num_fields, d]
+ else:
+ embed_x = sparse_emb # [B, num_sparse, d]
+
+ embed_x_flatten = embed_x.flatten(start_dim=1) # [B, num_fields * embed_dim]
+
+ # Multi-head self-attention layers
+ attn_out = embed_x
+ for layer in self.interacting_layers:
+ attn_out = layer(attn_out) # [B, num_fields, embed_dim]
+
+ # Attention linear
+ attn_out_flatten = attn_out.flatten(start_dim=1) # [B, num_fields * embed_dim]
+ y_attn = self.attn_linear(attn_out_flatten) # [B, 1]
+
+ # Linear part
+ y_linear = self.linear(embed_x_flatten) # [B, 1]
+
+ # Deep MLP
+ y = y_attn + y_linear
+ if self.use_mlp:
+ y_deep = self.mlp(embed_x_flatten) # [B, 1]
+ y = y + y_deep
+
+ return torch.sigmoid(y.squeeze(1))
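
A construction sketch showing the sparse/dense split and the optional deep tower (hypothetical features; assumes `SparseFeature(name, vocab_size, embed_dim)` and `DenseFeature(name)` from `torch_rechub.basic.features`):

    from torch_rechub.basic.features import DenseFeature, SparseFeature
    from torch_rechub.models.ranking import AutoInt

    sparse_features = [SparseFeature("user_id", vocab_size=1000, embed_dim=16),
                       SparseFeature("item_id", vocab_size=5000, embed_dim=16)]
    # each dense value is projected to embed_dim by its own nn.Linear(1, 16)
    dense_features = [DenseFeature("price")]

    model = AutoInt(
        sparse_features,
        dense_features,
        num_layers=3,  # stacked InteractingLayer blocks
        num_heads=2,   # attention heads per layer
        dropout=0.1,
        mlp_params={"dims": [64, 32]},  # pass None to drop the deep tower
    )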
@@ -0,0 +1,61 @@
+ """
+ Date: create on 26/02/2024, update on 30/04/2022
+ References:
+ paper: Behavior Sequence Transformer for E-commerce Recommendation in Alibaba
+ url: https://arxiv.org/pdf/1905.06874
+ code: https://github.com/jiwidi/Behavior-Sequence-Transformer-Pytorch/blob/master/pytorch_bst.ipynb
+ Authors: Tao Fan, thisisevy@foxmail.com
+ """
+
+ import torch
+ import torch.nn as nn
+
+ from ...basic.layers import MLP, EmbeddingLayer
+
+
+ class BST(nn.Module):
+ """Behavior Sequence Transformer
+ Args:
+ features (list): the list of `Feature Class`, training by MLP. It means the user profile features and context features in the original paper, excluding history and target features.
+ history_features (list): the list of `Feature Class`, fed to the transformer encoder. It means the user behaviour sequence features, e.g. item id sequence, shop id sequence.
+ target_features (list): the list of `Feature Class`, fed to the transformer encoder. It means the target feature which is encoded together with the history features.
+ mlp_params (dict): the params of the last MLP module, keys include: `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`.
+ nhead (int): the number of heads in the multi-head-attention models.
+ dropout (float): the dropout value in the multi-head-attention models.
+ num_layers (int): the number of sub-encoder-layers in the encoder.
+ """
+
+ def __init__(self, features, history_features, target_features, mlp_params, nhead=8, dropout=0.2, num_layers=1):
+ super().__init__()
+ self.features = features
+ self.history_features = history_features
+ self.target_features = target_features
+ self.num_history_features = len(history_features)
+ self.embed_dim = target_features[0].embed_dim
+ self.seq_len = 50
+ # TODO: add a seq_len attribute to 'torch_rechub.basic.features.SequenceFeature'
+ self.all_dims = (len(features) + len(history_features) * (self.seq_len + len(target_features))) * self.embed_dim
+ self.embedding = EmbeddingLayer(features + history_features + target_features)
+ self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=nhead, dropout=dropout)
+ self.transformer_layers = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
+ # the feature classes above build the input layer; mlp_params specifies the structure of the DNN that follows
+ self.mlp = MLP(self.all_dims, activation="leakyrelu", **mlp_params)
+
+ def forward(self, x):
+ # (batch_size, num_features, emb_dim)
+ embed_x_features = self.embedding(x, self.features)
+ # (batch_size, num_history_features, seq_length, emb_dim)
+ embed_x_history = self.embedding(x, self.history_features)
+ # (batch_size, num_target_features, emb_dim)
+ embed_x_target = self.embedding(x, self.target_features)
+ attention_pooling = []
+ for i in range(self.num_history_features):
+ attention_seq = self.transformer_layers(torch.cat([torch.squeeze(embed_x_history[:, i, :, :], 1), embed_x_target], dim=1))
+ # (batch_size, seq_length + num_target_features, emb_dim)
+ attention_pooling.append(attention_seq)
+ # (batch_size, num_history_features * (seq_length + num_target_features), emb_dim)
+ attention_pooling = torch.cat(attention_pooling, dim=1)
+
+ mlp_in = torch.cat([attention_pooling.flatten(start_dim=1), embed_x_features.flatten(start_dim=1)], dim=1) # (batch_size, N)
+ y = self.mlp(mlp_in)
+ return torch.sigmoid(y.squeeze(1))
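
A construction sketch (hypothetical features; assumes `SparseFeature(name, vocab_size, embed_dim)` and `SequenceFeature(name, vocab_size, embed_dim, pooling, shared_with)` from `torch_rechub.basic.features`). Note that `seq_len` is hard-coded to 50 (see the TODO above), so history sequences are expected padded/truncated to that length:

    from torch_rechub.basic.features import SequenceFeature, SparseFeature
    from torch_rechub.models.ranking import BST

    features = [SparseFeature("user_id", vocab_size=1000, embed_dim=16)]
    target_features = [SparseFeature("target_item_id", vocab_size=5000, embed_dim=16)]
    # "concat" keeps the sequence un-pooled; shared_with ties the history
    # embedding table to the target item's table (assumed signature)
    history_features = [SequenceFeature("hist_item_ids", vocab_size=5000, embed_dim=16,
                                        pooling="concat", shared_with="target_item_id")]

    # embed_dim (16) must be divisible by nhead
    model = BST(features, history_features, target_features,
                mlp_params={"dims": [64, 32]}, nhead=8, dropout=0.2, num_layers=1)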
@@ -0,0 +1,38 @@
+ """
+ Date: create on 12/05/2022
+ References:
+ paper: (ADKDD'2017) Deep & Cross Network for Ad Click Predictions
+ url: https://arxiv.org/abs/1708.05123
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+
+ import torch
+
+ from ...basic.layers import LR, MLP, CrossNetwork, EmbeddingLayer
+
+
+ class DCN(torch.nn.Module):
+ """Deep & Cross Network
+
+ Args:
+ features (list[Feature Class]): training by the whole module.
+ n_cross_layers (int): the number of cross layers.
+ mlp_params (dict): the params of the last MLP module, keys include: `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
+ """
+
+ def __init__(self, features, n_cross_layers, mlp_params):
+ super().__init__()
+ self.features = features
+ self.dims = sum([fea.embed_dim for fea in features])
+
+ self.embedding = EmbeddingLayer(features)
+ self.cn = CrossNetwork(self.dims, n_cross_layers)
+ self.mlp = MLP(self.dims, output_layer=False, **mlp_params)
+ self.linear = LR(self.dims + mlp_params["dims"][-1])
+
+ def forward(self, x):
+ embed_x = self.embedding(x, self.features, squeeze_dim=True)
+ cn_out = self.cn(embed_x)
+ mlp_out = self.mlp(embed_x)
+ x_stack = torch.cat([cn_out, mlp_out], dim=1)
+ y = self.linear(x_stack)
+ return torch.sigmoid(y.squeeze(1))
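
A construction sketch (hypothetical features; assumes `SparseFeature(name, vocab_size, embed_dim)` from `torch_rechub.basic.features`). The cross network and the MLP run in parallel over the flattened embeddings, and a final LR head scores their concatenation:

    from torch_rechub.basic.features import SparseFeature
    from torch_rechub.models.ranking import DCN

    features = [SparseFeature("user_id", vocab_size=1000, embed_dim=16),
                SparseFeature("item_id", vocab_size=5000, embed_dim=16)]

    model = DCN(features, n_cross_layers=3, mlp_params={"dims": [64, 32]})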
@@ -0,0 +1,59 @@
+ """
+ Date: create on 09/01/2022
+ References:
+ paper: (WWW'2021) DCN V2: Improved Deep & Cross Network and Practical Lessons for Web-scale Learning to Rank Systems
+ url: https://arxiv.org/abs/2008.13535
+ Authors: lailai, lailai_zxy@tju.edu.cn
+ """
+ import torch
+
+ from ...basic.layers import LR, MLP, CrossNetMix, CrossNetV2, EmbeddingLayer
+
+
+ class DCNv2(torch.nn.Module):
+ """Deep & Cross Network with a mixture of low-rank architecture
+
+ Args:
+ features (list[Feature Class]): training by the whole module.
+ n_cross_layers (int): the number of feature crossing layers
+ mlp_params (dict): the params of the last MLP module, keys include: `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
+ model_structure (str): one of "crossnet_only", "stacked" or "parallel" (default "parallel")
+ use_low_rank_mixture (bool): whether to use a mixture of low-rank architecture (default True)
+ low_rank (int): the rank size of low-rank matrices
+ num_experts (int): the number of expert networks
+ """
+
+ def __init__(self, features, n_cross_layers, mlp_params, model_structure="parallel", use_low_rank_mixture=True, low_rank=32, num_experts=4, **kwargs):
+ super(DCNv2, self).__init__()
+ self.features = features
+ self.dims = sum([fea.embed_dim for fea in features])
+ self.embedding = EmbeddingLayer(features)
+ if use_low_rank_mixture:
+ self.crossnet = CrossNetMix(self.dims, n_cross_layers, low_rank=low_rank, num_experts=num_experts)
+ else:
+ self.crossnet = CrossNetV2(self.dims, n_cross_layers)
+ self.model_structure = model_structure
+ assert self.model_structure in ["crossnet_only", "stacked", "parallel"], \
+ "model_structure={} not supported!".format(self.model_structure)
+ if self.model_structure == "stacked":
+ self.stacked_dnn = MLP(self.dims, output_layer=False, **mlp_params)
+ final_dim = mlp_params["dims"][-1]
+ if self.model_structure == "parallel":
+ self.parallel_dnn = MLP(self.dims, output_layer=False, **mlp_params)
+ final_dim = mlp_params["dims"][-1] + self.dims
+ if self.model_structure == "crossnet_only": # only CrossNet
+ final_dim = self.dims
+ self.linear = LR(final_dim)
+
+ def forward(self, x):
+ embed_x = self.embedding(x, self.features, squeeze_dim=True)
+ cross_out = self.crossnet(embed_x)
+ if self.model_structure == "crossnet_only":
+ final_out = cross_out
+ elif self.model_structure == "stacked":
+ final_out = self.stacked_dnn(cross_out)
+ elif self.model_structure == "parallel":
+ dnn_out = self.parallel_dnn(embed_x)
+ final_out = torch.cat([cross_out, dnn_out], dim=1)
+ y_pred = self.linear(final_out)
+ y_pred = torch.sigmoid(y_pred.squeeze(1))
+ return y_pred
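
A construction sketch (hypothetical features, same `SparseFeature` assumption as above). The three `model_structure` options wire the blocks differently: "parallel" (default) runs CrossNet and DNN side by side, "stacked" puts the DNN on top of CrossNet, and "crossnet_only" feeds the CrossNet output straight into the LR head:

    from torch_rechub.basic.features import SparseFeature
    from torch_rechub.models.ranking import DCNv2

    features = [SparseFeature("user_id", vocab_size=1000, embed_dim=16),
                SparseFeature("item_id", vocab_size=5000, embed_dim=16)]

    model = DCNv2(features, n_cross_layers=2, mlp_params={"dims": [64, 32]},
                  model_structure="parallel",
                  use_low_rank_mixture=True, low_rank=32, num_experts=4)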
@@ -0,0 +1,131 @@
+ """
+ Date: created on 31/07/2022
+ References:
+ paper: FAT-DeepFFM: Field Attentive Deep Field-aware Factorization Machine
+ url: https://arxiv.org/abs/1905.06336
+ Authors: Bo Kang, klinux@live.com
+ """
+
+ import torch
+ import torch.nn as nn
+
+ from ...basic.layers import CEN, FFM, MLP, EmbeddingLayer
+
+
+ class DeepFFM(nn.Module):
+ """The DeepFFM model, mentioned on the `webpage
+ <https://cs.nju.edu.cn/31/60/c1654a209248/page.htm>`, which is the first
+ work that introduces the FFM model into a neural CTR system. It is also described
+ in the `FAT-DeepFFM paper <https://arxiv.org/abs/1905.06336>`.
+
+ Args:
+ linear_features (list): the list of `Feature Class`, fed to the linear module.
+ cross_features (list): the list of `Feature Class`, fed to the ffm module.
+ embed_dim (int): the dimensionality of categorical value embedding.
+ mlp_params (dict): the params of the last MLP module, keys include: `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
+ """
+
+ def __init__(self, linear_features, cross_features, embed_dim, mlp_params):
+ super().__init__()
+ self.linear_features = linear_features
+ self.cross_features = cross_features
+
+ self.num_fields = len(cross_features)
+ self.num_field_cross = self.num_fields * (self.num_fields - 1) // 2
+
+ self.ffm = FFM(num_fields=self.num_fields, reduce_sum=False)
+ self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)
+
+ self.linear_embedding = EmbeddingLayer(linear_features)
+ self.ffm_embedding = EmbeddingLayer(cross_features)
+
+ self.b = torch.nn.Parameter(torch.zeros(1))
+
+ # This keeps a constant value in the module on the correct device
+ # url:
+ # https://discuss.pytorch.org/t/keeping-constant-value-in-module-on-correct-device/10129
+ fields_offset = torch.arange(0, self.num_fields, dtype=torch.long)
+ self.register_buffer('fields_offset', fields_offset)
+
+ def forward(self, x):
+ # compute scores from the linear part of the model, where input is the
+ # raw features (Eq. 5, FAT-DeepFFM)
+ y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True) # [batch_size, 1]
+
+ # gather the embeddings. Each feature value corresponds to multiple embeddings, with multiplicity equal to the number of features/fields.
+ # output shape [batch_size, num_field, num_field, emb_dim]
+ x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields + self.fields_offset for fea in self.cross_features}
+ input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)
+
+ # compute second order field-aware feature crossings, output shape
+ # [batch_size, num_field_cross, emb_dim]
+ em = self.ffm(input_ffm)
+
+ # compute scores from the ffm part of the model, output shape
+ # [batch_size, 1]
+ y_ffm = self.mlp_out(em.flatten(start_dim=1))
+
+ # compute final prediction
+ y = y_linear + y_ffm
+ return torch.sigmoid(y.squeeze(1) + self.b)
+
+
+ class FatDeepFFM(nn.Module):
+ """The FAT-DeepFFM model, mentioned in the `FAT-DeepFFM paper
+ <https://arxiv.org/abs/1905.06336>`. It combines DeepFFM with the
+ Compose-Excitation Network (CENet) field attention mechanism
+ to highlight the importance of second-order feature crosses.
+
+ Args:
+ linear_features (list): the list of `Feature Class`, fed to the linear module.
+ cross_features (list): the list of `Feature Class`, fed to the ffm module.
+ embed_dim (int): the dimensionality of categorical value embedding.
+ reduction_ratio (int): the ratio between the dimensions of the input layer and the hidden layer of the CEN MLP module.
+ mlp_params (dict): the params of the last MLP module, keys include: `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
+ """
+
+ def __init__(self, linear_features, cross_features, embed_dim, reduction_ratio, mlp_params):
+ super().__init__()
+ self.linear_features = linear_features
+ self.cross_features = cross_features
+
+ self.num_fields = len(cross_features)
+ self.num_field_cross = self.num_fields * (self.num_fields - 1) // 2
+
+ self.ffm = FFM(num_fields=self.num_fields, reduce_sum=False)
+ self.cen = CEN(embed_dim, self.num_field_cross, reduction_ratio)
+ self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)
+
+ self.linear_embedding = EmbeddingLayer(linear_features)
+ self.ffm_embedding = EmbeddingLayer(cross_features)
+
+ self.b = torch.nn.Parameter(torch.zeros(1))
+
+ fields_offset = torch.arange(0, self.num_fields, dtype=torch.long)
+ self.register_buffer('fields_offset', fields_offset)
+
+ def forward(self, x):
+ # compute scores from the linear part of the model, where input is the
+ # raw features (Eq. 5, FAT-DeepFFM)
+ y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True) # [batch_size, 1]
+
+ # gather the embeddings. Each feature value corresponds to multiple embeddings, with multiplicity equal to the number of features/fields.
+ # output shape [batch_size, num_field, num_field, emb_dim]
+ x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields + self.fields_offset for fea in self.cross_features}
+ input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)
+
+ # compute second order field-aware feature crossings, output shape
+ # [batch_size, num_field_cross, emb_dim]
+ em = self.ffm(input_ffm)
+
+ # rescale FFM embeddings with field attention (Eq. 10), output shape
+ # [batch_size, num_field_cross * emb_dim]
+ aem = self.cen(em)
+
+ # compute scores from the ffm part of the model, output shape
+ # [batch_size, 1]
+ y_ffm = self.mlp_out(aem)
+
+ # compute final prediction
+ y = y_linear + y_ffm
+ return torch.sigmoid(y.squeeze(1) + self.b)
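
A construction sketch (hypothetical features, same `SparseFeature` assumption as above). One detail worth noting from the forward pass: each raw id is mapped to `id * num_fields + field_offset`, so every cross feature's embedding table must hold `vocab_size * num_fields` rows:

    from torch_rechub.basic.features import SparseFeature
    from torch_rechub.models.ranking import DeepFFM, FatDeepFFM

    embed_dim = 8
    num_fields = 2  # number of cross features
    cross_features = [SparseFeature("user_id", vocab_size=1000 * num_fields, embed_dim=embed_dim),
                      SparseFeature("item_id", vocab_size=5000 * num_fields, embed_dim=embed_dim)]
    # the linear part sums the embeddings of these features, so embed_dim=1 keeps it LR-like
    linear_features = [SparseFeature("user_id", vocab_size=1000, embed_dim=1),
                       SparseFeature("item_id", vocab_size=5000, embed_dim=1)]

    deepffm = DeepFFM(linear_features, cross_features, embed_dim=embed_dim,
                      mlp_params={"dims": [32]})
    # FatDeepFFM additionally rescales the crossed embeddings with CENet field attention
    fat_deepffm = FatDeepFFM(linear_features, cross_features, embed_dim=embed_dim,
                             reduction_ratio=1, mlp_params={"dims": [32]})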
@@ -1,14 +1,14 @@
  """
  Date: create on 22/04/2022
- References:
- paper: (IJCAI'2017) DeepFM: A Factorization-Machine based Neural Network for CTR Prediction
+ References:
+ paper: (IJCAI'2017) DeepFM: A Factorization-Machine based Neural Network for CTR Prediction
  url: https://arxiv.org/abs/1703.04247
  Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
  """

  import torch

- from ...basic.layers import FM, MLP, LR, EmbeddingLayer
+ from ...basic.layers import FM, LR, MLP, EmbeddingLayer


  class DeepFM(torch.nn.Module):
@@ -32,11 +32,12 @@ class DeepFM(torch.nn.Module):
  self.mlp = MLP(self.deep_dims, **mlp_params)

  def forward(self, x):
- input_deep = self.embedding(x, self.deep_features, squeeze_dim=True) #[batch_size, deep_dims]
- input_fm = self.embedding(x, self.fm_features, squeeze_dim=False) #[batch_size, num_fields, embed_dim]
+ input_deep = self.embedding(x, self.deep_features, squeeze_dim=True) # [batch_size, deep_dims]
+ # [batch_size, num_fields, embed_dim]
+ input_fm = self.embedding(x, self.fm_features, squeeze_dim=False)

  y_linear = self.linear(input_fm.flatten(start_dim=1))
  y_fm = self.fm(input_fm)
- y_deep = self.mlp(input_deep) #[batch_size, 1]
+ y_deep = self.mlp(input_deep) # [batch_size, 1]
  y = y_linear + y_fm + y_deep
- return torch.sigmoid(y.squeeze(1))
+ return torch.sigmoid(y.squeeze(1))
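
A construction sketch (hypothetical features, same `SparseFeature`/`DenseFeature` assumptions as above); the forward pass sums the three heads, `y = y_linear + y_fm + y_deep`, and squashes with a sigmoid:

    from torch_rechub.basic.features import DenseFeature, SparseFeature
    from torch_rechub.models.ranking import DeepFM

    fm_features = [SparseFeature("user_id", vocab_size=1000, embed_dim=16),
                   SparseFeature("item_id", vocab_size=5000, embed_dim=16)]
    # the deep tower can also consume dense inputs
    deep_features = fm_features + [DenseFeature("price")]

    model = DeepFM(deep_features=deep_features, fm_features=fm_features,
                   mlp_params={"dims": [64, 32]})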