torch-rechub 0.0.1__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. torch_rechub/__init__.py +14 -0
  2. torch_rechub/basic/activation.py +3 -1
  3. torch_rechub/basic/callback.py +2 -2
  4. torch_rechub/basic/features.py +38 -8
  5. torch_rechub/basic/initializers.py +92 -0
  6. torch_rechub/basic/layers.py +800 -46
  7. torch_rechub/basic/loss_func.py +223 -0
  8. torch_rechub/basic/metaoptimizer.py +76 -0
  9. torch_rechub/basic/metric.py +251 -0
  10. torch_rechub/models/generative/__init__.py +6 -0
  11. torch_rechub/models/generative/hllm.py +249 -0
  12. torch_rechub/models/generative/hstu.py +189 -0
  13. torch_rechub/models/matching/__init__.py +13 -0
  14. torch_rechub/models/matching/comirec.py +193 -0
  15. torch_rechub/models/matching/dssm.py +72 -0
  16. torch_rechub/models/matching/dssm_facebook.py +77 -0
  17. torch_rechub/models/matching/dssm_senet.py +87 -0
  18. torch_rechub/models/matching/gru4rec.py +85 -0
  19. torch_rechub/models/matching/mind.py +103 -0
  20. torch_rechub/models/matching/narm.py +82 -0
  21. torch_rechub/models/matching/sasrec.py +143 -0
  22. torch_rechub/models/matching/sine.py +148 -0
  23. torch_rechub/models/matching/stamp.py +81 -0
  24. torch_rechub/models/matching/youtube_dnn.py +75 -0
  25. torch_rechub/models/matching/youtube_sbc.py +98 -0
  26. torch_rechub/models/multi_task/__init__.py +5 -2
  27. torch_rechub/models/multi_task/aitm.py +83 -0
  28. torch_rechub/models/multi_task/esmm.py +19 -8
  29. torch_rechub/models/multi_task/mmoe.py +18 -12
  30. torch_rechub/models/multi_task/ple.py +41 -29
  31. torch_rechub/models/multi_task/shared_bottom.py +3 -2
  32. torch_rechub/models/ranking/__init__.py +13 -2
  33. torch_rechub/models/ranking/afm.py +65 -0
  34. torch_rechub/models/ranking/autoint.py +102 -0
  35. torch_rechub/models/ranking/bst.py +61 -0
  36. torch_rechub/models/ranking/dcn.py +38 -0
  37. torch_rechub/models/ranking/dcn_v2.py +59 -0
  38. torch_rechub/models/ranking/deepffm.py +131 -0
  39. torch_rechub/models/ranking/deepfm.py +8 -7
  40. torch_rechub/models/ranking/dien.py +191 -0
  41. torch_rechub/models/ranking/din.py +31 -19
  42. torch_rechub/models/ranking/edcn.py +101 -0
  43. torch_rechub/models/ranking/fibinet.py +42 -0
  44. torch_rechub/models/ranking/widedeep.py +6 -6
  45. torch_rechub/trainers/__init__.py +4 -2
  46. torch_rechub/trainers/ctr_trainer.py +191 -0
  47. torch_rechub/trainers/match_trainer.py +239 -0
  48. torch_rechub/trainers/matching.md +3 -0
  49. torch_rechub/trainers/mtl_trainer.py +137 -23
  50. torch_rechub/trainers/seq_trainer.py +293 -0
  51. torch_rechub/utils/__init__.py +0 -0
  52. torch_rechub/utils/data.py +492 -0
  53. torch_rechub/utils/hstu_utils.py +198 -0
  54. torch_rechub/utils/match.py +457 -0
  55. torch_rechub/utils/mtl.py +136 -0
  56. torch_rechub/utils/onnx_export.py +353 -0
  57. torch_rechub-0.0.4.dist-info/METADATA +391 -0
  58. torch_rechub-0.0.4.dist-info/RECORD +62 -0
  59. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info}/WHEEL +1 -2
  60. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info/licenses}/LICENSE +1 -1
  61. torch_rechub/basic/utils.py +0 -168
  62. torch_rechub/trainers/trainer.py +0 -111
  63. torch_rechub-0.0.1.dist-info/METADATA +0 -105
  64. torch_rechub-0.0.1.dist-info/RECORD +0 -26
  65. torch_rechub-0.0.1.dist-info/top_level.txt +0 -1
torch_rechub/models/ranking/dien.py
@@ -0,0 +1,191 @@
+ """
+ Date: create on 01/05/2024
+ References:
+ paper: (AAAI'2019) Deep Interest Evolution Network for Click-Through Rate Prediction
+ url: https://arxiv.org/pdf/1809.03672
+ Authors: Tao Fan, thisisevy@foxmail.com
+ """
+
+ import torch
+ from torch import nn
+ from torch.nn import Parameter, init
+
+ from ...basic.layers import MLP, EmbeddingLayer
+
+
+ class AUGRU(nn.Module):
+
+     def __init__(self, embed_dim):
+         super(AUGRU, self).__init__()
+         self.embed_dim = embed_dim
+         # initialize the AUGRU cell
+         self.augru_cell = AUGRU_Cell(self.embed_dim)
+
+     def forward(self, x, item):
+         '''
+         :param x: input sequence of vectors, shape [batch_size, seq_lens, embed_dim]
+         :param item: target item vector
+         :return: outs: hidden vectors emitted by every AUGRU cell [batch_size, seq_lens, embed_dim]
+                  h: hidden vector emitted by the last AUGRU cell [batch_size, embed_dim]
+         '''
+         outs = []
+         h = None
+         # iterate over the sequence; x.shape[1] is the sequence length
+         for i in range(x.shape[1]):
+             if h is None:
+                 # initialize the hidden state h for the first step
+                 h = Parameter(torch.rand(x.shape[0], self.embed_dim).to(x.device))
+             h = self.augru_cell(x[:, i], h, item)
+             outs.append(torch.unsqueeze(h, dim=1))
+         outs = torch.cat(outs, dim=1)
+         return outs, h
+
+
+ # the AUGRU cell
+ class AUGRU_Cell(nn.Module):
+
+     def __init__(self, embed_dim):
+         """
+         :param embed_dim: dimension of the input vectors
+         """
+         super(AUGRU_Cell, self).__init__()
+
+         # initialize the update gate parameters
+         self.Wu = Parameter(torch.rand(embed_dim, embed_dim))
+         self.Uu = Parameter(torch.rand(embed_dim, embed_dim))
+         self.bu = init.xavier_uniform_(Parameter(torch.empty(1, embed_dim)))
+
+         # initialize the reset gate parameters
+         self.Wr = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
+         self.Ur = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
+         self.br = init.xavier_uniform_(Parameter(torch.empty(1, embed_dim)))
+
+         # initialize the parameters for the candidate state h~
+         self.Wh = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
+         self.Uh = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
+         self.bh = init.xavier_uniform_(Parameter(torch.empty(1, embed_dim)))
+
+         # initialize the attention parameters
+         self.Wa = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
+
+     # attention computation
+     def attention(self, x, item):
+         '''
+         :param x: the t-th vector of the input sequence [batch_size, embed_dim]
+         :param item: target item vector [batch_size, embed_dim]
+         :return: attention weight [batch_size, 1]
+         '''
+         hW = torch.matmul(x, self.Wa)
+         hWi = torch.sum(hW * item, dim=1)
+         hWi = torch.unsqueeze(hWi, 1)
+         return torch.softmax(hWi, dim=1)
+
+     def forward(self, x, h_1, item):
+         '''
+         :param x: the t-th item vector of the input sequence [batch_size, embed_dim]
+         :param h_1: hidden vector from the previous AUGRU cell [batch_size, embed_dim]
+         :param item: target item vector [batch_size, embed_dim]
+         :return: h, the hidden vector of the current step [batch_size, embed_dim]
+         '''
+         # [batch_size, embed_dim]
+         u = torch.sigmoid(torch.matmul(x, self.Wu) + torch.matmul(h_1, self.Uu) + self.bu)
+         # [batch_size, embed_dim]
+         r = torch.sigmoid(torch.matmul(x, self.Wr) + torch.matmul(h_1, self.Ur) + self.br)
+         # [batch_size, embed_dim]
+         h_hat = torch.tanh(torch.matmul(x, self.Wh) + r * torch.matmul(h_1, self.Uh) + self.bh)
+         # [batch_size, 1]
+         a = self.attention(x, item)
+         # [batch_size, embed_dim]
+         u_hat = a * u
+         # [batch_size, embed_dim]
+         h = (1 - u_hat) * h_1 + u_hat * h_hat
+         return h
+
+
+ class DIEN(nn.Module):
+     """Deep Interest Evolution Network
+
+     Args:
+         features (list): the list of `Feature Class` fed to the final MLP. These are the user profile and context features in the original paper, excluding history and target features.
+         history_features (list): the list of `Feature Class` for the user behaviour sequences, e.g. item id sequence, shop id sequence.
+         target_features (list): the list of `Feature Class` for the target items, which undergo target-attention with the history features.
+         mlp_params (dict): the params of the last MLP module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`
+         history_labels (list): click labels for the history sequences; each entry should be 0 or 1.
+         alpha (float): the weight of the auxiliary loss.
+     """
+
+     def __init__(self, features, history_features, target_features, mlp_params, history_labels, alpha=0.2):
+         super().__init__()
+         self.alpha = alpha  # weight of the auxiliary loss
+         self.features = features
+         self.history_features = history_features
+         self.target_features = target_features
+         self.num_history_features = len(history_features)
+         self.all_dims = sum([fea.embed_dim for fea in features + history_features + target_features])
+         self.embedding = EmbeddingLayer(features + history_features + target_features)
+         self.interest_extractor_layers = nn.ModuleList([nn.GRU(fea.embed_dim, fea.embed_dim, batch_first=True) for fea in self.history_features])
+         self.interest_evolving_layers = nn.ModuleList([AUGRU(fea.embed_dim) for fea in self.history_features])
+
+         self.mlp = MLP(self.all_dims, activation="dice", **mlp_params)
+         self.history_labels = torch.Tensor(history_labels)
+         self.BCELoss = nn.BCELoss()
+
+     def auxiliary(self, outs, history_features, history_labels):
+         '''
+         :param outs: hidden vectors produced by the interest-extractor GRU [batch_size, len_seqs, dim]
+         :param history_features: item vectors of the history sequence [batch_size, len_seqs, dim]
+         :param history_labels: labels of the history sequence items [batch_size, len_seqs, 1]
+         :return: the auxiliary loss
+         '''
+         # [batch_size * len_seqs, dim]
+         history_features = history_features.reshape(-1, history_features.shape[2])
+         # [batch_size * len_seqs, dim]
+         outs = outs.reshape(-1, outs.shape[2])
+         # [batch_size * len_seqs]
+         out = torch.sum(outs * history_features, dim=1)
+         # [batch_size * len_seqs, 1]
+         out = torch.unsqueeze(torch.sigmoid(out), 1)
+         # [batch_size * len_seqs, 1]
+         history_labels = history_labels.reshape(-1, 1).float()
+         return self.BCELoss(out, history_labels)
+
+     def forward(self, x):
+         # (batch_size, num_features, emb_dim)
+         embed_x_features = self.embedding(x, self.features)
+         # (batch_size, num_history_features, seq_length, emb_dim)
+         embed_x_history = self.embedding(x, self.history_features)
+         # (batch_size, num_target_features, emb_dim)
+         embed_x_target = self.embedding(x, self.target_features)
+
+         interest_extractor = []
+         auxi_loss = 0
+         for i in range(self.num_history_features):
+             outs, _ = self.interest_extractor_layers[i](embed_x_history[:, i, :, :])
+             # use the GRU outputs to compute the auxiliary loss
+             auxi_loss += self.auxiliary(outs, embed_x_history[:, i, :, :], self.history_labels)
+             # (batch_size, 1, seq_length, emb_dim)
+             interest_extractor.append(outs.unsqueeze(1))
+         # (batch_size, num_history_features, seq_length, emb_dim)
+         interest_extractor = torch.cat(interest_extractor, dim=1)
+         interest_evolving = []
+         for i in range(self.num_history_features):
+             _, h = self.interest_evolving_layers[i](interest_extractor[:, i, :, :], embed_x_target[:, i, :])
+             interest_evolving.append(h.unsqueeze(1))  # (batch_size, 1, emb_dim)
+         # (batch_size, num_history_features, emb_dim)
+         interest_evolving = torch.cat(interest_evolving, dim=1)
+
+         mlp_in = torch.cat([interest_evolving.flatten(start_dim=1), embed_x_target.flatten(start_dim=1), embed_x_features.flatten(start_dim=1)], dim=1)  # (batch_size, N)
+         y = self.mlp(mlp_in)
+
+         return torch.sigmoid(y.squeeze(1)), self.alpha * auxi_loss
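
For orientation, here is a minimal sketch of wiring up the new DIEN model. The `SparseFeature` / `SequenceFeature` classes come from `torch_rechub.basic.features`; the vocab sizes, feature names, batch size and sequence length below are illustrative assumptions, not values from this release. Note that `history_features` and `target_features` are paired by index in `forward`, and `history_labels` must match the `[batch_size, seq_length]` shape of the batches it is trained on:

```python
import torch

from torch_rechub.basic.features import SequenceFeature, SparseFeature
from torch_rechub.models.ranking.dien import DIEN

batch_size, seq_length = 32, 10

# illustrative feature definitions; names and vocab sizes are assumptions
user = SparseFeature("user_id", vocab_size=1000, embed_dim=16)
target_item = SparseFeature("target_item_id", vocab_size=5000, embed_dim=16)
history_item = SequenceFeature("history_item_ids", vocab_size=5000, embed_dim=16,
                               pooling="concat", shared_with="target_item_id")

model = DIEN(
    features=[user],
    history_features=[history_item],
    target_features=[target_item],  # paired index-wise with history_features
    mlp_params={"dims": [200, 80]},
    history_labels=[[1] * seq_length for _ in range(batch_size)],  # 0/1 click labels, fixed at construction
    alpha=0.2,
)

x = {
    "user_id": torch.randint(0, 1000, (batch_size,)),
    "target_item_id": torch.randint(0, 5000, (batch_size,)),
    "history_item_ids": torch.randint(0, 5000, (batch_size, seq_length)),
}
y_pred, aux_loss = model(x)  # predictions plus the weighted auxiliary loss
```

Because the model returns a `(prediction, loss)` tuple rather than a bare prediction, it pairs with the trainer's `loss_mode=False` path shown later in this diff.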
torch_rechub/models/ranking/din.py
@@ -1,7 +1,7 @@
  """
  Date: create on 23/04/2022, update on 30/04/2022
- References:
- paper: (KDD'2018) Deep Interest Network for Click-Through Rate Prediction
+ References:
+ paper: (KDD'2018) Deep Interest Network for Click-Through Rate Prediction
  url: https://arxiv.org/abs/1706.06978
  code: https://github.com/huawei-noah/benchmark/blob/main/FuxiCTR/fuxictr/pytorch/models/DIN.py
  Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
@@ -10,7 +10,7 @@ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
  import torch
  import torch.nn as nn

- from ...basic.layers import EmbeddingLayer, MLP
+ from ...basic.layers import MLP, EmbeddingLayer


  class DIN(nn.Module):
@@ -23,7 +23,7 @@ class DIN(nn.Module):
          attention_mlp_params (dict): the params of the ActivationUnit module, keys include: `{"dims": list, "activation": str, "dropout": float, "use_softmax": bool}`
      """

-     def __init__(self, features, history_features, target_features, mlp_params={"dims": [200, 80], "activation": "dice"}, attention_mlp_params={"dims": [36], "activation": "dice"}):
+     def __init__(self, features, history_features, target_features, mlp_params, attention_mlp_params):
          super().__init__()
          self.features = features
          self.history_features = history_features
@@ -33,19 +33,23 @@ class DIN(nn.Module):

          self.embedding = EmbeddingLayer(features + history_features + target_features)
          self.attention_layers = nn.ModuleList([ActivationUnit(fea.embed_dim, **attention_mlp_params) for fea in self.history_features])
-         self.mlp = MLP(self.all_dims, **mlp_params)
+         self.mlp = MLP(self.all_dims, activation="dice", **mlp_params)

      def forward(self, x):
-         embed_x_features = self.embedding(x, self.features)  #(batch_size, num_features, emb_dim)
-         embed_x_history = self.embedding(x, self.history_features)  #(batch_size, num_history_features, seq_length, emb_dim)
-         embed_x_target = self.embedding(x, self.target_features)  #(batch_size, num_target_features, emb_dim)
+         # (batch_size, num_features, emb_dim)
+         embed_x_features = self.embedding(x, self.features)
+         # (batch_size, num_history_features, seq_length, emb_dim)
+         embed_x_history = self.embedding(x, self.history_features)
+         # (batch_size, num_target_features, emb_dim)
+         embed_x_target = self.embedding(x, self.target_features)
          attention_pooling = []
          for i in range(self.num_history_features):
              attention_seq = self.attention_layers[i](embed_x_history[:, i, :, :], embed_x_target[:, i, :])
-             attention_pooling.append(attention_seq.unsqueeze(1))  #(batch_size, 1, emb_dim)
-         attention_pooling = torch.cat(attention_pooling, dim=1)  #(batch_size, num_history_features, emb_dim)
+             attention_pooling.append(attention_seq.unsqueeze(1))  # (batch_size, 1, emb_dim)
+         # (batch_size, num_history_features, emb_dim)
+         attention_pooling = torch.cat(attention_pooling, dim=1)

-         mlp_in = torch.cat([attention_pooling.flatten(start_dim=1), embed_x_target.flatten(start_dim=1), embed_x_features.flatten(start_dim=1)], dim=1)  #(batch_size, N)
+         mlp_in = torch.cat([attention_pooling.flatten(start_dim=1), embed_x_target.flatten(start_dim=1), embed_x_features.flatten(start_dim=1)], dim=1)  # (batch_size, N)

          y = self.mlp(mlp_in)
          return torch.sigmoid(y.squeeze(1))
@@ -62,20 +66,28 @@ class ActivationUnit(nn.Module):
          - Output: `(batch_size, emb_dim)`
      """

-     def __init__(self, emb_dim, dims=[36], activation="dice", use_softmax=False):
+     def __init__(self, emb_dim, dims=None, activation="dice", use_softmax=False):
          super(ActivationUnit, self).__init__()
+         if dims is None:
+             dims = [36]
          self.emb_dim = emb_dim
          self.use_softmax = use_softmax
-         self.attention = MLP(4 * self.emb_dim, dims, activation=activation)
+         self.attention = MLP(4 * self.emb_dim, dims=dims, activation=activation)

      def forward(self, history, target):
          seq_length = history.size(1)
-         target = target.unsqueeze(1).expand(-1, seq_length, -1)  #batch_size,seq_length,emb_dim
+         # (batch_size, seq_length, emb_dim)
+         target = target.unsqueeze(1).expand(-1, seq_length, -1)
          att_input = torch.cat([target, history, target - history, target * history], dim=-1)  # (batch_size, seq_length, 4*emb_dim)
-         att_weight = self.attention(att_input.view(-1, 4 * self.emb_dim))  # #(batch_size*seq_length,4*emb_dim)
-         att_weight = att_weight.view(-1, seq_length)  #(batch_size*seq_length, 1) -> (batch_size,seq_length)
+         # (batch_size*seq_length, 1)
+         att_weight = self.attention(att_input.view(-1, 4 * self.emb_dim))
+         # (batch_size*seq_length, 1) -> (batch_size, seq_length)
+         att_weight = att_weight.view(-1, seq_length)
          if self.use_softmax:
              att_weight = att_weight.softmax(dim=-1)
-         # (batch_size, seq_length, 1) * (batch_size, seq_length, emb_dim)
-         output = (att_weight.unsqueeze(-1) * history).sum(dim=1)  #(batch_size,emb_dim)
-         return output
+
+         # (batch_size, seq_length, 1) * (batch_size, seq_length, emb_dim)
+         # -> (batch_size, emb_dim)
+         output = (att_weight.unsqueeze(-1) * history).sum(dim=1)
+         return output
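
Since the mutable default arguments were dropped from `DIN.__init__` and `activation="dice"` is now hard-coded into the final `MLP`, downstream code must pass both param dicts explicitly, and `mlp_params` must not carry its own `"activation"` key (the `MLP` would receive the keyword twice). A sketch of the updated call, with the feature lists assumed to be built as in the DIEN example above:

```python
from torch_rechub.models.ranking import DIN

model = DIN(
    features=features,                  # user/context features
    history_features=history_features,  # behaviour-sequence features
    target_features=target_features,    # target-item features
    mlp_params={"dims": [200, 80]},     # no "activation" key: DIN now fixes it to "dice"
    attention_mlp_params={"dims": [36], "activation": "dice"},
)
```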
torch_rechub/models/ranking/edcn.py
@@ -0,0 +1,101 @@
+ """
+ Date: create on 09/13/2022
+ References:
+ paper: (KDD'21) EDCN: Enhancing Explicit and Implicit Feature Interactions via Information Sharing for Parallel Deep CTR Models
+ url: https://dlp-kdd.github.io/assets/pdf/DLP-KDD_2021_paper_12.pdf
+ Authors: lailai, lailai_zxy@tju.edu.cn
+ """
+
+ import torch
+ from torch import nn
+
+ from ...basic.layers import LR, MLP, CrossLayer, EmbeddingLayer
+
+
+ class EDCN(torch.nn.Module):
+     """Enhanced Deep & Cross Network, which connects the parallel cross and deep branches through bridge and regulation modules.
+
+     Args:
+         features (list[Feature Class]): the features used by the whole module.
+         n_cross_layers (int): the number of feature interaction layers.
+         mlp_params (dict): the params of the last MLP module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`
+         bridge_type (str): the type of bridge interaction function, in ["hadamard_product", "pointwise_addition", "concatenation", "attention_pooling"].
+         use_regulation_module (bool): whether to use the regulation module (default=True).
+         temperature (float): the temperature coefficient controlling the gate distribution (default=1).
+     """
+
+     def __init__(self, features, n_cross_layers, mlp_params, bridge_type="hadamard_product", use_regulation_module=True, temperature=1):
+         super().__init__()
+         self.features = features
+         self.n_cross_layers = n_cross_layers
+         self.num_fields = len(features)
+         self.dims = sum([fea.embed_dim for fea in features])
+         self.fea_dims = [fea.embed_dim for fea in features]
+         self.embedding = EmbeddingLayer(features)
+         self.cross_layers = nn.ModuleList([CrossLayer(self.dims) for _ in range(n_cross_layers)])
+         self.bridge_modules = nn.ModuleList([BridgeModule(self.dims, bridge_type) for _ in range(n_cross_layers)])
+         self.regulation_modules = nn.ModuleList([RegulationModule(self.num_fields, self.fea_dims, tau=temperature, use_regulation=use_regulation_module) for _ in range(n_cross_layers)])
+         mlp_params["dims"] = [self.dims, self.dims]
+         self.mlps = nn.ModuleList([MLP(self.dims, output_layer=False, **mlp_params) for _ in range(n_cross_layers)])
+         self.linear = LR(self.dims * 3)
+
+     def forward(self, x):
+         embed_x = self.embedding(x, self.features, squeeze_dim=True)
+         cross_i, deep_i = self.regulation_modules[0](embed_x)
+         cross_0 = cross_i
+         for i in range(self.n_cross_layers):
+             if i > 0:
+                 cross_i, deep_i = self.regulation_modules[i](bridge_i)
+             cross_i = cross_i + self.cross_layers[i](cross_0, cross_i)
+             deep_i = self.mlps[i](deep_i)
+             bridge_i = self.bridge_modules[i](cross_i, deep_i)
+         x_stack = torch.cat([cross_i, deep_i, bridge_i], dim=1)
+         y = self.linear(x_stack)
+         return torch.sigmoid(y.squeeze(1))
+
+
+ class BridgeModule(torch.nn.Module):
+
+     def __init__(self, input_dim, bridge_type):
+         super(BridgeModule, self).__init__()
+         assert bridge_type in ["hadamard_product", "pointwise_addition", "concatenation", "attention_pooling"], 'bridge_type={} is not supported'.format(bridge_type)
+         self.bridge_type = bridge_type
+         if bridge_type == "concatenation":
+             self.concat_pooling = nn.Sequential(nn.Linear(input_dim * 2, input_dim), nn.ReLU())
+         elif bridge_type == "attention_pooling":
+             self.attention_x = nn.Sequential(nn.Linear(input_dim, input_dim), nn.ReLU(), nn.Linear(input_dim, input_dim, bias=False), nn.Softmax(dim=-1))
+             self.attention_h = nn.Sequential(nn.Linear(input_dim, input_dim), nn.ReLU(), nn.Linear(input_dim, input_dim, bias=False), nn.Softmax(dim=-1))
+
+     def forward(self, x, h):
+         if self.bridge_type == "hadamard_product":
+             out = x * h
+         elif self.bridge_type == "pointwise_addition":
+             out = x + h
+         elif self.bridge_type == "concatenation":
+             out = self.concat_pooling(torch.cat([x, h], dim=-1))
+         elif self.bridge_type == "attention_pooling":
+             out = self.attention_x(x) * x + self.attention_h(h) * h
+         return out
+
+
+ class RegulationModule(torch.nn.Module):
+
+     def __init__(self, num_fields, dims, tau, use_regulation=True):
+         super(RegulationModule, self).__init__()
+         self.use_regulation = use_regulation
+         if self.use_regulation:
+             self.num_fields = num_fields
+             self.dims = dims
+             self.tau = tau
+             # one learnable gate logit per field for each of the two branches
+             self.g1 = nn.Parameter(torch.ones(num_fields))
+             self.g2 = nn.Parameter(torch.ones(num_fields))
+
+     def forward(self, x):
+         if self.use_regulation:
+             # softmax over the field dimension (a per-scalar softmax would always
+             # return 1 and disable the gates), then broadcast each field's gate
+             # across that field's embedding dims
+             w1 = (self.g1 / self.tau).softmax(dim=-1)
+             w2 = (self.g2 / self.tau).softmax(dim=-1)
+             g1 = torch.cat([w1[i].repeat(self.dims[i]) for i in range(self.num_fields)], dim=-1)
+             g2 = torch.cat([w2[i].repeat(self.dims[i]) for i in range(self.num_fields)], dim=-1)
+             out1, out2 = g1 * x, g2 * x
+         else:
+             out1, out2 = x, x
+         return out1, out2
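
The four bridge types all map two `(batch_size, dims)` branch states to a tensor of the same shape, which is what lets `forward` feed the bridge output back through the next regulation module. A quick sketch on toy tensors, importing from the file added above:

```python
import torch

from torch_rechub.models.ranking.edcn import BridgeModule

x = torch.randn(4, 32)  # cross-branch state
h = torch.randn(4, 32)  # deep-branch state

for bridge_type in ["hadamard_product", "pointwise_addition", "concatenation", "attention_pooling"]:
    bridge = BridgeModule(32, bridge_type)
    print(bridge_type, bridge(x, h).shape)  # every type preserves (4, 32)
```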
torch_rechub/models/ranking/fibinet.py
@@ -0,0 +1,42 @@
+ """
+ Date: create on 10/19/2022
+ References:
+ paper: (RecSys'19) FiBiNET: combining feature importance and bilinear feature interaction for click-through rate prediction
+ url: https://dl.acm.org/doi/abs/10.1145/3298689.3347043
+ Authors: lailai, lailai_zxy@tju.edu.cn
+ """
+ import torch
+ from torch import nn
+
+ from ...basic.features import SparseFeature
+ from ...basic.layers import MLP, BiLinearInteractionLayer, EmbeddingLayer, SENETLayer
+
+
+ class FiBiNet(torch.nn.Module):
+     """The FiBiNet model.
+
+     Args:
+         features (list[Feature Class]): the features used by the whole module.
+         mlp_params (dict): the params of the last MLP module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`
+         reduction_ratio (int): hidden-layer reduction factor of the SENET layer.
+         bilinear_type (str): the type of bilinear interaction function, in ["field_all", "field_each", "field_interaction"]. "field_all" means all features share one W, "field_each" means each feature field has its own W_i, and "field_interaction" means each pair of interacting fields has its own W_ij.
+     """
+
+     def __init__(self, features, mlp_params, reduction_ratio=3, bilinear_type="field_interaction", **kwargs):
+         super(FiBiNet, self).__init__()
+         self.features = features
+         self.embedding = EmbeddingLayer(features)
+         embedding_dim = max([fea.embed_dim for fea in features])
+         num_fields = len([fea.embed_dim for fea in features if isinstance(fea, SparseFeature) and fea.shared_with is None])
+         self.senet_layer = SENETLayer(num_fields, reduction_ratio)
+         self.bilinear_interaction = BiLinearInteractionLayer(embedding_dim, num_fields, bilinear_type)
+         self.dims = num_fields * (num_fields - 1) * embedding_dim
+         self.mlp = MLP(self.dims, **mlp_params)
+
+     def forward(self, x):
+         embed_x = self.embedding(x, self.features)
+         embed_senet = self.senet_layer(embed_x)
+         embed_bi1 = self.bilinear_interaction(embed_x)
+         embed_bi2 = self.bilinear_interaction(embed_senet)
+         shallow_part = torch.flatten(torch.cat([embed_bi1, embed_bi2], dim=1), start_dim=1)
+         mlp_out = self.mlp(shallow_part)
+         return torch.sigmoid(mlp_out.squeeze(1))
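
A minimal instantiation sketch. The bilinear layer produces `num_fields * (num_fields - 1) / 2` pairwise vectors for each of the raw and SENET-reweighted embeddings, which is where the `num_fields * (num_fields - 1) * embedding_dim` MLP input width comes from. The export from `torch_rechub.models.ranking` and the feature definitions are assumptions:

```python
from torch_rechub.basic.features import SparseFeature
from torch_rechub.models.ranking import FiBiNet

features = [SparseFeature(f"c{i}", vocab_size=100, embed_dim=16) for i in range(8)]
model = FiBiNet(features, mlp_params={"dims": [128, 64]},
                reduction_ratio=3, bilinear_type="field_interaction")
# MLP input width: num_fields * (num_fields - 1) * embed_dim = 8 * 7 * 16 = 896
```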
torch_rechub/models/ranking/widedeep.py
@@ -1,7 +1,7 @@
  """
  Date: create on 22/04/2022
- References:
- paper: (DLRS'2016) Wide & Deep Learning for Recommender Systems
+ References:
+ paper: (DLRS'2016) Wide & Deep Learning for Recommender Systems
  url: https://arxiv.org/abs/1606.07792
  Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
  """
@@ -31,11 +31,11 @@ class WideDeep(torch.nn.Module):
          self.mlp = MLP(self.deep_dims, **mlp_params)

      def forward(self, x):
-         input_wide = self.embedding(x, self.wide_features, squeeze_dim=True)  #[batch_size, wide_dims]
-         input_deep = self.embedding(x, self.deep_features, squeeze_dim=True)  #[batch_size, deep_dims]
+         input_wide = self.embedding(x, self.wide_features, squeeze_dim=True)  # [batch_size, wide_dims]
+         input_deep = self.embedding(x, self.deep_features, squeeze_dim=True)  # [batch_size, deep_dims]

-         y_wide = self.linear(input_wide)  #[batch_size, 1]
-         y_deep = self.mlp(input_deep)  #[batch_size, 1]
+         y_wide = self.linear(input_wide)  # [batch_size, 1]
+         y_deep = self.mlp(input_deep)  # [batch_size, 1]
          y = y_wide + y_deep
          y = torch.sigmoid(y.squeeze(1))
          return y
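
The forward path above is unchanged in substance: a linear model over the wide features plus an MLP over the deep features, summed before the sigmoid. A minimal instantiation sketch (feature definitions and parameter values are illustrative):

```python
from torch_rechub.basic.features import DenseFeature, SparseFeature
from torch_rechub.models.ranking import WideDeep

wide_features = [SparseFeature("c1", vocab_size=100, embed_dim=16)]
deep_features = [SparseFeature("c2", vocab_size=100, embed_dim=16), DenseFeature("d1")]
model = WideDeep(wide_features=wide_features, deep_features=deep_features,
                 mlp_params={"dims": [256, 128], "dropout": 0.2, "activation": "relu"})
```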
torch_rechub/trainers/__init__.py
@@ -1,2 +1,4 @@
- from .trainer import CTRTrainer
- from .mtl_trainer import MTLTrainer
+ from .ctr_trainer import CTRTrainer
+ from .match_trainer import MatchTrainer
+ from .mtl_trainer import MTLTrainer
+ from .seq_trainer import SeqTrainer
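
For downstream code, the practical effect of this split is that the old `torch_rechub.trainers.trainer` module no longer exists; package-level imports keep working, and two new trainers become available:

```python
# works in both 0.0.1 and 0.0.4
from torch_rechub.trainers import CTRTrainer, MTLTrainer

# new in 0.0.4
from torch_rechub.trainers import MatchTrainer, SeqTrainer

# broken in 0.0.4: the old module was removed
# from torch_rechub.trainers.trainer import CTRTrainer
```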
torch_rechub/trainers/ctr_trainer.py
@@ -0,0 +1,191 @@
+ import os
+
+ import torch
+ import tqdm
+ from sklearn.metrics import roc_auc_score
+
+ from ..basic.callback import EarlyStopper
+ from ..basic.loss_func import RegularizationLoss
+
+
+ class CTRTrainer(object):
+     """A general trainer for single task learning.
+
+     Args:
+         model (nn.Module): any single task learning model.
+         optimizer_fn (torch.optim): optimizer function of pytorch (default = `torch.optim.Adam`).
+         optimizer_params (dict): parameters of optimizer_fn.
+         regularization_params (dict): regularization coefficients, keys in `{"embedding_l1", "embedding_l2", "dense_l1", "dense_l2"}` (default: all 0.0).
+         scheduler_fn (torch.optim.lr_scheduler): torch scheduling class, e.g. `torch.optim.lr_scheduler.StepLR`.
+         scheduler_params (dict): parameters of scheduler_fn.
+         n_epoch (int): epoch number of training.
+         earlystop_patience (int): how long to wait after the last time the validation auc improved (default=10).
+         device (str): `"cpu"` or `"cuda:0"`.
+         gpus (list): ids for multi-gpu training (default=[]). If the length > 1, the model will be wrapped by nn.DataParallel.
+         loss_mode (bool): if True, the model returns predictions only; if False, it returns `(predictions, auxiliary_loss)`, e.g. DIEN (default=True).
+         model_path (str): the path where the model is saved (default="./"). Note that only the weights that are best on the validation data are saved.
+     """
+
+     def __init__(
+         self,
+         model,
+         optimizer_fn=torch.optim.Adam,
+         optimizer_params=None,
+         regularization_params=None,
+         scheduler_fn=None,
+         scheduler_params=None,
+         n_epoch=10,
+         earlystop_patience=10,
+         device="cpu",
+         gpus=None,
+         loss_mode=True,
+         model_path="./",
+     ):
+         self.model = model  # for a uniform weight-saving method on one gpu or multi gpu
+         if gpus is None:
+             gpus = []
+         self.gpus = gpus
+         if len(gpus) > 1:
+             print('parallel running on these gpus:', gpus)
+             self.model = torch.nn.DataParallel(self.model, device_ids=gpus)
+         self.device = torch.device(device)  # e.g. torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+         self.model.to(self.device)
+         if optimizer_params is None:
+             optimizer_params = {"lr": 1e-3, "weight_decay": 1e-5}
+         self.optimizer = optimizer_fn(self.model.parameters(), **optimizer_params)  # default optimizer
+         if regularization_params is None:
+             regularization_params = {"embedding_l1": 0.0, "embedding_l2": 0.0, "dense_l1": 0.0, "dense_l2": 0.0}
+         self.scheduler = None
+         if scheduler_fn is not None:
+             self.scheduler = scheduler_fn(self.optimizer, **scheduler_params)
+         self.loss_mode = loss_mode
+         self.criterion = torch.nn.BCELoss()  # default loss: binary cross-entropy
+         self.evaluate_fn = roc_auc_score  # default evaluate function
+         self.n_epoch = n_epoch
+         self.early_stopper = EarlyStopper(patience=earlystop_patience)
+         self.model_path = model_path
+         # initialize the regularization loss
+         self.reg_loss_fn = RegularizationLoss(**regularization_params)
+
+     def train_one_epoch(self, data_loader, log_interval=10):
+         self.model.train()
+         total_loss = 0
+         tk0 = tqdm.tqdm(data_loader, desc="train", smoothing=0, mininterval=1.0)
+         for i, (x_dict, y) in enumerate(tk0):
+             x_dict = {k: v.to(self.device) for k, v in x_dict.items()}  # tensors to device
+             y = y.to(self.device).float()
+             if self.loss_mode:
+                 y_pred = self.model(x_dict)
+                 loss = self.criterion(y_pred, y)
+             else:
+                 y_pred, other_loss = self.model(x_dict)
+                 loss = self.criterion(y_pred, y) + other_loss
+
+             # add the regularization loss
+             reg_loss = self.reg_loss_fn(self.model)
+             loss = loss + reg_loss
+
+             self.model.zero_grad()
+             loss.backward()
+             self.optimizer.step()
+             total_loss += loss.item()
+             if (i + 1) % log_interval == 0:
+                 tk0.set_postfix(loss=total_loss / log_interval)
+                 total_loss = 0
+
+     def fit(self, train_dataloader, val_dataloader=None):
+         for epoch_i in range(self.n_epoch):
+             print('epoch:', epoch_i)
+             self.train_one_epoch(train_dataloader)
+             if self.scheduler is not None:
+                 if epoch_i % self.scheduler.step_size == 0:
+                     print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr']))
+                 self.scheduler.step()  # update lr at epoch level by scheduler
+             if val_dataloader:
+                 auc = self.evaluate(self.model, val_dataloader)
+                 print('epoch:', epoch_i, 'validation: auc:', auc)
+                 if self.early_stopper.stop_training(auc, self.model.state_dict()):
+                     print(f'validation: best auc: {self.early_stopper.best_auc}')
+                     self.model.load_state_dict(self.early_stopper.best_weights)
+                     break
+         torch.save(self.model.state_dict(), os.path.join(self.model_path, "model.pth"))  # save the best-auc model
+
+     def evaluate(self, model, data_loader):
+         model.eval()
+         targets, predicts = list(), list()
+         with torch.no_grad():
+             tk0 = tqdm.tqdm(data_loader, desc="validation", smoothing=0, mininterval=1.0)
+             for i, (x_dict, y) in enumerate(tk0):
+                 x_dict = {k: v.to(self.device) for k, v in x_dict.items()}
+                 # ensure y is float with shape [batch_size, 1]
+                 y = y.to(self.device).float().view(-1, 1)
+                 if self.loss_mode:
+                     y_pred = model(x_dict)
+                 else:
+                     y_pred, _ = model(x_dict)
+                 targets.extend(y.tolist())
+                 predicts.extend(y_pred.tolist())
+         return self.evaluate_fn(targets, predicts)
+
+     def predict(self, model, data_loader):
+         model.eval()
+         predicts = list()
+         with torch.no_grad():
+             tk0 = tqdm.tqdm(data_loader, desc="predict", smoothing=0, mininterval=1.0)
+             for i, (x_dict, y) in enumerate(tk0):
+                 x_dict = {k: v.to(self.device) for k, v in x_dict.items()}
+                 y = y.to(self.device)
+                 if self.loss_mode:
+                     y_pred = model(x_dict)
+                 else:
+                     y_pred, _ = model(x_dict)
+                 predicts.extend(y_pred.tolist())
+         return predicts
+
+     def export_onnx(self, output_path, dummy_input=None, batch_size=2, seq_length=10, opset_version=14, dynamic_batch=True, device=None, verbose=False):
+         """Export the trained model to ONNX format.
+
+         This method exports the ranking model (e.g., DeepFM, WideDeep, DCN) to ONNX format
+         for deployment. The export is non-invasive and does not modify the model code.
+
+         Args:
+             output_path (str): Path to save the ONNX model file.
+             dummy_input (dict, optional): Example input dict {feature_name: tensor}.
+                 If not provided, dummy inputs will be generated automatically.
+             batch_size (int): Batch size for auto-generated dummy input (default: 2).
+             seq_length (int): Sequence length for SequenceFeature (default: 10).
+             opset_version (int): ONNX opset version (default: 14).
+             dynamic_batch (bool): Enable dynamic batch size (default: True).
+             device (str, optional): Device for export ('cpu', 'cuda', etc.).
+                 If None, defaults to 'cpu' for maximum compatibility.
+             verbose (bool): Print export details (default: False).
+
+         Returns:
+             bool: True if export succeeded, False otherwise.
+
+         Example:
+             >>> trainer = CTRTrainer(model, ...)
+             >>> trainer.fit(train_dl, val_dl)
+             >>> trainer.export_onnx("deepfm.onnx")
+
+             >>> # With custom dummy input
+             >>> dummy = {"user_id": torch.tensor([1, 2]), "item_id": torch.tensor([10, 20])}
+             >>> trainer.export_onnx("model.onnx", dummy_input=dummy)
+
+             >>> # Export on a specific device
+             >>> trainer.export_onnx("model.onnx", device="cpu")
+         """
+         from ..utils.onnx_export import ONNXExporter
+
+         # handle a DataParallel-wrapped model
+         model = self.model.module if hasattr(self.model, 'module') else self.model
+
+         # use the provided device, or default to 'cpu'
+         export_device = device if device is not None else 'cpu'
+
+         exporter = ONNXExporter(model, device=export_device)
+         return exporter.export(output_path=output_path, dummy_input=dummy_input, batch_size=batch_size, seq_length=seq_length, opset_version=opset_version, dynamic_batch=dynamic_batch, verbose=verbose)
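
Putting the new trainer options together, a minimal end-to-end sketch; the model, the dataloaders, and the regularization coefficients are illustrative, not recommended values:

```python
import torch

from torch_rechub.trainers import CTRTrainer

trainer = CTRTrainer(
    model,  # e.g. a DeepFM/WideDeep instance
    optimizer_params={"lr": 1e-3, "weight_decay": 1e-5},
    regularization_params={"embedding_l1": 0.0, "embedding_l2": 1e-5,
                           "dense_l1": 0.0, "dense_l2": 1e-5},
    n_epoch=5,
    earlystop_patience=3,
    device="cuda:0" if torch.cuda.is_available() else "cpu",
    loss_mode=True,  # set False for models like DIEN that return (pred, aux_loss)
)
trainer.fit(train_dataloader, val_dataloader)
auc = trainer.evaluate(trainer.model, test_dataloader)
trainer.export_onnx("model.onnx")  # optional ONNX deployment path
```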