torch-rechub 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. torch_rechub/basic/activation.py +54 -52
  2. torch_rechub/basic/callback.py +32 -32
  3. torch_rechub/basic/features.py +94 -57
  4. torch_rechub/basic/initializers.py +92 -0
  5. torch_rechub/basic/layers.py +720 -240
  6. torch_rechub/basic/loss_func.py +34 -0
  7. torch_rechub/basic/metaoptimizer.py +72 -0
  8. torch_rechub/basic/metric.py +250 -0
  9. torch_rechub/models/matching/__init__.py +11 -0
  10. torch_rechub/models/matching/comirec.py +188 -0
  11. torch_rechub/models/matching/dssm.py +66 -0
  12. torch_rechub/models/matching/dssm_facebook.py +79 -0
  13. torch_rechub/models/matching/dssm_senet.py +75 -0
  14. torch_rechub/models/matching/gru4rec.py +87 -0
  15. torch_rechub/models/matching/mind.py +101 -0
  16. torch_rechub/models/matching/narm.py +76 -0
  17. torch_rechub/models/matching/sasrec.py +140 -0
  18. torch_rechub/models/matching/sine.py +151 -0
  19. torch_rechub/models/matching/stamp.py +83 -0
  20. torch_rechub/models/matching/youtube_dnn.py +71 -0
  21. torch_rechub/models/matching/youtube_sbc.py +98 -0
  22. torch_rechub/models/multi_task/__init__.py +5 -4
  23. torch_rechub/models/multi_task/aitm.py +84 -0
  24. torch_rechub/models/multi_task/esmm.py +55 -45
  25. torch_rechub/models/multi_task/mmoe.py +58 -52
  26. torch_rechub/models/multi_task/ple.py +130 -104
  27. torch_rechub/models/multi_task/shared_bottom.py +45 -44
  28. torch_rechub/models/ranking/__init__.py +11 -3
  29. torch_rechub/models/ranking/afm.py +63 -0
  30. torch_rechub/models/ranking/bst.py +63 -0
  31. torch_rechub/models/ranking/dcn.py +38 -0
  32. torch_rechub/models/ranking/dcn_v2.py +69 -0
  33. torch_rechub/models/ranking/deepffm.py +123 -0
  34. torch_rechub/models/ranking/deepfm.py +41 -41
  35. torch_rechub/models/ranking/dien.py +191 -0
  36. torch_rechub/models/ranking/din.py +91 -81
  37. torch_rechub/models/ranking/edcn.py +117 -0
  38. torch_rechub/models/ranking/fibinet.py +50 -0
  39. torch_rechub/models/ranking/widedeep.py +41 -41
  40. torch_rechub/trainers/__init__.py +2 -1
  41. torch_rechub/trainers/{trainer.py → ctr_trainer.py} +128 -111
  42. torch_rechub/trainers/match_trainer.py +170 -0
  43. torch_rechub/trainers/mtl_trainer.py +206 -144
  44. torch_rechub/utils/__init__.py +0 -0
  45. torch_rechub/utils/data.py +360 -0
  46. torch_rechub/utils/match.py +274 -0
  47. torch_rechub/utils/mtl.py +126 -0
  48. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/LICENSE +21 -21
  49. torch_rechub-0.0.3.dist-info/METADATA +177 -0
  50. torch_rechub-0.0.3.dist-info/RECORD +55 -0
  51. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/WHEEL +1 -1
  52. torch_rechub/basic/utils.py +0 -168
  53. torch_rechub-0.0.1.dist-info/METADATA +0 -105
  54. torch_rechub-0.0.1.dist-info/RECORD +0 -26
  55. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/top_level.txt +0 -0
@@ -1,52 +1,58 @@
1
- """
2
- Date: create on 04/05/2022
3
- References:
4
- paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
5
- url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
6
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
- """
8
-
9
- import torch
10
- import torch.nn as nn
11
-
12
- from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
-
14
-
15
- class MMOE(nn.Module):
16
- """Multi-gate Mixture-of-Experts model.
17
-
18
- Args:
19
- features (list): the list of `Feature Class`, training by the expert and tower module.
20
- task_types (list): types of tasks, only support `["classfication", "regression"]`.
21
- n_expert (int): the number of expert net.
22
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
23
- tower_params_list (list): the list of tower params dict, the keys same as expert_params.
24
- """
25
-
26
- def __init__(self, features, task_types, n_expert, expert_params, tower_params_list):
27
- super().__init__()
28
- self.features = features
29
- self.task_types = task_types
30
- self.n_task = len(task_types)
31
- self.n_expert = n_expert
32
- self.embedding = EmbeddingLayer(features)
33
- self.input_dims = sum([fea.embed_dim for fea in features])
34
- self.experts = nn.ModuleList(MLP(self.input_dims, **{**expert_params, **{"output_layer": False}}) for i in range(self.n_expert))
35
- self.gates = nn.ModuleList(MLP(self.input_dims, **{"dims": [self.n_expert], "activation": "softmax", "output_layer": False}) for i in range(self.n_task)) #n_gate = n_task
36
- self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
37
- self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
38
-
39
- def forward(self, x):
40
- embed_x = self.embedding(x, self.features, squeeze_dim=True) #[batch_size, input_dims]
41
- expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts] #expert_out[i]: [batch_size, 1, expert_dims[-1]]
42
- expert_outs = torch.cat(expert_outs, dim=1) #[batch_size, n_expert, expert_dims[-1]]
43
- gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates] #gate_out[i]: [batch_size, n_expert, 1]
44
-
45
- ys = []
46
- for gate_out, tower, predict_layer in zip(gate_outs, self.towers, self.predict_layers):
47
- expert_weight = torch.mul(gate_out, expert_outs) #[batch_size, n_expert, expert_dims[-1]]
48
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
49
- tower_out = tower(expert_pooling) #[batch_size, 1]
50
- y = predict_layer(tower_out) #logit -> proba
51
- ys.append(y)
52
- return torch.cat(ys, dim=1)
1
+ """
2
+ Date: create on 04/05/2022
3
+ References:
4
+ paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
5
+ url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+
12
+ from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
+
14
+
15
class MMOE(nn.Module):
    """Multi-gate Mixture-of-Experts model.

    A pool of expert networks is shared by all tasks. Each task owns one
    softmax gate that weights the expert outputs; the weighted sum is fed to
    that task's tower and then to its prediction layer.

    Args:
        features (list): the list of `Feature Class`, training by the expert and tower module.
        task_types (list): one task-type string per task; each entry is handed to `PredictionLayer`,
            which defines the accepted values.
        n_expert (int): the number of expert net.
        expert_params (dict): the params of all the expert module, keys include:
            `{"dims":list, "activation":str, "dropout":float}`. An `"output_layer"` key is tolerated
            for backward compatibility but is always overridden with `False`.
        tower_params_list (list): the list of tower params dict (one per task), the keys same as expert_params.
    """

    def __init__(self, features, task_types, n_expert, expert_params, tower_params_list):
        super().__init__()
        self.features = features
        self.task_types = task_types
        self.n_task = len(task_types)
        self.n_expert = n_expert
        self.embedding = EmbeddingLayer(features)
        self.input_dims = sum(fea.embed_dim for fea in features)
        # Experts feed the towers, so they must not end in an output layer. Merging (instead of
        # passing output_layer=False next to **expert_params) avoids a duplicate-keyword TypeError
        # when the caller's dict already contains "output_layer".
        expert_kwargs = {**expert_params, "output_layer": False}
        self.experts = nn.ModuleList(MLP(self.input_dims, **expert_kwargs) for _ in range(self.n_expert))
        # One gate per task; each gate emits n_expert softmax weights.
        self.gates = nn.ModuleList(
            MLP(self.input_dims, dims=[self.n_expert], activation="softmax", output_layer=False)
            for _ in range(self.n_task))
        self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
        self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)

    def forward(self, x):
        """Return the per-task predictions concatenated along dim 1."""
        embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, input_dims]
        # [batch_size, n_expert, expert_dims[-1]]
        expert_outs = torch.stack([expert(embed_x) for expert in self.experts], dim=1)

        ys = []
        for gate, tower, predict_layer in zip(self.gates, self.towers, self.predict_layers):
            gate_out = gate(embed_x).unsqueeze(-1)  # [batch_size, n_expert, 1]
            expert_pooling = torch.sum(gate_out * expert_outs, dim=1)  # [batch_size, expert_dims[-1]]
            tower_out = tower(expert_pooling)  # [batch_size, 1]
            ys.append(predict_layer(tower_out))  # logit -> proba
        return torch.cat(ys, dim=1)
@@ -1,104 +1,130 @@
1
- """
2
- Date: create on 05/05/2022
3
- References:
4
- paper: (RecSys'2020) Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations
5
- url: https://dl.acm.org/doi/abs/10.1145/3383313.3412236
6
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
- """
8
-
9
- import torch
10
- import torch.nn as nn
11
-
12
- from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
-
14
-
15
- class PLE(nn.Module):
16
- """Progressive Layered Extraction model.
17
-
18
- Args:
19
- features (list): the list of `Feature Class`, training by the expert and tower module.
20
- task_types (list): types of tasks, only support `["classfication", "regression"]`.
21
- n_level (int): the number of CGC layer.
22
- n_expert_specific (int): the number of task-specific expert net.
23
- n_expert_shared (int): the number of task-shared expert net.
24
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
25
- tower_params_list (list): the list of tower params dict, the keys same as expert_params.
26
- """
27
-
28
- def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params={"dims": [32, 16], "output_layer": False}, tower_params_list=[{"dims": [32, 16]}, {"dims": [32, 16]}]):
29
- super().__init__()
30
- self.features = features
31
- self.n_task = len(task_types)
32
- self.task_types = task_types
33
- self.n_level = n_level
34
- self.input_dims = sum([fea.embed_dim for fea in features])
35
- self.embedding = EmbeddingLayer(features)
36
- self.cgc_layers = nn.ModuleList(CGC(i + 1, n_level, self.n_task, n_expert_specific, n_expert_shared, self.input_dims, expert_params) for i in range(n_level))
37
- self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
38
- self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
39
-
40
- def forward(self, x):
41
- embed_x = self.embedding(x, self.features, squeeze_dim=True) #[batch_size, input_dims]
42
- ple_inputs = [embed_x] * (self.n_task + 1)
43
- ple_outs = []
44
- for i in range(self.n_level):
45
- ple_outs = self.cgc_layers[i](ple_inputs) #ple_outs[i]: [batch_size, expert_dims[-1]]
46
- ple_inputs = ple_outs
47
- #predict
48
- ys = []
49
- for ple_out, tower, predict_layer in zip(ple_outs, self.towers, self.predict_layers):
50
- tower_out = tower(ple_out) #[batch_size, 1]
51
- y = predict_layer(tower_out) #logit -> proba
52
- ys.append(y)
53
- return torch.cat(ys, dim=1)
54
-
55
-
56
- class CGC(nn.Module):
57
- """Customized Gate Control (CGC) Model mentioned in PLE paper.
58
-
59
- Args:
60
- cur_level (int): the current level of CGC in PLE.
61
- n_level (int): the number of CGC layer.
62
- n_task (int): the number of tasks.
63
- n_expert_specific (int): the number of task-specific expert net.
64
- n_expert_shared (int): the number of task-shared expert net.
65
- input_dims (int): the input dims of the xpert module in current CGC layer.
66
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
67
- """
68
-
69
- def __init__(self, cur_level, n_level, n_task, n_expert_specific, n_expert_shared, input_dims, expert_params={"dims": [32, 16], "output_layer": False}):
70
- super().__init__()
71
- self.cur_level = cur_level # the CGC level of PLE
72
- self.n_level = n_level
73
- self.n_task = n_task
74
- self.n_expert_specific = n_expert_specific
75
- self.n_expert_shared = n_expert_shared
76
- self.n_expert_all = n_expert_specific * self.n_task + n_expert_shared
77
- input_dims = input_dims if cur_level == 1 else expert_params["dims"][-1] #the first layer expert dim is the input data dim other expert dim
78
- self.experts_specific = nn.ModuleList(MLP(input_dims, **expert_params) for _ in range(self.n_task * self.n_expert_specific))
79
- self.experts_shared = nn.ModuleList(MLP(input_dims, **expert_params) for _ in range(self.n_expert_shared))
80
- self.gates_specific = nn.ModuleList(MLP(input_dims, **{"dims": [self.n_expert_specific + self.n_expert_shared], "activation": "softmax", "output_layer": False}) for _ in range(self.n_task)) #n_gate_specific = n_task
81
- if cur_level < n_level:
82
- self.gate_shared = MLP(input_dims, **{"dims": [self.n_expert_all], "activation": "softmax", "output_layer": False}) #n_gate_specific = n_task
83
-
84
- def forward(self, x_list):
85
- expert_specific_outs = [] #expert_out[i]: [batch_size, 1, expert_dims[-1]]
86
- for i in range(self.n_task):
87
- expert_specific_outs.extend([expert(x_list[i]).unsqueeze(1) for expert in self.experts_specific[i * self.n_expert_specific:(i + 1) * self.n_expert_specific]])
88
- expert_shared_outs = [expert(x_list[-1]).unsqueeze(1) for expert in self.experts_shared] #x_list[-1]: the input for shared experts
89
- gate_specific_outs = [gate(x_list[i]).unsqueeze(-1) for i, gate in enumerate(self.gates_specific)] #gate_out[i]: [batch_size, n_expert_specific+n_expert_shared, 1]
90
- cgc_outs = []
91
- for i, gate_out in enumerate(gate_specific_outs):
92
- cur_expert_list = expert_specific_outs[i * self.n_expert_specific:(i + 1) * self.n_expert_specific] + expert_shared_outs
93
- expert_concat = torch.cat(cur_expert_list, dim=1) #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
94
- expert_weight = torch.mul(gate_out, expert_concat) #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
95
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
96
- cgc_outs.append(expert_pooling) #length: n_task
97
- if self.cur_level < self.n_level: #not the last layer
98
- gate_shared_out = self.gate_shared(x_list[-1]).unsqueeze(-1) #[batch_size, n_expert_all, 1]
99
- expert_concat = torch.cat(expert_specific_outs + expert_shared_outs, dim=1) #[batch_size, n_expert_all, expert_dims[-1]]
100
- expert_weight = torch.mul(gate_shared_out, expert_concat) #[batch_size, n_expert_all, expert_dims[-1]]
101
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
102
- cgc_outs.append(expert_pooling) #length: n_task+1
103
-
104
- return cgc_outs
1
+ """
2
+ Date: create on 05/05/2022
3
+ References:
4
+ paper: (RecSys'2020) Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations
5
+ url: https://dl.acm.org/doi/abs/10.1145/3383313.3412236
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+
12
+ from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
+
14
+
15
class PLE(nn.Module):
    """Progressive Layered Extraction model.

    Stacks `n_level` CGC layers on top of the embedded features, then feeds each
    task's final representation to that task's tower and prediction layer.

    Args:
        features (list): the list of `Feature Class`, training by the expert and tower module.
        task_types (list): one task-type string per task; each entry is handed to `PredictionLayer`,
            which defines the accepted values.
        n_level (int): the number of CGC layer.
        n_expert_specific (int): the number of task-specific expert net.
        n_expert_shared (int): the number of task-shared expert net.
        expert_params (dict): the params of all the expert module, keys include:
            `{"dims":list, "activation":str, "dropout":float}`.
        tower_params_list (list): the list of tower params dict (one per task), the keys same as
            expert_params. `"output_layer"` defaults to `True` and may be overridden per tower.
    """

    def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params,
                 tower_params_list):
        super().__init__()
        self.features = features
        self.n_task = len(task_types)
        self.task_types = task_types
        self.n_level = n_level
        self.input_dims = sum(fea.embed_dim for fea in features)
        self.embedding = EmbeddingLayer(features)
        self.cgc_layers = nn.ModuleList(
            CGC(i + 1, n_level, self.n_task, n_expert_specific, n_expert_shared, self.input_dims, expert_params)
            for i in range(n_level))
        # Towers keep their output layer so each task yields one logit column ([batch_size, 1]),
        # matching the towers of the sibling multi-task models. Previously output_layer=False was
        # forced here, which left the tower output at [batch_size, tower_dims[-1]].
        self.towers = nn.ModuleList(
            MLP(expert_params["dims"][-1], **{"output_layer": True, **tower_params_list[i]})
            for i in range(self.n_task))
        self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)

    def forward(self, x):
        """Return the per-task predictions concatenated along dim 1."""
        embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, input_dims]
        # The first CGC level receives the same embedding for every task plus the shared branch.
        ple_inputs = [embed_x] * (self.n_task + 1)
        ple_outs = []
        for cgc_layer in self.cgc_layers:
            ple_outs = cgc_layer(ple_inputs)  # ple_outs[i]: [batch_size, expert_dims[-1]]
            ple_inputs = ple_outs
        # predict
        ys = []
        for ple_out, tower, predict_layer in zip(ple_outs, self.towers, self.predict_layers):
            tower_out = tower(ple_out)  # [batch_size, 1]
            ys.append(predict_layer(tower_out))  # logit -> proba
        return torch.cat(ys, dim=1)
58
+
59
+
60
class CGC(nn.Module):
    """Customized Gate Control (CGC) Model mentioned in PLE paper.

    Holds `n_expert_specific` experts per task plus `n_expert_shared` shared
    experts. Each task gate mixes that task's experts with the shared ones; on
    every level except the last, an extra shared gate mixes all experts.

    Args:
        cur_level (int): the current level of CGC in PLE (1-based).
        n_level (int): the number of CGC layer.
        n_task (int): the number of tasks.
        n_expert_specific (int): the number of task-specific expert net.
        n_expert_shared (int): the number of task-shared expert net.
        input_dims (int): the input dims of the expert module in the first CGC layer; deeper
            layers use `expert_params["dims"][-1]` instead.
        expert_params (dict): the params of all the expert module, keys include:
            `{"dims":list, "activation":str, "dropout":float}`. An `"output_layer"` key is tolerated
            for backward compatibility but is always overridden with `False`.
    """

    def __init__(self, cur_level, n_level, n_task, n_expert_specific, n_expert_shared, input_dims, expert_params):
        super().__init__()
        self.cur_level = cur_level  # the CGC level of PLE
        self.n_level = n_level
        self.n_task = n_task
        self.n_expert_specific = n_expert_specific
        self.n_expert_shared = n_expert_shared
        self.n_expert_all = n_expert_specific * self.n_task + n_expert_shared
        # Only the first level sees the raw embedding; later levels consume expert outputs.
        if cur_level != 1:
            input_dims = expert_params["dims"][-1]
        # Merge rather than pass output_layer=False next to **expert_params: the latter raises a
        # duplicate-keyword TypeError when the caller's dict already contains "output_layer".
        expert_kwargs = {**expert_params, "output_layer": False}
        self.experts_specific = nn.ModuleList(
            MLP(input_dims, **expert_kwargs) for _ in range(self.n_task * self.n_expert_specific))
        self.experts_shared = nn.ModuleList(MLP(input_dims, **expert_kwargs) for _ in range(self.n_expert_shared))
        # n_gate_specific = n_task
        self.gates_specific = nn.ModuleList(
            MLP(input_dims,
                dims=[self.n_expert_specific + self.n_expert_shared],
                activation="softmax",
                output_layer=False) for _ in range(self.n_task))
        if cur_level < n_level:
            # The shared gate exists only on non-final levels, where a shared output is still needed.
            self.gate_shared = MLP(input_dims, dims=[self.n_expert_all], activation="softmax", output_layer=False)

    def forward(self, x_list):
        """Mix expert outputs per task.

        Args:
            x_list (list): inputs indexed by task, with the shared-branch input last (`x_list[-1]`).

        Returns:
            list: `n_task` pooled tensors, plus one shared tensor when this is not the last level.
        """
        n_spec = self.n_expert_specific
        expert_specific_outs = []  # each: [batch_size, 1, expert_dims[-1]]
        for i in range(self.n_task):
            task_experts = self.experts_specific[i * n_spec:(i + 1) * n_spec]
            expert_specific_outs.extend(expert(x_list[i]).unsqueeze(1) for expert in task_experts)
        # x_list[-1]: the input for shared experts
        expert_shared_outs = [expert(x_list[-1]).unsqueeze(1) for expert in self.experts_shared]

        cgc_outs = []
        for i, gate in enumerate(self.gates_specific):
            gate_out = gate(x_list[i]).unsqueeze(-1)  # [batch_size, n_expert_specific+n_expert_shared, 1]
            cur_expert_list = expert_specific_outs[i * n_spec:(i + 1) * n_spec] + expert_shared_outs
            # [batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
            expert_concat = torch.cat(cur_expert_list, dim=1)
            expert_pooling = torch.sum(gate_out * expert_concat, dim=1)  # [batch_size, expert_dims[-1]]
            cgc_outs.append(expert_pooling)  # length: n_task
        if self.cur_level < self.n_level:  # not the last layer
            gate_shared_out = self.gate_shared(x_list[-1]).unsqueeze(-1)  # [batch_size, n_expert_all, 1]
            # [batch_size, n_expert_all, expert_dims[-1]]
            expert_concat = torch.cat(expert_specific_outs + expert_shared_outs, dim=1)
            expert_pooling = torch.sum(gate_shared_out * expert_concat, dim=1)  # [batch_size, expert_dims[-1]]
            cgc_outs.append(expert_pooling)  # length: n_task+1

        return cgc_outs
@@ -1,44 +1,45 @@
1
- """
2
- Date: create on 04/05/2022
3
- Reference:
4
- paper: Caruana, R. (1997). Multitask learning. Machine learning, 28(1), 41-75.
5
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
6
- """
7
-
8
- import torch
9
- import torch.nn as nn
10
-
11
- from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
12
-
13
-
14
- class SharedBottom(nn.Module):
15
- """Shared Bottom multi task model.
16
-
17
- Args:
18
- features (list): the list of `Feature Class`, training by the bottom and tower module.
19
- task_types (list): types of tasks, only support `["classfication", "regression"]`.
20
- bottom_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
21
- tower_params_list (list): the list of tower params dict, the keys same as bottom_params.
22
- """
23
-
24
- def __init__(self, features, task_types, bottom_params, tower_params_list):
25
- super().__init__()
26
- self.features = features
27
- self.task_types = task_types
28
- self.embedding = EmbeddingLayer(features)
29
- self.bottom_dims = sum([fea.embed_dim for fea in features])
30
-
31
- self.bottom_mlp = MLP(self.bottom_dims, **{**bottom_params, **{"output_layer": False}})
32
- self.towers = nn.ModuleList(MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(len(task_types)))
33
- self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
34
-
35
- def forward(self, x):
36
- input_bottom = self.embedding(x, self.features, squeeze_dim=True)
37
- x = self.bottom_mlp(input_bottom)
38
-
39
- ys = []
40
- for tower, predict_layer in zip(self.towers, self.predict_layers):
41
- tower_out = tower(x)
42
- y = predict_layer(tower_out) #regression->keep, binary classification->sigmoid
43
- ys.append(y)
44
- return torch.cat(ys, dim=1)
1
+ """
2
+ Date: create on 04/05/2022
3
+ Reference:
4
+ paper: Caruana, R. (1997). Multitask learning. Machine learning, 28(1), 41-75.
5
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
6
+ """
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+
11
+ from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
12
+
13
+
14
class SharedBottom(nn.Module):
    """Shared Bottom multi task model.

    All tasks share one bottom MLP over the embedded features; every task then
    has its own tower and prediction layer on top of the shared representation.

    Args:
        features (list): the list of `Feature Class`, training by the bottom and tower module.
        task_types (list): one task-type string per task; each entry is handed to `PredictionLayer`.
        bottom_params (dict): the params of the shared bottom MLP, keys include:
            `{"dims":list, "activation":str, "dropout":float}`; `"output_layer"` is always set to `False`.
        tower_params_list (list): the list of tower params dict (one per task), the keys same as bottom_params.
    """

    def __init__(self, features, task_types, bottom_params, tower_params_list):
        super().__init__()
        self.features = features
        self.task_types = task_types
        self.embedding = EmbeddingLayer(features)
        self.bottom_dims = sum([fea.embed_dim for fea in features])

        # The bottom feeds the towers, so its output layer is switched off regardless of the caller's dict.
        bottom_kwargs = dict(bottom_params)
        bottom_kwargs["output_layer"] = False
        self.bottom_mlp = MLP(self.bottom_dims, **bottom_kwargs)
        self.towers = nn.ModuleList(
            [MLP(bottom_params["dims"][-1], **tower_params_list[idx]) for idx, _ in enumerate(task_types)])
        self.predict_layers = nn.ModuleList([PredictionLayer(kind) for kind in task_types])

    def forward(self, x):
        """Return the per-task predictions concatenated along dim 1."""
        bottom_in = self.embedding(x, self.features, squeeze_dim=True)
        shared = self.bottom_mlp(bottom_in)

        # regression->keep, binary classification->sigmoid
        preds = [head(tower(shared)) for tower, head in zip(self.towers, self.predict_layers)]
        return torch.cat(preds, dim=1)
@@ -1,3 +1,11 @@
1
- from .widedeep import WideDeep
2
- from .deepfm import DeepFM
3
- from .din import DIN
1
+ from .widedeep import WideDeep
2
+ from .deepfm import DeepFM
3
+ from .din import DIN
4
+ from .dcn import DCN
5
+ from .dcn_v2 import DCNv2
6
+ from .edcn import EDCN
7
+ from .deepffm import DeepFFM, FatDeepFFM
8
+ from .fibinet import FiBiNet
9
+ from .bst import BST
10
+ from .afm import AFM
11
+ from .dien import DIEN
@@ -0,0 +1,63 @@
1
+ """
2
+ Date: create on 23/04/2024
3
+ References:
4
+ paper: (IJCAI'2017) Attentional Factorization Machines:Learning the Weight of Feature Interactions via Attention Networks
5
+ url: https://arxiv.org/abs/1708.04617
6
+ Authors: Tao Fan, thisisevy@foxmail.com
7
+ """
8
+
9
+ import torch
10
+ from ...basic.layers import FM, MLP, LR, EmbeddingLayer
11
+ from torch import nn
12
+ from torch.nn import Parameter, init
13
+
14
+
15
class AFM(nn.Module):
    """Attentional Factorization Machine Model

    Combines a first-order linear term with an attention-weighted sum of the
    pairwise (element-wise) interactions between field embeddings.

    Args:
        fm_features (list): the list of `Feature Class`, training by the fm part module.
            All fields must share the same embedding size `embed_dim`.
        embed_dim (int): the dimension of input embedding.
        t (int): the size of the hidden layer in the attention network.
    """

    def __init__(self, fm_features, embed_dim, t=64):
        super().__init__()
        self.fm_features = fm_features
        self.embed_dim = embed_dim
        self.fm_dims = sum(fea.embed_dim for fea in fm_features)
        self.linear = LR(self.fm_dims)  # 1-order interaction
        # No longer used by forward (pairwise interactions are built there); kept so that code
        # accessing `model.fm` keeps working.
        self.fm = FM(reduce_sum=False)
        self.embedding = EmbeddingLayer(fm_features)

        # linear layer of the attention network
        self.attention_liner = nn.Linear(self.embed_dim, t)
        # `h` in the AFM formula: projects the attention hidden layer to a score
        self.h = init.xavier_uniform_(Parameter(torch.empty(t, 1)))
        # `p` in the AFM formula: projects the pooled interaction vector to a logit
        self.p = init.xavier_uniform_(Parameter(torch.empty(self.embed_dim, 1)))

    def attention(self, y_fm):
        """Compute attention weights, normalised with softmax over dim 1.

        Args:
            y_fm (Tensor): interaction vectors, `[batch_size, n_pairs, embed_dim]`.

        Returns:
            Tensor: `[batch_size, n_pairs, 1]` weights that sum to 1 over the pairs. A 2-D
            `[batch_size, embed_dim]` input is still accepted, but its softmax then runs over a
            size-1 dimension and yields all ones.
        """
        hidden = torch.relu(self.attention_liner(y_fm))  # [..., t]
        scores = torch.matmul(hidden, self.h)  # [..., 1]
        return torch.softmax(scores, dim=1)

    def forward(self, x):
        """Return the predicted probability for every sample, shape `[batch_size]`."""
        input_fm = self.embedding(x, self.fm_features, squeeze_dim=False)  # [batch_size, num_fields, embed_dim]

        y_linear = self.linear(input_fm.flatten(start_dim=1))
        # Build every field pair (i < j) explicitly. Feeding a single pooled FM vector to the
        # attention network made the softmax run over one element, i.e. a constant weight of 1.
        num_fields = input_fm.size(1)
        left, right = torch.triu_indices(num_fields, num_fields, offset=1, device=input_fm.device)
        pair_products = input_fm[:, left] * input_fm[:, right]  # [batch_size, n_pairs, embed_dim]
        # attention weights over the pairs
        atts = self.attention(pair_products)  # [batch_size, n_pairs, 1]
        pooled = torch.sum(atts * pair_products, dim=1)  # [batch_size, embed_dim]
        outs = torch.matmul(pooled, self.p)  # [batch_size, 1]
        y = y_linear + outs
        return torch.sigmoid(y.squeeze(1))
@@ -0,0 +1,63 @@
1
+ """
2
+ Date: create on 26/02/2024, update on 30/04/2022
3
+ References:
4
+ paper: Behavior Sequence Transformer for E-commerce Recommendation in Alibaba
5
+ url: https://arxiv.org/pdf/1905.06874
6
+ code: https://github.com/jiwidi/Behavior-Sequence-Transformer-Pytorch/blob/master/pytorch_bst.ipynb
7
+ Authors: Tao Fan, thisisevy@foxmail.com
8
+ """
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+
13
+ from ...basic.layers import EmbeddingLayer, MLP
14
+
15
+
16
class BST(nn.Module):
    """Behavior Sequence Transformer

    Each behaviour sequence is concatenated with the target embeddings and run
    through a Transformer encoder; the encoded sequences are flattened together
    with the remaining feature embeddings and fed to an MLP.

    Args:
        features (list): the list of `Feature Class`. training by MLP. It means the user profile features and
            context features in origin paper, exclude history and target features.
        history_features (list): the list of `Feature Class`, fed through the Transformer encoder. It means the
            user behaviour sequence features, eg.item id sequence, shop id sequence.
        target_features (list): the list of `Feature Class`, appended to every history sequence before the
            Transformer encoder. The embedding size of the first target feature is used as the model dimension.
        mlp_params (dict): the params of the last MLP module, keys include:
            `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`. `"activation"` defaults to
            `"leakyrelu"` when not given.
        nhead (int): the number of heads in the multi-head-attention models.
        dropout (float): the dropout value in the multi-head-attention models.
        num_layers (int): the number of sub-encoder-layers in the encoder.
        seq_len (int): the length of every history sequence; it sizes the MLP input. Defaults to 50, the value
            that used to be hard-coded.
    """

    def __init__(self, features, history_features, target_features, mlp_params, nhead=8, dropout=0.2, num_layers=1,
                 seq_len=50):
        super().__init__()
        self.features = features
        self.history_features = history_features
        self.target_features = target_features
        self.num_history_features = len(history_features)
        self.embed_dim = target_features[0].embed_dim
        # TODO: add a seq_len attribute to 'torch_rechub.basic.features.SequenceFeature' and read it from there
        self.seq_len = seq_len
        self.all_dims = (len(features) + len(history_features) * (self.seq_len + len(target_features))) * self.embed_dim
        self.embedding = EmbeddingLayer(features + history_features + target_features)
        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=nhead, dropout=dropout)
        self.transformer_layers = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
        # "leakyrelu" is only a default: merging lets a caller-supplied "activation" win instead of
        # raising a duplicate-keyword TypeError.
        self.mlp = MLP(self.all_dims, **{"activation": "leakyrelu", **mlp_params})

    def forward(self, x):
        """Return the predicted probability for every sample, shape `[batch_size]`."""
        embed_x_features = self.embedding(x, self.features)  # (batch_size, num_features, emb_dim)
        # (batch_size, num_history_features, seq_length, emb_dim)
        embed_x_history = self.embedding(x, self.history_features)
        embed_x_target = self.embedding(x, self.target_features)  # (batch_size, num_target_features, emb_dim)
        attention_pooling = []
        for i in range(self.num_history_features):
            # (batch_size, seq_length + num_target_features, emb_dim)
            sequence = torch.cat([embed_x_history[:, i, :, :], embed_x_target], dim=1)
            # The encoder is built with the default sequence-major layout (seq, batch, emb), so swap
            # the first two axes around the call; otherwise attention would run across the samples
            # of the batch instead of across the positions of each sequence.
            encoded = self.transformer_layers(sequence.transpose(0, 1)).transpose(0, 1)
            attention_pooling.append(encoded)
        # (batch_size, num_history_features * (seq_length + num_target_features), emb_dim)
        attention_pooling = torch.cat(attention_pooling, dim=1)

        mlp_in = torch.cat(
            [attention_pooling.flatten(start_dim=1),
             embed_x_features.flatten(start_dim=1)], dim=1)  # (batch_size, N)
        y = self.mlp(mlp_in)
        return torch.sigmoid(y.squeeze(1))
@@ -0,0 +1,38 @@
1
+ """
2
+ Date: create on 12/05/2022
3
+ References:
4
+ paper: (AKDD'2017) Deep & Cross Network for Ad Click Predictions
5
+ url: https://arxiv.org/abs/1708.05123
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import torch
10
+
11
+ from ...basic.layers import LR, MLP, CrossNetwork, EmbeddingLayer
12
+
13
+
14
class DCN(torch.nn.Module):
    """Deep & Cross Network

    The embedded features go through a cross network and an MLP in parallel;
    both outputs are concatenated and mapped to one logit by a linear layer.

    Args:
        features (list[Feature Class]): training by the whole module.
        n_cross_layers (int): forwarded to `CrossNetwork` as its second argument
            (presumably the number of cross layers).
        mlp_params (dict): the params of the last MLP module, keys include:
            `{"dims":list, "activation":str, "dropout":float}`. An `"output_layer"` key is tolerated
            but always overridden with `False`, because the final linear layer consumes `dims[-1]` features.
    """

    def __init__(self, features, n_cross_layers, mlp_params):
        super().__init__()
        self.features = features
        self.dims = sum(fea.embed_dim for fea in features)

        self.embedding = EmbeddingLayer(features)
        self.cn = CrossNetwork(self.dims, n_cross_layers)
        # Merge instead of passing output_layer=False next to **mlp_params, which raises a
        # duplicate-keyword TypeError when the caller's dict already contains "output_layer".
        self.mlp = MLP(self.dims, **{**mlp_params, "output_layer": False})
        self.linear = LR(self.dims + mlp_params["dims"][-1])

    def forward(self, x):
        """Return the predicted probability for every sample, shape `[batch_size]`."""
        embed_x = self.embedding(x, self.features, squeeze_dim=True)
        cn_out = self.cn(embed_x)
        mlp_out = self.mlp(embed_x)
        x_stack = torch.cat([cn_out, mlp_out], dim=1)
        y = self.linear(x_stack)
        return torch.sigmoid(y.squeeze(1))