torch-rechub 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as published in their respective public registries.
- torch_rechub/basic/activation.py +54 -52
- torch_rechub/basic/callback.py +32 -32
- torch_rechub/basic/features.py +94 -57
- torch_rechub/basic/initializers.py +92 -0
- torch_rechub/basic/layers.py +720 -240
- torch_rechub/basic/loss_func.py +34 -0
- torch_rechub/basic/metaoptimizer.py +72 -0
- torch_rechub/basic/metric.py +250 -0
- torch_rechub/models/matching/__init__.py +11 -0
- torch_rechub/models/matching/comirec.py +188 -0
- torch_rechub/models/matching/dssm.py +66 -0
- torch_rechub/models/matching/dssm_facebook.py +79 -0
- torch_rechub/models/matching/dssm_senet.py +75 -0
- torch_rechub/models/matching/gru4rec.py +87 -0
- torch_rechub/models/matching/mind.py +101 -0
- torch_rechub/models/matching/narm.py +76 -0
- torch_rechub/models/matching/sasrec.py +140 -0
- torch_rechub/models/matching/sine.py +151 -0
- torch_rechub/models/matching/stamp.py +83 -0
- torch_rechub/models/matching/youtube_dnn.py +71 -0
- torch_rechub/models/matching/youtube_sbc.py +98 -0
- torch_rechub/models/multi_task/__init__.py +5 -4
- torch_rechub/models/multi_task/aitm.py +84 -0
- torch_rechub/models/multi_task/esmm.py +55 -45
- torch_rechub/models/multi_task/mmoe.py +58 -52
- torch_rechub/models/multi_task/ple.py +130 -104
- torch_rechub/models/multi_task/shared_bottom.py +45 -44
- torch_rechub/models/ranking/__init__.py +11 -3
- torch_rechub/models/ranking/afm.py +63 -0
- torch_rechub/models/ranking/bst.py +63 -0
- torch_rechub/models/ranking/dcn.py +38 -0
- torch_rechub/models/ranking/dcn_v2.py +69 -0
- torch_rechub/models/ranking/deepffm.py +123 -0
- torch_rechub/models/ranking/deepfm.py +41 -41
- torch_rechub/models/ranking/dien.py +191 -0
- torch_rechub/models/ranking/din.py +91 -81
- torch_rechub/models/ranking/edcn.py +117 -0
- torch_rechub/models/ranking/fibinet.py +50 -0
- torch_rechub/models/ranking/widedeep.py +41 -41
- torch_rechub/trainers/__init__.py +2 -1
- torch_rechub/trainers/{trainer.py → ctr_trainer.py} +128 -111
- torch_rechub/trainers/match_trainer.py +170 -0
- torch_rechub/trainers/mtl_trainer.py +206 -144
- torch_rechub/utils/__init__.py +0 -0
- torch_rechub/utils/data.py +360 -0
- torch_rechub/utils/match.py +274 -0
- torch_rechub/utils/mtl.py +126 -0
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/LICENSE +21 -21
- torch_rechub-0.0.3.dist-info/METADATA +177 -0
- torch_rechub-0.0.3.dist-info/RECORD +55 -0
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/WHEEL +1 -1
- torch_rechub/basic/utils.py +0 -168
- torch_rechub-0.0.1.dist-info/METADATA +0 -105
- torch_rechub-0.0.1.dist-info/RECORD +0 -26
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/top_level.txt +0 -0

torch_rechub/models/multi_task/mmoe.py
@@ -1,52 +1,58 @@
 """
 Date: create on 04/05/2022
 References:
     paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
     url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
 Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
 """

 import torch
 import torch.nn as nn

 from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer


 class MMOE(nn.Module):
     """Multi-gate Mixture-of-Experts model.

     Args:
         features (list): the list of `Feature Class`, training by the expert and tower module.
         task_types (list): types of tasks, only support `["classfication", "regression"]`.
         n_expert (int): the number of expert net.
-        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}
+        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
         tower_params_list (list): the list of tower params dict, the keys same as expert_params.
     """

     def __init__(self, features, task_types, n_expert, expert_params, tower_params_list):
         super().__init__()
         self.features = features
         self.task_types = task_types
         self.n_task = len(task_types)
         self.n_expert = n_expert
         self.embedding = EmbeddingLayer(features)
         self.input_dims = sum([fea.embed_dim for fea in features])
         self.experts = nn.ModuleList(
-        ... [old lines 35-52 truncated in the source diff view]
+            MLP(self.input_dims, output_layer=False, **expert_params) for i in range(self.n_expert))
+        self.gates = nn.ModuleList(
+            MLP(self.input_dims, output_layer=False, **{
+                "dims": [self.n_expert],
+                "activation": "softmax"
+            }) for i in range(self.n_task))  #n_gate = n_task
+        self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
+        self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
+
+    def forward(self, x):
+        embed_x = self.embedding(x, self.features, squeeze_dim=True)  #[batch_size, input_dims]
+        expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts
+                      ]  #expert_out[i]: [batch_size, 1, expert_dims[-1]]
+        expert_outs = torch.cat(expert_outs, dim=1)  #[batch_size, n_expert, expert_dims[-1]]
+        gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates]  #gate_out[i]: [batch_size, n_expert, 1]
+
+        ys = []
+        for gate_out, tower, predict_layer in zip(gate_outs, self.towers, self.predict_layers):
+            expert_weight = torch.mul(gate_out, expert_outs)  #[batch_size, n_expert, expert_dims[-1]]
+            expert_pooling = torch.sum(expert_weight, dim=1)  #[batch_size, expert_dims[-1]]
+            tower_out = tower(expert_pooling)  #[batch_size, 1]
+            y = predict_layer(tower_out)  #logit -> proba
+            ys.append(y)
+        return torch.cat(ys, dim=1)
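For orientation, a minimal usage sketch of the MMOE constructor shown above. This example is not part of the diff: the SparseFeature class, the import paths, the dict-of-tensors batch format, and the exact task-type strings accepted by PredictionLayer are assumptions drawn from the rest of the library and should be checked against torch_rechub/basic/features.py and layers.py.

# hypothetical example, not included in the package diff
import torch
from torch_rechub.basic.features import SparseFeature  # assumed feature class
from torch_rechub.models.multi_task import MMOE

features = [SparseFeature("user_id", vocab_size=1000, embed_dim=16),
            SparseFeature("item_id", vocab_size=5000, embed_dim=16)]
model = MMOE(features,
             task_types=["classification", "classification"],  # check the exact strings expected by PredictionLayer
             n_expert=4,
             expert_params={"dims": [64, 32]},                  # towers read expert_params["dims"][-1]
             tower_params_list=[{"dims": [16]}, {"dims": [16]}])

x = {"user_id": torch.randint(0, 1000, (8,)),
     "item_id": torch.randint(0, 5000, (8,))}
y = model(x)  # [batch_size, n_task], one column per task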
torch_rechub/models/multi_task/ple.py
@@ -1,104 +1,130 @@
 """
 Date: create on 05/05/2022
 References:
     paper: (RecSys'2020) Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations
     url: https://dl.acm.org/doi/abs/10.1145/3383313.3412236
 Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
 """

 import torch
 import torch.nn as nn

 from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer


 class PLE(nn.Module):
     """Progressive Layered Extraction model.

     Args:
         features (list): the list of `Feature Class`, training by the expert and tower module.
         task_types (list): types of tasks, only support `["classfication", "regression"]`.
         n_level (int): the number of CGC layer.
         n_expert_specific (int): the number of task-specific expert net.
         n_expert_shared (int): the number of task-shared expert net.
-        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}
+        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
         tower_params_list (list): the list of tower params dict, the keys same as expert_params.
     """

-    def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params
-    ... [old lines 29-104 truncated in the source diff view]
+    def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params,
+                 tower_params_list):
+        super().__init__()
+        self.features = features
+        self.n_task = len(task_types)
+        self.task_types = task_types
+        self.n_level = n_level
+        self.input_dims = sum([fea.embed_dim for fea in features])
+        self.embedding = EmbeddingLayer(features)
+        self.cgc_layers = nn.ModuleList(
+            CGC(i + 1, n_level, self.n_task, n_expert_specific, n_expert_shared, self.input_dims, expert_params)
+            for i in range(n_level))
+        self.towers = nn.ModuleList(
+            MLP(expert_params["dims"][-1], output_layer=False, **tower_params_list[i]) for i in range(self.n_task))
+        self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
+
+    def forward(self, x):
+        embed_x = self.embedding(x, self.features, squeeze_dim=True)  #[batch_size, input_dims]
+        ple_inputs = [embed_x] * (self.n_task + 1)
+        ple_outs = []
+        for i in range(self.n_level):
+            ple_outs = self.cgc_layers[i](ple_inputs)  #ple_outs[i]: [batch_size, expert_dims[-1]]
+            ple_inputs = ple_outs
+        #predict
+        ys = []
+        for ple_out, tower, predict_layer in zip(ple_outs, self.towers, self.predict_layers):
+            tower_out = tower(ple_out)  #[batch_size, 1]
+            y = predict_layer(tower_out)  #logit -> proba
+            ys.append(y)
+        return torch.cat(ys, dim=1)
+
+
+class CGC(nn.Module):
+    """Customized Gate Control (CGC) Model mentioned in PLE paper.
+
+    Args:
+        cur_level (int): the current level of CGC in PLE.
+        n_level (int): the number of CGC layer.
+        n_task (int): the number of tasks.
+        n_expert_specific (int): the number of task-specific expert net.
+        n_expert_shared (int): the number of task-shared expert net.
+        input_dims (int): the input dims of the xpert module in current CGC layer.
+        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
+    """
+
+    def __init__(self, cur_level, n_level, n_task, n_expert_specific, n_expert_shared, input_dims, expert_params):
+        super().__init__()
+        self.cur_level = cur_level  # the CGC level of PLE
+        self.n_level = n_level
+        self.n_task = n_task
+        self.n_expert_specific = n_expert_specific
+        self.n_expert_shared = n_expert_shared
+        self.n_expert_all = n_expert_specific * self.n_task + n_expert_shared
+        input_dims = input_dims if cur_level == 1 else expert_params["dims"][
+            -1]  #the first layer expert dim is the input data dim other expert dim
+        self.experts_specific = nn.ModuleList(
+            MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_task * self.n_expert_specific))
+        self.experts_shared = nn.ModuleList(
+            MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_expert_shared))
+        self.gates_specific = nn.ModuleList(
+            MLP(
+                input_dims, **{
+                    "dims": [self.n_expert_specific + self.n_expert_shared],
+                    "activation": "softmax",
+                    "output_layer": False
+                }) for _ in range(self.n_task))  #n_gate_specific = n_task
+        if cur_level < n_level:
+            self.gate_shared = MLP(input_dims, **{
+                "dims": [self.n_expert_all],
+                "activation": "softmax",
+                "output_layer": False
+            })  #n_gate_specific = n_task
+
+    def forward(self, x_list):
+        expert_specific_outs = []  #expert_out[i]: [batch_size, 1, expert_dims[-1]]
+        for i in range(self.n_task):
+            expert_specific_outs.extend([
+                expert(x_list[i]).unsqueeze(1)
+                for expert in self.experts_specific[i * self.n_expert_specific:(i + 1) * self.n_expert_specific]
+            ])
+        expert_shared_outs = [expert(x_list[-1]).unsqueeze(1) for expert in self.experts_shared
+                             ]  #x_list[-1]: the input for shared experts
+        gate_specific_outs = [gate(x_list[i]).unsqueeze(-1) for i, gate in enumerate(self.gates_specific)
+                             ]  #gate_out[i]: [batch_size, n_expert_specific+n_expert_shared, 1]
+        cgc_outs = []
+        for i, gate_out in enumerate(gate_specific_outs):
+            cur_expert_list = expert_specific_outs[i * self.n_expert_specific:(i + 1) *
+                                                   self.n_expert_specific] + expert_shared_outs
+            expert_concat = torch.cat(cur_expert_list,
+                                      dim=1)  #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
+            expert_weight = torch.mul(gate_out,
+                                      expert_concat)  #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
+            expert_pooling = torch.sum(expert_weight, dim=1)  #[batch_size, expert_dims[-1]]
+            cgc_outs.append(expert_pooling)  #length: n_task
+        if self.cur_level < self.n_level:  #not the last layer
+            gate_shared_out = self.gate_shared(x_list[-1]).unsqueeze(-1)  #[batch_size, n_expert_all, 1]
+            expert_concat = torch.cat(expert_specific_outs + expert_shared_outs,
+                                      dim=1)  #[batch_size, n_expert_all, expert_dims[-1]]
+            expert_weight = torch.mul(gate_shared_out, expert_concat)  #[batch_size, n_expert_all, expert_dims[-1]]
+            expert_pooling = torch.sum(expert_weight, dim=1)  #[batch_size, expert_dims[-1]]
+            cgc_outs.append(expert_pooling)  #length: n_task+1

+        return cgc_outs
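Reading the CGC wiring above: every level keeps n_task groups of task-specific experts plus one shared group, and every level except the last also emits a shared output that feeds the next level, which is why `ple_inputs` holds n_task + 1 tensors. Below is a hypothetical constructor call in the same spirit as the MMOE sketch (the feature list and batch format are assumed, not shown in this diff). Note that the PLE towers are built with output_layer=False, so ending the tower dims with 1 yields one logit per task.

# hypothetical example, not included in the package diff
from torch_rechub.models.multi_task import PLE

model = PLE(features,                       # same assumed feature list as in the MMOE sketch
            task_types=["classification", "regression"],
            n_level=2,                      # two stacked CGC layers
            n_expert_specific=2,
            n_expert_shared=1,
            expert_params={"dims": [64, 32]},
            tower_params_list=[{"dims": [16, 1]}, {"dims": [16, 1]}])  # towers use output_layer=False, so end with 1
# model(x) returns [batch_size, n_task], one column per task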
torch_rechub/models/multi_task/shared_bottom.py
@@ -1,44 +1,45 @@
 """
 Date: create on 04/05/2022
 Reference:
     paper: Caruana, R. (1997). Multitask learning. Machine learning, 28(1), 41-75.
 Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
 """

 import torch
 import torch.nn as nn

 from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer


 class SharedBottom(nn.Module):
     """Shared Bottom multi task model.

     Args:
         features (list): the list of `Feature Class`, training by the bottom and tower module.
         task_types (list): types of tasks, only support `["classfication", "regression"]`.
         bottom_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
         tower_params_list (list): the list of tower params dict, the keys same as bottom_params.
     """

     def __init__(self, features, task_types, bottom_params, tower_params_list):
         super().__init__()
         self.features = features
         self.task_types = task_types
         self.embedding = EmbeddingLayer(features)
         self.bottom_dims = sum([fea.embed_dim for fea in features])

         self.bottom_mlp = MLP(self.bottom_dims, **{**bottom_params, **{"output_layer": False}})
         self.towers = nn.ModuleList(
-        ... [old lines 33-44 truncated in the source diff view]
+            MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(len(task_types)))
+        self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
+
+    def forward(self, x):
+        input_bottom = self.embedding(x, self.features, squeeze_dim=True)
+        x = self.bottom_mlp(input_bottom)
+
+        ys = []
+        for tower, predict_layer in zip(self.towers, self.predict_layers):
+            tower_out = tower(x)
+            y = predict_layer(tower_out)  #regression->keep, binary classification->sigmoid
+            ys.append(y)
+        return torch.cat(ys, dim=1)
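One detail worth noting in the constructor above: the dict merge `{**bottom_params, **{"output_layer": False}}` forces output_layer to False for the bottom MLP, so the docstring's advice to keep `{"output_layer": False}` is satisfied automatically. A hypothetical call (feature list assumed as in the earlier sketches, not part of this diff):

# hypothetical example, not included in the package diff
from torch_rechub.models.multi_task import SharedBottom

model = SharedBottom(features,
                     task_types=["classification", "classification"],
                     bottom_params={"dims": [128, 64]},            # output_layer is forced to False internally
                     tower_params_list=[{"dims": [32]}, {"dims": [32]}])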
torch_rechub/models/ranking/__init__.py
@@ -1,3 +1,11 @@
 from .widedeep import WideDeep
 from .deepfm import DeepFM
 from .din import DIN
+from .dcn import DCN
+from .dcn_v2 import DCNv2
+from .edcn import EDCN
+from .deepffm import DeepFFM, FatDeepFFM
+from .fibinet import FiBiNet
+from .bst import BST
+from .afm import AFM
+from .dien import DIEN
torch_rechub/models/ranking/afm.py (new file)
@@ -0,0 +1,63 @@
+"""
+Date: create on 23/04/2024
+References:
+    paper: (IJCAI'2017) Attentional Factorization Machines:Learning the Weight of Feature Interactions via Attention Networks
+    url: https://arxiv.org/abs/1708.04617
+Authors: Tao Fan, thisisevy@foxmail.com
+"""
+
+import torch
+from ...basic.layers import FM, MLP, LR, EmbeddingLayer
+from torch import nn
+from torch.nn import Parameter, init
+
+
+class AFM(nn.Module):
+    """Attentional Factorization Machine Model
+
+    Args:
+        fm_features (list): the list of `Feature Class`, training by the fm part module.
+        embed_dim (int): the dimension of input embedding.
+        t (int): the size of the hidden layer in the attention network.
+    """
+
+    def __init__(self, fm_features, embed_dim, t=64):
+        super(AFM, self).__init__()
+        self.fm_features = fm_features
+        self.embed_dim = embed_dim
+        self.fm_dims = sum([fea.embed_dim for fea in fm_features])
+        self.linear = LR(self.fm_dims)  # 1-order interaction
+        self.fm = FM(reduce_sum=False)  # 2-order interaction
+        self.embedding = EmbeddingLayer(fm_features)
+
+        # linear layer used inside the attention computation
+        self.attention_liner = nn.Linear(self.embed_dim, t)
+        # h in the AFM formula
+        self.h = init.xavier_uniform_(Parameter(torch.empty(t, 1)))
+        # p in the AFM formula
+        self.p = init.xavier_uniform_(Parameter(torch.empty(self.embed_dim, 1)))
+
+    def attention(self, y_fm):
+        # embs: [ batch_size, k ]
+        # [ batch_size, t ]
+        y_fm = self.attention_liner(y_fm)
+        # [ batch_size, t ]
+        y_fm = torch.relu(y_fm)
+        # [ batch_size, 1 ]
+        y_fm = torch.matmul(y_fm, self.h)
+        # [ batch_size, 1 ]
+        atts = torch.softmax(y_fm, dim=1)
+        return atts
+
+    def forward(self, x):
+        input_fm = self.embedding(x, self.fm_features, squeeze_dim=False)  # [batch_size, num_fields, embed_dim]
+
+        y_linear = self.linear(input_fm.flatten(start_dim=1))
+        y_fm = self.fm(input_fm)
+        # compute the attention weights
+        atts = self.attention(y_fm)
+        # [ batch_size, 1 ]
+        outs = torch.matmul(atts * y_fm, self.p)
+        # print(y_linear.size(), outs.size())
+        y = y_linear + outs
+        return torch.sigmoid(y.squeeze(1))
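A shape walk-through of the attention path above, for orientation only; it follows the shape comments in the diff rather than the package's FM layer, whose exact output shape should be confirmed in torch_rechub/basic/layers.py. With y_fm of shape [batch_size, embed_dim], the score reaching torch.softmax has a single column, so the softmax runs over a size-1 dimension; whether that matches the pair-wise attention of the AFM paper may be worth double-checking.

# illustrative sketch, not part of the package
import torch

batch_size, embed_dim, t = 8, 16, 64
y_fm = torch.randn(batch_size, embed_dim)        # stand-in for self.fm(input_fm)
attention_liner = torch.nn.Linear(embed_dim, t)
h = torch.randn(t, 1)
p = torch.randn(embed_dim, 1)

a = torch.relu(attention_liner(y_fm))            # [batch_size, t]
a = torch.matmul(a, h)                           # [batch_size, 1]
atts = torch.softmax(a, dim=1)                   # softmax over a size-1 dim -> all ones
outs = torch.matmul(atts * y_fm, p)              # [batch_size, 1]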
torch_rechub/models/ranking/bst.py (new file)
@@ -0,0 +1,63 @@
+"""
+Date: create on 26/02/2024, update on 30/04/2022
+References:
+    paper: Behavior Sequence Transformer for E-commerce Recommendation in Alibaba
+    url: https://arxiv.org/pdf/1905.06874
+    code: https://github.com/jiwidi/Behavior-Sequence-Transformer-Pytorch/blob/master/pytorch_bst.ipynb
+Authors: Tao Fan, thisisevy@foxmail.com
+"""
+
+import torch
+import torch.nn as nn
+
+from ...basic.layers import EmbeddingLayer, MLP
+
+
+class BST(nn.Module):
+    """Behavior Sequence Transformer
+    Args:
+        features (list): the list of `Feature Class`. training by MLP. It means the user profile features and context features in origin paper, exclude history and target features.
+        history_features (list): the list of `Feature Class`, training by ActivationUnit. It means the user behaviour sequence features, eg.item id sequence, shop id sequence.
+        target_features (list): the list of `Feature Class`, training by ActivationUnit. It means the target feature which will execute target-attention with history feature.
+        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
+        nhead (int): the number of heads in the multi-head-attention models.
+        dropout (float): the dropout value in the multi-head-attention models.
+        num_layers (Any): the number of sub-encoder-layers in the encoder.
+    """
+
+    def __init__(self, features, history_features, target_features, mlp_params, nhead=8, dropout=0.2, num_layers=1):
+        super().__init__()
+        self.features = features
+        self.history_features = history_features
+        self.target_features = target_features
+        self.num_history_features = len(history_features)
+        self.embed_dim = target_features[0].embed_dim
+        self.seq_len = 50
+        # TODO: add a seq_len attribute to 'torch_rechub.basic.features.SequenceFeature'
+        self.all_dims = (len(features) + len(history_features) * (self.seq_len + len(target_features))) * self.embed_dim
+        self.embedding = EmbeddingLayer(features + history_features + target_features)
+        self.encoder_layer = nn.TransformerEncoderLayer(d_model=self.embed_dim, nhead=nhead, dropout=dropout)
+        self.transformer_layers = nn.TransformerEncoder(self.encoder_layer, num_layers=num_layers)
+        self.mlp = MLP(self.all_dims, activation="leakyrelu",
+                       **mlp_params)  # the feature classes build the input layer; mlp_params sets the structure of the following DNN
+
+    def forward(self, x):
+        embed_x_features = self.embedding(x, self.features)  # (batch_size, num_features, emb_dim)
+        embed_x_history = self.embedding(x,
+                                         self.history_features)  # (batch_size, num_history_features, seq_length, emb_dim)
+        embed_x_target = self.embedding(x, self.target_features)  # (batch_size, num_target_features, emb_dim)
+        attention_pooling = []
+        for i in range(self.num_history_features):
+            attention_seq = self.transformer_layers(
+                torch.cat([torch.squeeze(embed_x_history[:, i, :, :], 1), embed_x_target], dim=1))
+            attention_pooling.append(attention_seq)  # (batch_size, seq_length + num_target_features, emb_dim)
+        attention_pooling = torch.cat(attention_pooling,
+                                      dim=1)  # (batch_size, num_history_features * (seq_length + num_target_features), emb_dim)
+
+        mlp_in = torch.cat([
+            attention_pooling.flatten(start_dim=1),
+            embed_x_features.flatten(start_dim=1)
+        ], dim=1)  # (batch_size, N)
+        y = self.mlp(mlp_in)
+        return torch.sigmoid(y.squeeze(1))
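Since `seq_len` is hard-coded to 50 in the constructor above (see the TODO), every history sequence fed to BST has to be padded or truncated to 50 steps for `self.all_dims` to match the flattened MLP input. A quick check of that arithmetic with illustrative numbers:

# illustrative numbers only, not from the package
n_features, n_history, n_target, embed_dim, seq_len = 4, 1, 1, 16, 50
all_dims = (n_features + n_history * (seq_len + n_target)) * embed_dim
print(all_dims)  # 880 = (4 + 1 * (50 + 1)) * 16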
torch_rechub/models/ranking/dcn.py (new file)
@@ -0,0 +1,38 @@
+"""
+Date: create on 12/05/2022
+References:
+    paper: (AKDD'2017) Deep & Cross Network for Ad Click Predictions
+    url: https://arxiv.org/abs/1708.05123
+Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+"""
+
+import torch
+
+from ...basic.layers import LR, MLP, CrossNetwork, EmbeddingLayer
+
+
+class DCN(torch.nn.Module):
+    """Deep & Cross Network
+
+    Args:
+        features (list[Feature Class]): training by the whole module.
+        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
+    """
+
+    def __init__(self, features, n_cross_layers, mlp_params):
+        super().__init__()
+        self.features = features
+        self.dims = sum([fea.embed_dim for fea in features])
+
+        self.embedding = EmbeddingLayer(features)
+        self.cn = CrossNetwork(self.dims, n_cross_layers)
+        self.mlp = MLP(self.dims, output_layer=False, **mlp_params)
+        self.linear = LR(self.dims + mlp_params["dims"][-1])
+
+    def forward(self, x):
+        embed_x = self.embedding(x, self.features, squeeze_dim=True)
+        cn_out = self.cn(embed_x)
+        mlp_out = self.mlp(embed_x)
+        x_stack = torch.cat([cn_out, mlp_out], dim=1)
+        y = self.linear(x_stack)
+        return torch.sigmoid(y.squeeze(1))
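CrossNetwork itself lives in torch_rechub/basic/layers.py (also updated in this release) and is not shown here. As a reference point, below is a minimal sketch of the cross-layer recurrence from the DCN paper, x_{l+1} = x_0 * (w_l^T x_l) + b_l + x_l, which a layer like CrossNetwork is expected to implement; this is an illustration of the formula, not the library's actual code.

# illustrative sketch of one cross layer, not the package implementation
import torch
import torch.nn as nn

class CrossLayerSketch(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.w = nn.Linear(input_dim, 1, bias=False)    # w_l
        self.b = nn.Parameter(torch.zeros(input_dim))   # b_l

    def forward(self, x0, xl):
        # x_{l+1} = x_0 * (w_l^T x_l) + b_l + x_l, all tensors [batch_size, input_dim]
        return x0 * self.w(xl) + self.b + xl

x0 = torch.randn(8, 32)
x1 = CrossLayerSketch(32)(x0, x0)   # output of the first cross layer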