torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torch_rechub/__init__.py +14 -0
- torch_rechub/basic/activation.py +54 -54
- torch_rechub/basic/callback.py +33 -33
- torch_rechub/basic/features.py +87 -94
- torch_rechub/basic/initializers.py +92 -92
- torch_rechub/basic/layers.py +994 -720
- torch_rechub/basic/loss_func.py +223 -34
- torch_rechub/basic/metaoptimizer.py +76 -72
- torch_rechub/basic/metric.py +251 -250
- torch_rechub/models/generative/__init__.py +6 -0
- torch_rechub/models/generative/hllm.py +249 -0
- torch_rechub/models/generative/hstu.py +189 -0
- torch_rechub/models/matching/__init__.py +13 -11
- torch_rechub/models/matching/comirec.py +193 -188
- torch_rechub/models/matching/dssm.py +72 -66
- torch_rechub/models/matching/dssm_facebook.py +77 -79
- torch_rechub/models/matching/dssm_senet.py +28 -16
- torch_rechub/models/matching/gru4rec.py +85 -87
- torch_rechub/models/matching/mind.py +103 -101
- torch_rechub/models/matching/narm.py +82 -76
- torch_rechub/models/matching/sasrec.py +143 -140
- torch_rechub/models/matching/sine.py +148 -151
- torch_rechub/models/matching/stamp.py +81 -83
- torch_rechub/models/matching/youtube_dnn.py +75 -71
- torch_rechub/models/matching/youtube_sbc.py +98 -98
- torch_rechub/models/multi_task/__init__.py +7 -5
- torch_rechub/models/multi_task/aitm.py +83 -84
- torch_rechub/models/multi_task/esmm.py +56 -55
- torch_rechub/models/multi_task/mmoe.py +58 -58
- torch_rechub/models/multi_task/ple.py +116 -130
- torch_rechub/models/multi_task/shared_bottom.py +45 -45
- torch_rechub/models/ranking/__init__.py +14 -11
- torch_rechub/models/ranking/afm.py +65 -63
- torch_rechub/models/ranking/autoint.py +102 -0
- torch_rechub/models/ranking/bst.py +61 -63
- torch_rechub/models/ranking/dcn.py +38 -38
- torch_rechub/models/ranking/dcn_v2.py +59 -69
- torch_rechub/models/ranking/deepffm.py +131 -123
- torch_rechub/models/ranking/deepfm.py +43 -42
- torch_rechub/models/ranking/dien.py +191 -191
- torch_rechub/models/ranking/din.py +93 -91
- torch_rechub/models/ranking/edcn.py +101 -117
- torch_rechub/models/ranking/fibinet.py +42 -50
- torch_rechub/models/ranking/widedeep.py +41 -41
- torch_rechub/trainers/__init__.py +4 -3
- torch_rechub/trainers/ctr_trainer.py +288 -128
- torch_rechub/trainers/match_trainer.py +336 -170
- torch_rechub/trainers/matching.md +3 -0
- torch_rechub/trainers/mtl_trainer.py +356 -207
- torch_rechub/trainers/seq_trainer.py +427 -0
- torch_rechub/utils/data.py +492 -360
- torch_rechub/utils/hstu_utils.py +198 -0
- torch_rechub/utils/match.py +457 -274
- torch_rechub/utils/model_utils.py +233 -0
- torch_rechub/utils/mtl.py +136 -126
- torch_rechub/utils/onnx_export.py +220 -0
- torch_rechub/utils/visualization.py +271 -0
- torch_rechub-0.0.5.dist-info/METADATA +402 -0
- torch_rechub-0.0.5.dist-info/RECORD +64 -0
- {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info}/WHEEL +1 -2
- {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info/licenses}/LICENSE +21 -21
- torch_rechub-0.0.3.dist-info/METADATA +0 -177
- torch_rechub-0.0.3.dist-info/RECORD +0 -55
- torch_rechub-0.0.3.dist-info/top_level.txt +0 -1
|
@@ -1,98 +1,98 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Date: create on 23/05/2022
|
|
3
|
-
References:
|
|
4
|
-
paper: (RecSys'2019) Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations
|
|
5
|
-
url: https://dl.acm.org/doi/10.1145/3298689.3346996
|
|
6
|
-
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import
|
|
10
|
-
import torch
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
self.
|
|
41
|
-
self.
|
|
42
|
-
self.
|
|
43
|
-
self.
|
|
44
|
-
self.
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
self.
|
|
48
|
-
self.
|
|
49
|
-
self.
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
#
|
|
54
|
-
self.
|
|
55
|
-
|
|
56
|
-
self.
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
item_embedding = self.item_mlp(input_item)
|
|
98
|
-
return item_embedding
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 23/05/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: (RecSys'2019) Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations
|
|
5
|
+
url: https://dl.acm.org/doi/10.1145/3298689.3346996
|
|
6
|
+
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import torch
|
|
11
|
+
import torch.nn.functional as F
|
|
12
|
+
|
|
13
|
+
from ...basic.layers import MLP, EmbeddingLayer
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class YoutubeSBC(torch.nn.Module):
    """Sampling-Bias-Corrected Neural Modeling for Matching by Youtube.

    It's a DSSM match model trained by in-batch softmax loss on list-wise samples,
    with a sampling-bias-correction module (subtracting the log sampling probability
    from the raw similarity score).

    Args:
        user_features (list[Feature Class]): training by the user tower module.
        item_features (list[Feature Class]): training by the item tower module.
        sample_weight_feature (list[Feature Class]): used for sampling bias correction in training.
        user_params (dict): the params of the User Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
        item_params (dict): the params of the Item Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
        batch_size (int): same as batch size of DataLoader, used in in-batch sampling.
        n_neg (int): the number of negative samples for every positive sample, default to 3. Note it must be smaller than batch_size.
        temperature (float): temperature factor for similarity score, default to 1.0.
    """

    def __init__(self, user_features, item_features, sample_weight_feature, user_params, item_params, batch_size, n_neg=3, temperature=1.0):
        super().__init__()
        self.user_features = user_features
        self.item_features = item_features
        self.sample_weight_feature = sample_weight_feature
        self.n_neg = n_neg
        self.temperature = temperature
        self.user_dims = sum([fea.embed_dim for fea in user_features])
        self.item_dims = sum([fea.embed_dim for fea in item_features])
        self.batch_size = batch_size
        self.embedding = EmbeddingLayer(user_features + item_features + sample_weight_feature)
        self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
        self.item_mlp = MLP(self.item_dims, output_layer=False, **item_params)
        self.mode = None

        # In-batch sampling index: row i keeps item i (the positive) plus the
        # following n_neg items of the batch (wrapping around) as negatives.
        self.index0 = np.repeat(np.arange(batch_size), n_neg + 1)
        self.index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
        self.index1[np.where(self.index1 >= batch_size)] -= batch_size

    def forward(self, x):
        user_embedding = self.user_tower(x)  # (batch_size, embedding_dim)
        item_embedding = self.item_tower(x)  # (batch_size, embedding_dim)
        if self.mode == "user":
            return user_embedding
        if self.mode == "item":
            return item_embedding

        # pred[i, j] means predicted score that user_i gives to item_j
        pred = torch.cosine_similarity(user_embedding.unsqueeze(1), item_embedding, dim=2)  # (batch_size, batch_size)

        # get sample weight (sampling probability) of items in this batch
        sample_weight = self.embedding(x, self.sample_weight_feature, squeeze_dim=True).squeeze(1)  # (batch_size)
        # Sampling Bias Corrected, using broadcast. (batch_size, batch_size)
        scores = pred - torch.log(sample_weight)

        if user_embedding.shape[0] * (self.n_neg + 1) != self.index0.shape[0]:  # last (smaller) batch
            batch_size = user_embedding.shape[0]
            # BUGFIX: a numpy basic slice is a *view*, so the in-place
            # fancy-index adjustment below used to write through into
            # self.index0 / self.index1 and corrupt them for every later
            # full-size batch. Copy before mutating.
            index0 = self.index0[:batch_size * (self.n_neg + 1)].copy()
            index1 = self.index1[:batch_size * (self.n_neg + 1)].copy()
            index0[np.where(index0 >= batch_size)] -= batch_size
            index1[np.where(index1 >= batch_size)] -= batch_size
            scores = scores[index0, index1]  # flat: (batch_size * (1 + n_neg),)
        else:
            scores = scores[self.index0, self.index1]  # flat: (batch_size * (1 + n_neg),)

        scores = scores / self.temperature
        return scores.view(-1, self.n_neg + 1)  # (batch_size, 1 + self.n_neg)

    def user_tower(self, x):
        if self.mode == "item":
            return None
        # [batch_size, num_features * embed_dim]
        input_user = self.embedding(x, self.user_features, squeeze_dim=True)
        # [batch_size, user_params["dims"][-1]]
        user_embedding = self.user_mlp(input_user)
        return user_embedding

    def item_tower(self, x):
        if self.mode == "user":
            return None
        # [batch_size, num_features * embed_dim]
        input_item = self.embedding(x, self.item_features, squeeze_dim=True)
        # [batch_size, item_params["dims"][-1]]
        item_embedding = self.item_mlp(input_item)
        return item_embedding
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
from .
|
|
4
|
-
from .
|
|
5
|
-
from .
|
|
1
|
+
__all__ = ['SharedBottom', 'ESMM', 'MMOE', 'PLE', 'AITM']
|
|
2
|
+
|
|
3
|
+
from .aitm import AITM
|
|
4
|
+
from .esmm import ESMM
|
|
5
|
+
from .mmoe import MMOE
|
|
6
|
+
from .ple import PLE
|
|
7
|
+
from .shared_bottom import SharedBottom
|
|
@@ -1,84 +1,83 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Date: create on 14/05/2022
|
|
3
|
-
References:
|
|
4
|
-
paper: (KDD'2021) Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising
|
|
5
|
-
url: https://arxiv.org/abs/2105.08489
|
|
6
|
-
code: https://github.com/adtalos/AITM-torch
|
|
7
|
-
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
import torch
|
|
11
|
-
import torch.nn as nn
|
|
12
|
-
|
|
13
|
-
from ...basic.layers import MLP, EmbeddingLayer
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class AITM(nn.Module):
|
|
17
|
-
""" Adaptive Information Transfer Multi-task (AITM) framework.
|
|
18
|
-
all the task type must be binary classificatioon.
|
|
19
|
-
|
|
20
|
-
Args:
|
|
21
|
-
features (list[Feature Class]): training by the whole module.
|
|
22
|
-
n_task (int): the number of binary classificatioon task.
|
|
23
|
-
bottom_params (dict): the params of all the botwer expert module, keys include:`{"dims":list, "activation":str, "dropout":float}`.
|
|
24
|
-
tower_params_list (list): the list of tower params dict, the keys same as expert_params.
|
|
25
|
-
"""
|
|
26
|
-
|
|
27
|
-
def __init__(self, features, n_task, bottom_params, tower_params_list):
|
|
28
|
-
super().__init__()
|
|
29
|
-
self.features = features
|
|
30
|
-
self.n_task = n_task
|
|
31
|
-
self.input_dims = sum([fea.embed_dim for fea in features])
|
|
32
|
-
self.embedding = EmbeddingLayer(features)
|
|
33
|
-
|
|
34
|
-
self.bottoms = nn.ModuleList(
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
self.
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
y
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
self.
|
|
72
|
-
self.
|
|
73
|
-
self.
|
|
74
|
-
self.
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
a =
|
|
82
|
-
|
|
83
|
-
outputs
|
|
84
|
-
return outputs
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 14/05/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: (KDD'2021) Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising
|
|
5
|
+
url: https://arxiv.org/abs/2105.08489
|
|
6
|
+
code: https://github.com/adtalos/AITM-torch
|
|
7
|
+
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import torch
|
|
11
|
+
import torch.nn as nn
|
|
12
|
+
|
|
13
|
+
from ...basic.layers import MLP, EmbeddingLayer
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AITM(nn.Module):
    """ Adaptive Information Transfer Multi-task (AITM) framework.
    All task types must be binary classification.

    Args:
        features (list[Feature Class]): training by the whole module.
        n_task (int): the number of binary classification tasks.
        bottom_params (dict): the params shared by every task-specific bottom module, keys include:`{"dims":list, "activation":str, "dropout":float}`.
        tower_params_list (list): the list of tower params dict, the keys same as expert_params.
    """

    def __init__(self, features, n_task, bottom_params, tower_params_list):
        super().__init__()
        self.features = features
        self.n_task = n_task
        # total input width after concatenating all feature embeddings
        self.input_dims = sum([fea.embed_dim for fea in features])
        self.embedding = EmbeddingLayer(features)

        # one bottom MLP per task (AITM has no shared bottom); towers sit on top
        self.bottoms = nn.ModuleList(MLP(self.input_dims, output_layer=False, **bottom_params) for i in range(self.n_task))
        self.towers = nn.ModuleList(MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))

        # info_gates[i-1] projects task i-1's representation into the "info"
        # transferred to task i; aits[i-1] fuses it with task i's own
        # representation. n_task - 1 transfer links in total.
        self.info_gates = nn.ModuleList(MLP(bottom_params["dims"][-1], output_layer=False, dims=[bottom_params["dims"][-1]]) for i in range(self.n_task - 1))
        self.aits = nn.ModuleList(AttentionLayer(bottom_params["dims"][-1]) for _ in range(self.n_task - 1))

    def forward(self, x):
        embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, *]
        input_towers = [self.bottoms[i](embed_x) for i in range(self.n_task)]  # [i]:[batch_size, bottom_dims[-1]]
        # NOTE: this loop is sequential on purpose — input_towers[i - 1] may
        # already contain the attention-fused output of the previous iteration,
        # which is how information propagates along the task chain.
        for i in range(1, self.n_task):  # for task 1:n-1
            # [batch_size,1,bottom_dims[-1]]
            info = self.info_gates[i - 1](input_towers[i - 1]).unsqueeze(1)
            # [batch_size, 2, bottom_dims[-1]]: task i's own repr + transferred info
            ait_input = torch.cat([input_towers[i].unsqueeze(1), info], dim=1)
            input_towers[i] = self.aits[i - 1](ait_input)

        ys = []
        for input_tower, tower in zip(input_towers, self.towers):
            y = tower(input_tower)
            ys.append(torch.sigmoid(y))  # binary task -> probability
        return torch.cat(ys, dim=1)  # [batch_size, n_task]
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class AttentionLayer(nn.Module):
    """Scaled dot-product attention used for info transfer in AITM.

    Args:
        dim (int): attention dim

    Shape:
        Input: (batch_size, 2, dim)
        Output: (batch_size, dim)
    """

    def __init__(self, dim=32):
        super().__init__()
        self.dim = dim
        self.q_layer = nn.Linear(dim, dim, bias=False)
        self.k_layer = nn.Linear(dim, dim, bias=False)
        self.v_layer = nn.Linear(dim, dim, bias=False)
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        Q = self.q_layer(x)
        K = self.k_layer(x)
        V = self.v_layer(x)
        # Scaled dot-product score per sequence position: (batch_size, 2).
        # Use a plain python scalar for the scale instead of the original
        # torch.sqrt(torch.tensor(self.dim)), which allocated a fresh tensor
        # on every forward call and applied sqrt to an integer-dtype tensor.
        a = torch.sum(torch.mul(Q, K), -1) / (self.dim ** 0.5)
        a = self.softmax(a)
        # Attention-weighted sum over the sequence dimension -> (batch_size, dim)
        outputs = torch.sum(torch.mul(torch.unsqueeze(a, -1), V), dim=1)
        return outputs
|
|
@@ -1,55 +1,56 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Date: create on 04/05/2022
|
|
3
|
-
References:
|
|
4
|
-
paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
|
|
5
|
-
url: https://arxiv.org/abs/1804.07931
|
|
6
|
-
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import torch
|
|
10
|
-
import torch.nn as nn
|
|
11
|
-
|
|
12
|
-
from ...basic.layers import MLP, EmbeddingLayer
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class ESMM(nn.Module):
|
|
16
|
-
"""Entire Space Multi-Task Model
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
user_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the user features.
|
|
20
|
-
item_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the item features.
|
|
21
|
-
cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
|
|
22
|
-
ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
def __init__(self, user_features, item_features, cvr_params, ctr_params):
|
|
26
|
-
super().__init__()
|
|
27
|
-
self.user_features = user_features
|
|
28
|
-
self.item_features = item_features
|
|
29
|
-
self.embedding = EmbeddingLayer(user_features + item_features)
|
|
30
|
-
self.tower_dims = len(user_features) * user_features[0].embed_dim + len(item_features) * item_features[0].embed_dim
|
|
31
|
-
self.tower_cvr = MLP(self.tower_dims, **cvr_params)
|
|
32
|
-
self.tower_ctr = MLP(self.tower_dims, **ctr_params)
|
|
33
|
-
|
|
34
|
-
def forward(self, x):
|
|
35
|
-
# # Field-wise Pooling Layer for user and item
|
|
36
|
-
# embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1) #[batch_size, embed_dim]
|
|
37
|
-
# embed_item_features = self.embedding(x, self.item_features,
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
#
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 04/05/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
|
|
5
|
+
url: https://arxiv.org/abs/1804.07931
|
|
6
|
+
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
import torch.nn as nn
|
|
11
|
+
|
|
12
|
+
from ...basic.layers import MLP, EmbeddingLayer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class ESMM(nn.Module):
    """Entire Space Multi-Task Model

    Args:
        user_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the user features.
        item_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the item features.
        cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
        ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
    """

    def __init__(self, user_features, item_features, cvr_params, ctr_params):
        super().__init__()
        self.user_features = user_features
        self.item_features = item_features
        self.embedding = EmbeddingLayer(user_features + item_features)
        # towers consume the flat concat of all user + item embeddings
        self.tower_dims = len(user_features) * user_features[0].embed_dim + len(item_features) * item_features[0].embed_dim
        self.tower_cvr = MLP(self.tower_dims, **cvr_params)
        self.tower_ctr = MLP(self.tower_dims, **ctr_params)

    def forward(self, x):
        # Concat all feature embeddings rather than field-wise sum-pooling them:
        # [batch_size, num_features, embed_dim] --> [batch_size, num_features * embed_dim]
        # Each lookup is done exactly once (the original ran the user-feature
        # lookup a second time just to read the batch size).
        embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).flatten(start_dim=1)
        embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).flatten(start_dim=1)

        input_tower = torch.cat((embed_user_features, embed_item_features), dim=1)
        cvr_logit = self.tower_cvr(input_tower)
        ctr_logit = self.tower_ctr(input_tower)
        cvr_pred = torch.sigmoid(cvr_logit)
        ctr_pred = torch.sigmoid(ctr_logit)
        # pCTCVR = pCTR * pCVR: lets CVR be trained over the entire exposure space
        ctcvr_pred = torch.mul(ctr_pred, cvr_pred)

        ys = [cvr_pred, ctr_pred, ctcvr_pred]
        return torch.cat(ys, dim=1)
|
|
@@ -1,58 +1,58 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Date: create on 04/05/2022
|
|
3
|
-
References:
|
|
4
|
-
paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
|
|
5
|
-
url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
|
|
6
|
-
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import torch
|
|
10
|
-
import torch.nn as nn
|
|
11
|
-
|
|
12
|
-
from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
class MMOE(nn.Module):
|
|
16
|
-
"""Multi-gate Mixture-of-Experts model.
|
|
17
|
-
|
|
18
|
-
Args:
|
|
19
|
-
features (list): the list of `Feature Class`, training by the expert and tower module.
|
|
20
|
-
task_types (list): types of tasks, only support `["classfication", "regression"]`.
|
|
21
|
-
n_expert (int): the number of expert net.
|
|
22
|
-
expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
|
|
23
|
-
tower_params_list (list): the list of tower params dict, the keys same as expert_params.
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
def __init__(self, features, task_types, n_expert, expert_params, tower_params_list):
|
|
27
|
-
super().__init__()
|
|
28
|
-
self.features = features
|
|
29
|
-
self.task_types = task_types
|
|
30
|
-
self.n_task = len(task_types)
|
|
31
|
-
self.n_expert = n_expert
|
|
32
|
-
self.embedding = EmbeddingLayer(features)
|
|
33
|
-
self.input_dims = sum([fea.embed_dim for fea in features])
|
|
34
|
-
self.experts = nn.ModuleList(
|
|
35
|
-
|
|
36
|
-
self.
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
expert_pooling = torch.sum(expert_weight, dim=1)
|
|
55
|
-
tower_out = tower(expert_pooling) #[batch_size, 1]
|
|
56
|
-
y = predict_layer(tower_out) #logit -> proba
|
|
57
|
-
ys.append(y)
|
|
58
|
-
return torch.cat(ys, dim=1)
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 04/05/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
|
|
5
|
+
url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
|
|
6
|
+
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
import torch.nn as nn
|
|
11
|
+
|
|
12
|
+
from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class MMOE(nn.Module):
    """Multi-gate Mixture-of-Experts model.

    Args:
        features (list): the list of `Feature Class`, training by the expert and tower module.
        task_types (list): types of tasks, only support `["classfication", "regression"]`.
        n_expert (int): the number of expert net.
        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
        tower_params_list (list): the list of tower params dict, the keys same as expert_params.
    """

    def __init__(self, features, task_types, n_expert, expert_params, tower_params_list):
        super().__init__()
        self.features = features
        self.task_types = task_types
        self.n_task = len(task_types)  # one gate + one tower per task
        self.n_expert = n_expert
        self.embedding = EmbeddingLayer(features)
        self.input_dims = sum([fea.embed_dim for fea in features])
        # experts are shared by all tasks
        self.experts = nn.ModuleList(MLP(self.input_dims, output_layer=False, **expert_params) for i in range(self.n_expert))
        # each gate emits a softmax distribution over the n_expert experts
        self.gates = nn.ModuleList(MLP(self.input_dims, output_layer=False, **{"dims": [self.n_expert], "activation": "softmax"}) for i in range(self.n_task))  # n_gate = n_task
        self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
        self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)

    def forward(self, x):
        # [batch_size, input_dims]
        embed_x = self.embedding(x, self.features, squeeze_dim=True)
        # expert_out[i]: [batch_size, 1, expert_dims[-1]]
        expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts]
        # [batch_size, n_expert, expert_dims[-1]]
        expert_outs = torch.cat(expert_outs, dim=1)
        # gate_out[i]: [batch_size, n_expert, 1]
        gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates]

        ys = []
        for gate_out, tower, predict_layer in zip(gate_outs, self.towers, self.predict_layers):
            # weight every expert output by its gate probability (broadcast over last dim)
            # [batch_size, n_expert, expert_dims[-1]]
            expert_weight = torch.mul(gate_out, expert_outs)
            # pool experts into one representation: [batch_size, expert_dims[-1]]
            expert_pooling = torch.sum(expert_weight, dim=1)
            tower_out = tower(expert_pooling)  # [batch_size, 1]
            y = predict_layer(tower_out)  # logit -> proba
            ys.append(y)
        return torch.cat(ys, dim=1)
|