torch-rechub 0.0.1__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torch_rechub/__init__.py +14 -0
- torch_rechub/basic/activation.py +3 -1
- torch_rechub/basic/callback.py +2 -2
- torch_rechub/basic/features.py +38 -8
- torch_rechub/basic/initializers.py +92 -0
- torch_rechub/basic/layers.py +800 -46
- torch_rechub/basic/loss_func.py +223 -0
- torch_rechub/basic/metaoptimizer.py +76 -0
- torch_rechub/basic/metric.py +251 -0
- torch_rechub/models/generative/__init__.py +6 -0
- torch_rechub/models/generative/hllm.py +249 -0
- torch_rechub/models/generative/hstu.py +189 -0
- torch_rechub/models/matching/__init__.py +13 -0
- torch_rechub/models/matching/comirec.py +193 -0
- torch_rechub/models/matching/dssm.py +72 -0
- torch_rechub/models/matching/dssm_facebook.py +77 -0
- torch_rechub/models/matching/dssm_senet.py +87 -0
- torch_rechub/models/matching/gru4rec.py +85 -0
- torch_rechub/models/matching/mind.py +103 -0
- torch_rechub/models/matching/narm.py +82 -0
- torch_rechub/models/matching/sasrec.py +143 -0
- torch_rechub/models/matching/sine.py +148 -0
- torch_rechub/models/matching/stamp.py +81 -0
- torch_rechub/models/matching/youtube_dnn.py +75 -0
- torch_rechub/models/matching/youtube_sbc.py +98 -0
- torch_rechub/models/multi_task/__init__.py +5 -2
- torch_rechub/models/multi_task/aitm.py +83 -0
- torch_rechub/models/multi_task/esmm.py +19 -8
- torch_rechub/models/multi_task/mmoe.py +18 -12
- torch_rechub/models/multi_task/ple.py +41 -29
- torch_rechub/models/multi_task/shared_bottom.py +3 -2
- torch_rechub/models/ranking/__init__.py +13 -2
- torch_rechub/models/ranking/afm.py +65 -0
- torch_rechub/models/ranking/autoint.py +102 -0
- torch_rechub/models/ranking/bst.py +61 -0
- torch_rechub/models/ranking/dcn.py +38 -0
- torch_rechub/models/ranking/dcn_v2.py +59 -0
- torch_rechub/models/ranking/deepffm.py +131 -0
- torch_rechub/models/ranking/deepfm.py +8 -7
- torch_rechub/models/ranking/dien.py +191 -0
- torch_rechub/models/ranking/din.py +31 -19
- torch_rechub/models/ranking/edcn.py +101 -0
- torch_rechub/models/ranking/fibinet.py +42 -0
- torch_rechub/models/ranking/widedeep.py +6 -6
- torch_rechub/trainers/__init__.py +4 -2
- torch_rechub/trainers/ctr_trainer.py +191 -0
- torch_rechub/trainers/match_trainer.py +239 -0
- torch_rechub/trainers/matching.md +3 -0
- torch_rechub/trainers/mtl_trainer.py +137 -23
- torch_rechub/trainers/seq_trainer.py +293 -0
- torch_rechub/utils/__init__.py +0 -0
- torch_rechub/utils/data.py +492 -0
- torch_rechub/utils/hstu_utils.py +198 -0
- torch_rechub/utils/match.py +457 -0
- torch_rechub/utils/mtl.py +136 -0
- torch_rechub/utils/onnx_export.py +353 -0
- torch_rechub-0.0.4.dist-info/METADATA +391 -0
- torch_rechub-0.0.4.dist-info/RECORD +62 -0
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info}/WHEEL +1 -2
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info/licenses}/LICENSE +1 -1
- torch_rechub/basic/utils.py +0 -168
- torch_rechub/trainers/trainer.py +0 -111
- torch_rechub-0.0.1.dist-info/METADATA +0 -105
- torch_rechub-0.0.1.dist-info/RECORD +0 -26
- torch_rechub-0.0.1.dist-info/top_level.txt +0 -1
--- /dev/null
+++ b/torch_rechub/models/matching/sine.py
@@ -0,0 +1,148 @@
+"""
+Date: created on 03/07/2022
+References:
+    paper: Sparse-Interest Network for Sequential Recommendation
+    url: https://arxiv.org/abs/2102.09267
+    code: https://github.com/Qiaoyut/SINE/blob/master/model.py
+Authors: Bo Kang, klinux@live.com
+"""
+
+import torch
+import torch.nn.functional as F
+from torch import einsum
+
+
+class SINE(torch.nn.Module):
+    """The match model proposed in the `Sparse-Interest Network for Sequential Recommendation` paper.
+
+    Args:
+        history_features (list[str]): training history feature names, used for indexing the historical sequences from the input dictionary
+        item_features (list[str]): item feature names, used for indexing the items from the input dictionary
+        neg_item_features (list[str]): negative item feature names, used for indexing negative items from the input dictionary
+        num_items (int): number of items in the data
+        embedding_dim (int): dimensionality of the embeddings
+        hidden_dim (int): dimensionality of the hidden layer in the self-attention modules
+        num_concept (int): number of concepts, also called conceptual prototypes
+        num_intention (int): number of (user-specific) intentions selected out of the concepts
+        seq_max_len (int): max sequence length of the input item sequence
+        num_heads (int): number of attention heads in the self-attention modules, default to 1
+        temperature (float): temperature factor in the similarity measure, default to 1.0
+    """
+
+    def __init__(self, history_features, item_features, neg_item_features, num_items, embedding_dim, hidden_dim, num_concept, num_intention, seq_max_len, num_heads=1, temperature=1.0):
+        super().__init__()
+        self.item_features = item_features
+        self.history_features = history_features
+        self.neg_item_features = neg_item_features
+        self.temperature = temperature
+        self.num_concept = num_concept
+        self.num_intention = num_intention
+        self.seq_max_len = seq_max_len
+
+        std = 1e-4
+        self.item_embedding = torch.nn.Embedding(num_items, embedding_dim)
+        torch.nn.init.normal_(self.item_embedding.weight, 0, std)
+        self.concept_embedding = torch.nn.Embedding(num_concept, embedding_dim)
+        torch.nn.init.normal_(self.concept_embedding.weight, 0, std)
+        self.position_embedding = torch.nn.Embedding(seq_max_len, embedding_dim)
+        torch.nn.init.normal_(self.position_embedding.weight, 0, std)
+
+        self.w_1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+        self.w_2 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)
+
+        self.w_3 = torch.nn.Parameter(torch.rand(embedding_dim, embedding_dim), requires_grad=True)
+
+        self.w_k1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+        self.w_k2 = torch.nn.Parameter(torch.rand(hidden_dim, num_intention), requires_grad=True)
+
+        self.w_4 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+        self.w_5 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)
+
+        self.mode = None
+
+    def forward(self, x):
+        user_embedding = self.user_tower(x)
+        item_embedding = self.item_tower(x)
+        if self.mode == "user":
+            return user_embedding
+        if self.mode == "item":
+            return item_embedding
+
+        y = torch.mul(user_embedding, item_embedding).sum(dim=-1)
+
+        # # compute covariance regularizer
+        # M = torch.cov(self.concept_embedding.weight, correction=0)
+        # l_c = (torch.norm(M, p='fro')**2 - torch.norm(torch.diag(M), p='fro')**2)/2
+
+        return y
+
+    def user_tower(self, x):
+        if self.mode == "item":
+            return None
+
+
+        # sparse interests extraction
+        # # user specific historical item embedding X^u
+        hist_item = x[self.history_features[0]]
+        x_u = self.item_embedding(hist_item) + \
+            self.position_embedding.weight.unsqueeze(0)
+        x_u_mask = (x[self.history_features[0]] > 0).long()
+
+        # # user specific conceptual prototypes C^u
+        # ## attention a
+        h_1 = einsum('bse, ed -> bsd', x_u, self.w_1).tanh()
+        a_hist = F.softmax(einsum('bsd, dh -> bsh', h_1, self.w_2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)
+
+        # ## virtual concept vector z_u
+        z_u = einsum("bse, bsh -> be", x_u, a_hist)
+
+        # ## similarity between user's concept vector and entire conceptual prototypes s^u
+        s_u = einsum("be, te -> bt", z_u, self.concept_embedding.weight)
+        s_u_top_k = torch.topk(s_u, self.num_intention)
+
+        # ## final C^u
+        c_u = einsum("bk, bke -> bke", torch.sigmoid(s_u_top_k.values), self.concept_embedding(s_u_top_k.indices))
+
+        # # user intention assignment P_{k|t}
+        p_u = F.softmax(einsum("bse, bke -> bks", F.normalize(x_u @ self.w_3, dim=-1), F.normalize(c_u, p=2, dim=-1)), dim=1)
+
+        # # attention weighing P_{t|k}
+        h_2 = einsum('bse, ed -> bsd', x_u, self.w_k1).tanh()
+        a_concept_k = F.softmax(einsum('bsd, dk -> bsk', h_2, self.w_k2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)
+
+        # # multiple interests encoding \phi_\theta^k(x^u)
+        phi_u = einsum("bks, bse -> bke", p_u * a_concept_k.permute(0, 2, 1), x_u)
+
+        # adaptive interest aggregation
+        # # intention aware input behavior \hat{X^u}
+        x_u_hat = einsum('bks, bke -> bse', p_u, c_u)
+
+        # # user's next intention C^u_{apt}
+        h_3 = einsum('bse, ed -> bsd', x_u_hat, self.w_4).tanh()
+        c_u_apt = F.normalize(einsum("bs, bse -> be", F.softmax(einsum('bsd, dh -> bsh', h_3, self.w_5).reshape(-1, self.seq_max_len) + -1.e9 * (1 - x_u_mask.float()), dim=1), x_u_hat), -1)
+
+        # # aggregation weights e_k^u
+        e_u = F.softmax(einsum('be, bke -> bk', c_u_apt, phi_u) / self.temperature, dim=1)
+
+        # final user representation v^u
+        v_u = einsum('bk, bke -> be', e_u, phi_u)
+
+        if self.mode == "user":
+            return v_u
+        return v_u.unsqueeze(1)
+
+    def item_tower(self, x):
+        if self.mode == "user":
+            return None
+        pos_embedding = self.item_embedding(x[self.item_features[0]]).unsqueeze(1)
+        if self.mode == "item":  # inference embedding mode
+            return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
+        neg_embeddings = self.item_embedding(x[self.neg_item_features[0]]).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
+
+        # [batch_size, 1+n_neg_items, embed_dim]
+        return torch.cat((pos_embedding, neg_embeddings), dim=1)
+
+    def gen_mask(self, x):
+        his_list = x[self.history_features[0].name]
+        mask = (his_list > 0).long()
+        return mask
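For orientation, a minimal smoke test of the SINE module above might look like the following sketch; the feature names, vocabulary size, and the `torch_rechub.models.matching` import path are assumptions (the export is suggested by the matching/__init__.py entry in the file list, not shown here):

    # Hypothetical smoke test for SINE (not shipped in the package).
    import torch
    from torch_rechub.models.matching import SINE  # assumed export path

    model = SINE(history_features=["hist_item_id"], item_features=["item_id"],
                 neg_item_features=["neg_items"], num_items=1000, embedding_dim=16,
                 hidden_dim=32, num_concept=10, num_intention=4, seq_max_len=20)
    x = {
        "hist_item_id": torch.randint(1, 1000, (8, 20)),  # id 0 is treated as padding
        "item_id": torch.randint(1, 1000, (8,)),          # one positive item per row
        "neg_items": torch.randint(1, 1000, (8, 3)),      # three sampled negatives per row
    }
    scores = model(x)  # [8, 1 + 3]: positive score first, then the negatives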
--- /dev/null
+++ b/torch_rechub/models/matching/stamp.py
@@ -0,0 +1,81 @@
+"""
+Date: created on 17/09/2022
+References:
+    paper: STAMP: Short-Term Attention/Memory Priority Model for Session-based Recommendation
+    url: https://dl.acm.org/doi/10.1145/3219819.3219950
+    official Tensorflow implementation: https://github.com/uestcnlp/STAMP
+Authors: Bo Kang, klinux@live.com
+"""
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+
+
+class STAMP(nn.Module):
+
+    def __init__(self, item_history_feature, weight_std, emb_std):
+        super(STAMP, self).__init__()
+
+        # item embedding layer
+        self.item_history_feature = item_history_feature
+        n_items, item_emb_dim = item_history_feature.vocab_size, item_history_feature.embed_dim
+        self.item_emb = nn.Embedding(n_items, item_emb_dim, padding_idx=0)
+
+        # weights and biases for attention computation
+        self.w_0 = nn.Parameter(torch.zeros(item_emb_dim, 1))
+        self.w_1_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+        self.w_2_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+        self.w_3_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+        self.b_a = nn.Parameter(torch.zeros(item_emb_dim))
+        self._init_parameter_weights(weight_std)
+
+        # mlp layers
+        self.f_s = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
+        self.f_t = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
+        self.emb_std = emb_std
+        self.apply(self._init_module_weights)
+
+    def _init_parameter_weights(self, weight_std):
+        nn.init.normal_(self.w_0, std=weight_std)
+        nn.init.normal_(self.w_1_t, std=weight_std)
+        nn.init.normal_(self.w_2_t, std=weight_std)
+        nn.init.normal_(self.w_3_t, std=weight_std)
+
+    def _init_module_weights(self, module):
+        if isinstance(module, nn.Linear):
+            module.weight.data.normal_(std=self.emb_std)
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.Embedding):
+            module.weight.data.normal_(std=self.emb_std)
+
+    def forward(self, input_dict):
+        # Index the embeddings for the items in the session
+        input = input_dict[self.item_history_feature.name]
+        value_mask = (input != 0).unsqueeze(-1)
+        value_counts = value_mask.sum(dim=1, keepdim=True).squeeze(-1)
+        item_emb_batch = self.item_emb(input) * value_mask
+
+        # Index the embeddings of the latest clicked items
+        x_t = self.item_emb(torch.gather(input, 1, value_counts - 1))
+
+        # Eq. 2, user's general interest in the current session
+        m_s = ((item_emb_batch).sum(1) / value_counts).unsqueeze(1)
+
+        # Eq. 7, compute attention coefficient
+        a = F.normalize(torch.exp(torch.sigmoid(item_emb_batch @ self.w_1_t + x_t @ self.w_2_t + m_s @ self.w_3_t + self.b_a) @ self.w_0) * value_mask, p=1, dim=1)
+
+        # Eq. 8, compute user's attention-based interests
+        m_a = (a * item_emb_batch).sum(1) + m_s.squeeze(1)
+
+        # Eq. 3, compute the output state of the general interest
+        h_s = self.f_s(m_a)
+
+        # Eq. 9, compute the output state of the short-term interest
+        h_t = self.f_t(x_t).squeeze(1)
+
+        # Eq. 4, compute candidate scores
+        z = h_s * h_t @ self.item_emb.weight.T
+
+        return z
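STAMP scores every item in the vocabulary against the session state, so a usage sketch only needs a feature descriptor exposing `name`, `vocab_size`, and `embed_dim`; the stand-in namespace below is an assumption replacing the package's own feature class:

    # Hypothetical smoke test for STAMP; the stand-in feature object is illustrative.
    from types import SimpleNamespace
    import torch

    feat = SimpleNamespace(name="hist_item_id", vocab_size=1000, embed_dim=64)
    model = STAMP(item_history_feature=feat, weight_std=0.05, emb_std=0.002)
    # Sessions must be right-padded with 0: forward() gathers the item at
    # position value_counts - 1 as the last click x_t.
    x = {"hist_item_id": torch.randint(1, 1000, (4, 10))}
    z = model(x)  # [4, 1000]: one score per item in the vocabulary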
--- /dev/null
+++ b/torch_rechub/models/matching/youtube_dnn.py
@@ -0,0 +1,75 @@
+"""
+Date: create on 23/05/2022
+References:
+    paper: (RecSys'2016) Deep Neural Networks for YouTube Recommendations
+    url: https://dl.acm.org/doi/10.1145/2959100.2959190
+Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+"""
+
+import torch
+import torch.nn.functional as F
+
+from ...basic.layers import MLP, EmbeddingLayer
+
+
+class YoutubeDNN(torch.nn.Module):
+    """The match model mentioned in the `Deep Neural Networks for YouTube Recommendations` paper.
+    It's a DSSM match model trained with a global softmax loss on list-wise samples.
+    Note that the original paper has no item DNN tower; the item embedding is trained directly.
+
+    Args:
+        user_features (list[Feature Class]): training by the user tower module.
+        item_features (list[Feature Class]): training by the embedding table; it's the item id feature.
+        neg_item_feature (list[Feature Class]): training by the embedding table; it's the negative item id feature.
+        user_params (dict): the params of the User Tower module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`.
+        temperature (float): temperature factor for the similarity score, default to 1.0.
+    """
+
+    def __init__(self, user_features, item_features, neg_item_feature, user_params, temperature=1.0):
+        super().__init__()
+        self.user_features = user_features
+        self.item_features = item_features
+        self.neg_item_feature = neg_item_feature
+        self.temperature = temperature
+        self.user_dims = sum([fea.embed_dim for fea in user_features])
+        self.embedding = EmbeddingLayer(user_features + item_features)
+        self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
+        self.mode = None
+
+    def forward(self, x):
+        user_embedding = self.user_tower(x)
+        item_embedding = self.item_tower(x)
+        if self.mode == "user":
+            return user_embedding
+        if self.mode == "item":
+            return item_embedding
+
+
+        # calculate cosine score
+        y = torch.mul(user_embedding, item_embedding).sum(dim=2)
+        y = y / self.temperature
+        return y
+
+    def user_tower(self, x):
+        if self.mode == "item":
+            return None
+        # [batch_size, num_features*deep_dims]
+        input_user = self.embedding(x, self.user_features, squeeze_dim=True)
+        user_embedding = self.user_mlp(input_user).unsqueeze(1)  # [batch_size, 1, embed_dim]
+        user_embedding = F.normalize(user_embedding, p=2, dim=2)
+        if self.mode == "user":
+            # inference embedding mode -> [batch_size, embed_dim]
+            return user_embedding.squeeze(1)
+        return user_embedding
+
+    def item_tower(self, x):
+        if self.mode == "user":
+            return None
+        pos_embedding = self.embedding(x, self.item_features, squeeze_dim=False)  # [batch_size, 1, embed_dim]
+        pos_embedding = F.normalize(pos_embedding, p=2, dim=2)
+        if self.mode == "item":  # inference embedding mode
+            return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
+        neg_embeddings = self.embedding(x, self.neg_item_feature, squeeze_dim=False).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
+        neg_embeddings = F.normalize(neg_embeddings, p=2, dim=2)
+        # [batch_size, 1+n_neg_items, embed_dim]
+        return torch.cat((pos_embedding, neg_embeddings), dim=1)
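A construction sketch, loosely modeled on the package's MovieLens matching examples; the `SparseFeature`/`SequenceFeature` constructors from torch_rechub.basic.features are assumed, and every name, vocabulary size, and dimension is illustrative:

    import torch
    from torch_rechub.basic.features import SparseFeature, SequenceFeature  # assumed API
    from torch_rechub.models.matching import YoutubeDNN

    n_items = 1000
    user_features = [SparseFeature("user_id", vocab_size=100, embed_dim=16),
                     SequenceFeature("hist_item_id", vocab_size=n_items, embed_dim=16,
                                     pooling="mean", shared_with="item_id")]
    item_features = [SparseFeature("item_id", vocab_size=n_items, embed_dim=16)]
    neg_item_feature = [SequenceFeature("neg_items", vocab_size=n_items, embed_dim=16,
                                        pooling="concat", shared_with="item_id")]
    # user_params["dims"][-1] must equal the item embed_dim, since user and item
    # vectors are compared by an inner product in forward().
    model = YoutubeDNN(user_features, item_features, neg_item_feature,
                       user_params={"dims": [128, 64, 16]}, temperature=0.02)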
--- /dev/null
+++ b/torch_rechub/models/matching/youtube_sbc.py
@@ -0,0 +1,98 @@
+"""
+Date: create on 23/05/2022
+References:
+    paper: (RecSys'2019) Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations
+    url: https://dl.acm.org/doi/10.1145/3298689.3346996
+Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+"""
+
+import numpy as np
+import torch
+import torch.nn.functional as F
+
+from ...basic.layers import MLP, EmbeddingLayer
+
+
+class YoutubeSBC(torch.nn.Module):
+    """Sampling-Bias-Corrected Neural Modeling for Matching by Youtube.
+    It's a DSSM match model trained with an in-batch softmax loss on list-wise samples, with a sampling-bias-correction module added.
+
+    Args:
+        user_features (list[Feature Class]): training by the user tower module.
+        item_features (list[Feature Class]): training by the item tower module.
+        sample_weight_feature (list[Feature Class]): used for sampling bias correction in training.
+        user_params (dict): the params of the User Tower module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`.
+        item_params (dict): the params of the Item Tower module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`.
+        batch_size (int): same as the batch size of the DataLoader, used in in-batch sampling.
+        n_neg (int): the number of negative samples for every positive sample, default to 3. Note it must be smaller than batch_size.
+        temperature (float): temperature factor for the similarity score, default to 1.0.
+    """
+
+    def __init__(self, user_features, item_features, sample_weight_feature, user_params, item_params, batch_size, n_neg=3, temperature=1.0):
+        super().__init__()
+        self.user_features = user_features
+        self.item_features = item_features
+        self.sample_weight_feature = sample_weight_feature
+        self.n_neg = n_neg
+        self.temperature = temperature
+        self.user_dims = sum([fea.embed_dim for fea in user_features])
+        self.item_dims = sum([fea.embed_dim for fea in item_features])
+        self.batch_size = batch_size
+        self.embedding = EmbeddingLayer(user_features + item_features + sample_weight_feature)
+        self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
+        self.item_mlp = MLP(self.item_dims, output_layer=False, **item_params)
+        self.mode = None
+
+        # in-batch sampling index
+        self.index0 = np.repeat(np.arange(batch_size), n_neg + 1)
+        self.index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
+        self.index1[np.where(self.index1 >= batch_size)] -= batch_size
+
+    def forward(self, x):
+        user_embedding = self.user_tower(x)  # (batch_size, embedding_dim)
+        item_embedding = self.item_tower(x)  # (batch_size, embedding_dim)
+        if self.mode == "user":
+            return user_embedding
+        if self.mode == "item":
+            return item_embedding
+
+
+        # pred[i, j] is the predicted score that user_i gives to item_j
+        pred = torch.cosine_similarity(user_embedding.unsqueeze(1), item_embedding, dim=2)  # (batch_size, batch_size)
+
+        # get the sample weight of the items in this batch
+        sample_weight = self.embedding(x, self.sample_weight_feature, squeeze_dim=True).squeeze(1)  # (batch_size)
+        # sampling-bias correction, using broadcasting. (batch_size, batch_size)
+        scores = pred - torch.log(sample_weight)
+
+        if user_embedding.shape[0] * (self.n_neg + 1) != self.index0.shape[0]:  # last batch
+            batch_size = user_embedding.shape[0]
+            index0 = self.index0[:batch_size * (self.n_neg + 1)]
+            index1 = self.index1[:batch_size * (self.n_neg + 1)]
+            index0[np.where(index0 >= batch_size)] -= batch_size
+            index1[np.where(index1 >= batch_size)] -= batch_size
+            scores = scores[index0, index1]  # (batch_size, 1 + self.n_neg)
+        else:
+            # (batch_size, 1 + self.n_neg)
+            scores = scores[self.index0, self.index1]
+
+        scores = scores / self.temperature
+        return scores.view(-1, self.n_neg + 1)  # (batch_size, 1 + self.n_neg)
+
+    def user_tower(self, x):
+        if self.mode == "item":
+            return None
+        # [batch_size, num_features*deep_dims]
+        input_user = self.embedding(x, self.user_features, squeeze_dim=True)
+        # [batch_size, user_params["dims"][-1]]
+        user_embedding = self.user_mlp(input_user)
+        return user_embedding
+
+    def item_tower(self, x):
+        if self.mode == "user":
+            return None
+        # [batch_size, num_features*embed_dim]
+        input_item = self.embedding(x, self.item_features, squeeze_dim=True)
+        # [batch_size, item_params["dims"][-1]]
+        item_embedding = self.item_mlp(input_item)
+        return item_embedding
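The `index0`/`index1` arrays built in `__init__` implement the in-batch negative sampling: for user i, the positive is the i-th item and the negatives are the next `n_neg` items in the same batch, wrapping around at the end. A small standalone illustration with made-up sizes:

    import numpy as np

    batch_size, n_neg = 4, 2
    index0 = np.repeat(np.arange(batch_size), n_neg + 1)
    index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
    index1[index1 >= batch_size] -= batch_size
    print(index0)  # [0 0 0 1 1 1 2 2 2 3 3 3]
    print(index1)  # [0 1 2 1 2 3 2 3 0 3 0 1]
    # scores[index0, index1].reshape(batch_size, 1 + n_neg) then puts each user's
    # own (positive) item in column 0, so the training label is always class 0.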
--- a/torch_rechub/models/multi_task/__init__.py
+++ b/torch_rechub/models/multi_task/__init__.py
@@ -1,4 +1,7 @@
-
+__all__ = ['SharedBottom', 'ESMM', 'MMOE', 'PLE', 'AITM']
+
+from .aitm import AITM
 from .esmm import ESMM
 from .mmoe import MMOE
-from .ple import PLE
+from .ple import PLE
+from .shared_bottom import SharedBottom
--- /dev/null
+++ b/torch_rechub/models/multi_task/aitm.py
@@ -0,0 +1,83 @@
+"""
+Date: create on 14/05/2022
+References:
+    paper: (KDD'2021) Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising
+    url: https://arxiv.org/abs/2105.08489
+    code: https://github.com/adtalos/AITM-torch
+Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+"""
+
+import torch
+import torch.nn as nn
+
+from ...basic.layers import MLP, EmbeddingLayer
+
+
+class AITM(nn.Module):
+    """Adaptive Information Transfer Multi-task (AITM) framework.
+    All task types must be binary classification.
+
+    Args:
+        features (list[Feature Class]): training by the whole module.
+        n_task (int): the number of binary classification tasks.
+        bottom_params (dict): the params of all the bottom MLP modules, keys include: `{"dims": list, "activation": str, "dropout": float}`.
+        tower_params_list (list): the list of tower params dicts, with the same keys as bottom_params.
+    """
+
+    def __init__(self, features, n_task, bottom_params, tower_params_list):
+        super().__init__()
+        self.features = features
+        self.n_task = n_task
+        self.input_dims = sum([fea.embed_dim for fea in features])
+        self.embedding = EmbeddingLayer(features)
+
+        self.bottoms = nn.ModuleList(MLP(self.input_dims, output_layer=False, **bottom_params) for i in range(self.n_task))
+        self.towers = nn.ModuleList(MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
+
+        self.info_gates = nn.ModuleList(MLP(bottom_params["dims"][-1], output_layer=False, dims=[bottom_params["dims"][-1]]) for i in range(self.n_task - 1))
+        self.aits = nn.ModuleList(AttentionLayer(bottom_params["dims"][-1]) for _ in range(self.n_task - 1))
+
+    def forward(self, x):
+        embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, *]
+        input_towers = [self.bottoms[i](embed_x) for i in range(self.n_task)]  # [i]: [batch_size, bottom_dims[-1]]
+        for i in range(1, self.n_task):  # for task 1:n-1
+            # [batch_size, 1, bottom_dims[-1]]
+            info = self.info_gates[i - 1](input_towers[i - 1]).unsqueeze(1)
+            # [batch_size, 2, bottom_dims[-1]]
+            ait_input = torch.cat([input_towers[i].unsqueeze(1), info], dim=1)
+            input_towers[i] = self.aits[i - 1](ait_input)
+
+        ys = []
+        for input_tower, tower in zip(input_towers, self.towers):
+            y = tower(input_tower)
+            ys.append(torch.sigmoid(y))
+        return torch.cat(ys, dim=1)
+
+
+class AttentionLayer(nn.Module):
+    """Attention for info transfer.
+
+    Args:
+        dim (int): attention dim
+
+    Shape:
+        Input: (batch_size, 2, dim)
+        Output: (batch_size, dim)
+    """
+
+    def __init__(self, dim=32):
+        super().__init__()
+        self.dim = dim
+        self.q_layer = nn.Linear(dim, dim, bias=False)
+        self.k_layer = nn.Linear(dim, dim, bias=False)
+        self.v_layer = nn.Linear(dim, dim, bias=False)
+        self.softmax = nn.Softmax(dim=1)
+
+    def forward(self, x):
+        Q = self.q_layer(x)
+        K = self.k_layer(x)
+        V = self.v_layer(x)
+        a = torch.sum(torch.mul(Q, K), -1) / torch.sqrt(torch.tensor(self.dim))
+        a = self.softmax(a)
+        outputs = torch.sum(torch.mul(torch.unsqueeze(a, -1), V), dim=1)
+        return outputs
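A hypothetical two-task setup (e.g. click -> purchase); the `SparseFeature` class and the `torch_rechub.models.multi_task` export path are assumptions based on the file list, and all names and sizes are made up:

    import torch
    from torch_rechub.basic.features import SparseFeature  # assumed API
    from torch_rechub.models.multi_task import AITM

    features = [SparseFeature("user_id", vocab_size=100, embed_dim=16),
                SparseFeature("item_id", vocab_size=200, embed_dim=16)]
    model = AITM(features, n_task=2,
                 bottom_params={"dims": [64, 32]},
                 tower_params_list=[{"dims": [8]}, {"dims": [8]}])
    x = {"user_id": torch.randint(0, 100, (4,)),
         "item_id": torch.randint(0, 200, (4,))}
    probs = model(x)  # [4, 2]: task k+1's tower input attends to task k's bottom output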
--- a/torch_rechub/models/multi_task/esmm.py
+++ b/torch_rechub/models/multi_task/esmm.py
@@ -1,7 +1,7 @@
 """
 Date: create on 04/05/2022
-References:
-    paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
+References:
+    paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
     url: https://arxiv.org/abs/1804.07931
 Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
 """
@@ -18,8 +18,8 @@ class ESMM(nn.Module):
     Args:
         user_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the user features.
         item_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the item features.
-        cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float
-        ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float
+        cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
+        ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
     """
 
     def __init__(self, user_features, item_features, cvr_params, ctr_params):
@@ -27,19 +27,30 @@ class ESMM(nn.Module):
         self.user_features = user_features
         self.item_features = item_features
         self.embedding = EmbeddingLayer(user_features + item_features)
-        self.tower_dims = user_features[0].embed_dim + item_features[0].embed_dim
+        self.tower_dims = len(user_features) * user_features[0].embed_dim + len(item_features) * item_features[0].embed_dim
         self.tower_cvr = MLP(self.tower_dims, **cvr_params)
         self.tower_ctr = MLP(self.tower_dims, **ctr_params)
 
     def forward(self, x):
-        embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
-        embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
+        # # Field-wise Pooling Layer for user and item
+        # embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
+        # embed_item_features = self.embedding(x, self.item_features,
+        #                                      squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
+
+        # Here we concat all the features instead of field-wise pooling them
+        # [batch_size, num_features, embed_dim] --> [batch_size, num_features * embed_dim]
+        _batch_size = self.embedding(x, self.user_features, squeeze_dim=False).shape[0]
+        embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).reshape(_batch_size, -1)
+        embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).reshape(_batch_size, -1)
+
+        # print('embed_user_features', embed_user_features.shape)
+
         input_tower = torch.cat((embed_user_features, embed_item_features), dim=1)
         cvr_logit = self.tower_cvr(input_tower)
         ctr_logit = self.tower_ctr(input_tower)
         cvr_pred = torch.sigmoid(cvr_logit)
         ctr_pred = torch.sigmoid(ctr_logit)
-        ctcvr_pred = torch.mul(
+        ctcvr_pred = torch.mul(ctr_pred, cvr_pred)
 
         ys = [cvr_pred, ctr_pred, ctcvr_pred]
         return torch.cat(ys, dim=1)
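The three concatenated outputs encode the ESMM identity pCTCVR = pCTR x pCVR, which is what lets CVR be learned over the entire exposure space. A sketch of how a caller might unpack them, where `model` and `x` are placeholders:

    cvr_pred, ctr_pred, ctcvr_pred = model(x).unbind(dim=1)  # each [batch_size]
    # Only ctr_pred and ctcvr_pred need explicit labels; cvr_pred is
    # supervised implicitly through ctcvr_pred = ctr_pred * cvr_pred.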
--- a/torch_rechub/models/multi_task/mmoe.py
+++ b/torch_rechub/models/multi_task/mmoe.py
@@ -1,6 +1,6 @@
 """
 Date: create on 04/05/2022
-References:
+References:
     paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
     url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
 Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
@@ -19,7 +19,7 @@ class MMOE(nn.Module):
         features (list): the list of `Feature Class`, training by the expert and tower module.
         task_types (list): types of tasks, only support `["classfication", "regression"]`.
         n_expert (int): the number of expert net.
-        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}
+        expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
         tower_params_list (list): the list of tower params dict, the keys same as expert_params.
     """
 
@@ -31,22 +31,28 @@ class MMOE(nn.Module):
         self.n_expert = n_expert
         self.embedding = EmbeddingLayer(features)
         self.input_dims = sum([fea.embed_dim for fea in features])
-        self.experts = nn.ModuleList(MLP(self.input_dims,
-        self.gates = nn.ModuleList(MLP(self.input_dims, **{"dims": [self.n_expert], "activation": "softmax"
+        self.experts = nn.ModuleList(MLP(self.input_dims, output_layer=False, **expert_params) for i in range(self.n_expert))
+        self.gates = nn.ModuleList(MLP(self.input_dims, output_layer=False, **{"dims": [self.n_expert], "activation": "softmax"}) for i in range(self.n_task))  # n_gate = n_task
         self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
         self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
 
     def forward(self, x):
-        embed_x = self.embedding(x, self.features, squeeze_dim=True)  #[batch_size, input_dims]
-        expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts]  #expert_out[i]: [batch_size, 1, expert_dims[-1]]
-        expert_outs = torch.cat(expert_outs, dim=1)  #[batch_size, n_expert, expert_dims[-1]]
-        gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates]  #gate_out[i]: [batch_size, n_expert, 1]
+        # [batch_size, input_dims]
+        embed_x = self.embedding(x, self.features, squeeze_dim=True)
+        # expert_out[i]: [batch_size, 1, expert_dims[-1]]
+        expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts]
+        # [batch_size, n_expert, expert_dims[-1]]
+        expert_outs = torch.cat(expert_outs, dim=1)
+        # gate_out[i]: [batch_size, n_expert, 1]
+        gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates]
 
         ys = []
         for gate_out, tower, predict_layer in zip(gate_outs, self.towers, self.predict_layers):
-            expert_weight = torch.mul(gate_out, expert_outs)  #[batch_size, n_expert, expert_dims[-1]]
-            expert_pooling = torch.sum(expert_weight, dim=1)  #[batch_size, expert_dims[-1]]
-            tower_out = tower(expert_pooling)  #[batch_size, 1]
-            y = predict_layer(tower_out)  #logit -> proba
+            # [batch_size, n_expert, expert_dims[-1]]
+            expert_weight = torch.mul(gate_out, expert_outs)
+            # [batch_size, expert_dims[-1]]
+            expert_pooling = torch.sum(expert_weight, dim=1)
+            tower_out = tower(expert_pooling)  # [batch_size, 1]
+            y = predict_layer(tower_out)  # logit -> proba
             ys.append(y)
         return torch.cat(ys, dim=1)
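An illustrative two-task MMOE setup, reusing the hypothetical features from the AITM sketch above; the constructor argument order follows the docstring (`features, task_types, n_expert, expert_params, tower_params_list`), and the accepted task-type strings are an assumption about `PredictionLayer`:

    import torch
    from torch_rechub.basic.features import SparseFeature  # assumed API
    from torch_rechub.models.multi_task import MMOE

    features = [SparseFeature("user_id", vocab_size=100, embed_dim=16),
                SparseFeature("item_id", vocab_size=200, embed_dim=16)]
    model = MMOE(features, task_types=["classification", "classification"],
                 n_expert=4, expert_params={"dims": [32, 16]},
                 tower_params_list=[{"dims": [8]}, {"dims": [8]}])
    x = {"user_id": torch.randint(0, 100, (4,)),
         "item_id": torch.randint(0, 200, (4,))}
    ys = model(x)  # [4, 2]: each task's gate mixes the 4 shared experts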