torch-rechub 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. torch_rechub/basic/activation.py +54 -52
  2. torch_rechub/basic/callback.py +32 -32
  3. torch_rechub/basic/features.py +94 -57
  4. torch_rechub/basic/initializers.py +92 -0
  5. torch_rechub/basic/layers.py +720 -240
  6. torch_rechub/basic/loss_func.py +34 -0
  7. torch_rechub/basic/metaoptimizer.py +72 -0
  8. torch_rechub/basic/metric.py +250 -0
  9. torch_rechub/models/matching/__init__.py +11 -0
  10. torch_rechub/models/matching/comirec.py +188 -0
  11. torch_rechub/models/matching/dssm.py +66 -0
  12. torch_rechub/models/matching/dssm_facebook.py +79 -0
  13. torch_rechub/models/matching/dssm_senet.py +75 -0
  14. torch_rechub/models/matching/gru4rec.py +87 -0
  15. torch_rechub/models/matching/mind.py +101 -0
  16. torch_rechub/models/matching/narm.py +76 -0
  17. torch_rechub/models/matching/sasrec.py +140 -0
  18. torch_rechub/models/matching/sine.py +151 -0
  19. torch_rechub/models/matching/stamp.py +83 -0
  20. torch_rechub/models/matching/youtube_dnn.py +71 -0
  21. torch_rechub/models/matching/youtube_sbc.py +98 -0
  22. torch_rechub/models/multi_task/__init__.py +5 -4
  23. torch_rechub/models/multi_task/aitm.py +84 -0
  24. torch_rechub/models/multi_task/esmm.py +55 -45
  25. torch_rechub/models/multi_task/mmoe.py +58 -52
  26. torch_rechub/models/multi_task/ple.py +130 -104
  27. torch_rechub/models/multi_task/shared_bottom.py +45 -44
  28. torch_rechub/models/ranking/__init__.py +11 -3
  29. torch_rechub/models/ranking/afm.py +63 -0
  30. torch_rechub/models/ranking/bst.py +63 -0
  31. torch_rechub/models/ranking/dcn.py +38 -0
  32. torch_rechub/models/ranking/dcn_v2.py +69 -0
  33. torch_rechub/models/ranking/deepffm.py +123 -0
  34. torch_rechub/models/ranking/deepfm.py +41 -41
  35. torch_rechub/models/ranking/dien.py +191 -0
  36. torch_rechub/models/ranking/din.py +91 -81
  37. torch_rechub/models/ranking/edcn.py +117 -0
  38. torch_rechub/models/ranking/fibinet.py +50 -0
  39. torch_rechub/models/ranking/widedeep.py +41 -41
  40. torch_rechub/trainers/__init__.py +2 -1
  41. torch_rechub/trainers/{trainer.py → ctr_trainer.py} +128 -111
  42. torch_rechub/trainers/match_trainer.py +170 -0
  43. torch_rechub/trainers/mtl_trainer.py +206 -144
  44. torch_rechub/utils/__init__.py +0 -0
  45. torch_rechub/utils/data.py +360 -0
  46. torch_rechub/utils/match.py +274 -0
  47. torch_rechub/utils/mtl.py +126 -0
  48. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/LICENSE +21 -21
  49. torch_rechub-0.0.3.dist-info/METADATA +177 -0
  50. torch_rechub-0.0.3.dist-info/RECORD +55 -0
  51. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/WHEEL +1 -1
  52. torch_rechub/basic/utils.py +0 -168
  53. torch_rechub-0.0.1.dist-info/METADATA +0 -105
  54. torch_rechub-0.0.1.dist-info/RECORD +0 -26
  55. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/top_level.txt +0 -0
torch_rechub/models/matching/sine.py
@@ -0,0 +1,151 @@
+ """
+ Date: created on 03/07/2022
+ References:
+     paper: Sparse-Interest Network for Sequential Recommendation
+     url: https://arxiv.org/abs/2102.09267
+     code: https://github.com/Qiaoyut/SINE/blob/master/model.py
+ Authors: Bo Kang, klinux@live.com
+ """
+
+ import torch
+ import torch.nn.functional as F
+ from torch import einsum
+
+ class SINE(torch.nn.Module):
+     """The match model proposed in the `Sparse-Interest Network for Sequential Recommendation` paper.
+
+     Args:
+         history_features (list[str]): history feature names, used to index the historical sequences in the input dictionary
+         item_features (list[str]): item feature names, used to index the items in the input dictionary
+         neg_item_features (list[str]): negative item feature names, used to index the negative items in the input dictionary
+         num_items (int): number of items in the data
+         embedding_dim (int): dimensionality of the embeddings
+         hidden_dim (int): dimensionality of the hidden layer in the self-attention modules
+         num_concept (int): number of concepts, also called conceptual prototypes
+         num_intention (int): number of (user-specific) intentions selected from the concepts
+         seq_max_len (int): max length of the input item sequences
+         num_heads (int): number of attention heads in the self-attention modules, default to 1
+         temperature (float): temperature factor in the similarity measure, default to 1.0
+     """
+
+     def __init__(self, history_features, item_features, neg_item_features, num_items, embedding_dim, hidden_dim, num_concept, num_intention, seq_max_len, num_heads=1, temperature=1.0):
+         super().__init__()
+         self.item_features = item_features
+         self.history_features = history_features
+         self.neg_item_features = neg_item_features
+         self.temperature = temperature
+         self.num_concept = num_concept
+         self.num_intention = num_intention
+         self.seq_max_len = seq_max_len
+
+         std = 1e-4
+         self.item_embedding = torch.nn.Embedding(num_items, embedding_dim)
+         torch.nn.init.normal_(self.item_embedding.weight, 0, std)
+         self.concept_embedding = torch.nn.Embedding(num_concept, embedding_dim)
+         torch.nn.init.normal_(self.concept_embedding.weight, 0, std)
+         self.position_embedding = torch.nn.Embedding(seq_max_len, embedding_dim)
+         torch.nn.init.normal_(self.position_embedding.weight, 0, std)
+
+         self.w_1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+         self.w_2 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)
+
+         self.w_3 = torch.nn.Parameter(torch.rand(embedding_dim, embedding_dim), requires_grad=True)
+
+         self.w_k1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+         self.w_k2 = torch.nn.Parameter(torch.rand(hidden_dim, num_intention), requires_grad=True)
+
+         self.w_4 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+         self.w_5 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)
+
+         self.mode = None
+
+     def forward(self, x):
+         user_embedding = self.user_tower(x)
+         item_embedding = self.item_tower(x)
+         if self.mode == "user":
+             return user_embedding
+         if self.mode == "item":
+             return item_embedding
+
+         y = torch.mul(user_embedding, item_embedding).sum(dim=-1)
+
+         # # compute covariance regularizer
+         # M = torch.cov(self.concept_embedding.weight, correction=0)
+         # l_c = (torch.norm(M, p='fro')**2 - torch.norm(torch.diag(M), p='fro')**2) / 2
+
+         return y
+
+     def user_tower(self, x):
+         if self.mode == "item":
+             return None
+
+         # sparse interest extraction
+         ## user-specific historical item embeddings X^u
+         hist_item = x[self.history_features[0]]
+         x_u = self.item_embedding(hist_item) + self.position_embedding.weight.unsqueeze(0)
+         x_u_mask = (hist_item > 0).long()
+
+         ## user-specific conceptual prototypes C^u
+         ### attention a
+         h_1 = einsum('bse, ed -> bsd', x_u, self.w_1).tanh()
+         a_hist = F.softmax(einsum('bsd, dh -> bsh', h_1, self.w_2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)
+
+         ### virtual concept vector z_u
+         z_u = einsum("bse, bsh -> be", x_u, a_hist)
+
+         ### similarity between the user's concept vector and all conceptual prototypes s^u
+         s_u = einsum("be, te -> bt", z_u, self.concept_embedding.weight)
+         s_u_top_k = torch.topk(s_u, self.num_intention)
+
+         ### final C^u
+         c_u = einsum("bk, bke -> bke", torch.sigmoid(s_u_top_k.values), self.concept_embedding(s_u_top_k.indices))
+
+         ## user intention assignment P_{k|t}
+         p_u = F.softmax(einsum("bse, bke -> bks", F.normalize(x_u @ self.w_3, dim=-1), F.normalize(c_u, p=2, dim=-1)), dim=1)
+
+         ## attention weighting P_{t|k}
+         h_2 = einsum('bse, ed -> bsd', x_u, self.w_k1).tanh()
+         a_concept_k = F.softmax(einsum('bsd, dk -> bsk', h_2, self.w_k2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)
+
+         ## multiple interest encoding \phi_\theta^k(x^u)
+         phi_u = einsum("bks, bse -> bke", p_u * a_concept_k.permute(0, 2, 1), x_u)
+
+         # adaptive interest aggregation
+         ## intention-aware input behavior \hat{X^u}
+         x_u_hat = einsum('bks, bke -> bse', p_u, c_u)
+
+         ## user's next intention C^u_{apt}
+         h_3 = einsum('bse, ed -> bsd', x_u_hat, self.w_4).tanh()
+         c_u_apt = F.normalize(
+             einsum(
+                 "bs, bse -> be",
+                 F.softmax(einsum('bsd, dh -> bsh', h_3, self.w_5).reshape(-1, self.seq_max_len) + -1.e9 * (1 - x_u_mask.float()), dim=1),
+                 x_u_hat
+             ), dim=-1)  # dim must be passed by keyword; positionally it would be read as p
+
+         ## aggregation weights e_k^u
+         e_u = F.softmax(einsum('be, bke -> bk', c_u_apt, phi_u) / self.temperature, dim=1)
+
+         # final user representation v^u
+         v_u = einsum('bk, bke -> be', e_u, phi_u)
+
+         if self.mode == "user":
+             return v_u
+         return v_u.unsqueeze(1)
+
+     def item_tower(self, x):
+         if self.mode == "user":
+             return None
+         pos_embedding = self.item_embedding(x[self.item_features[0]]).unsqueeze(1)
+         if self.mode == "item":  # inference embedding mode
+             return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
+         neg_embeddings = self.item_embedding(x[self.neg_item_features[0]]).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
+
+         return torch.cat((pos_embedding, neg_embeddings), dim=1)  # [batch_size, 1+n_neg_items, embed_dim]
+
+     def gen_mask(self, x):
+         his_list = x[self.history_features[0]]  # history_features holds plain names, so index the dict directly
+         mask = (his_list > 0).long()
+         return mask
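For orientation, here is a minimal smoke test of the new module. This is a sketch, not part of the diff: the feature names and sizes are invented, and the import assumes the matching package's __init__.py (also added in this release) re-exports SINE.

import torch
from torch_rechub.models.matching import SINE

batch_size, seq_len = 4, 20
model = SINE(history_features=["hist_item_id"], item_features=["item_id"],
             neg_item_features=["neg_items"], num_items=1000, embedding_dim=16,
             hidden_dim=32, num_concept=8, num_intention=2, seq_max_len=seq_len)

x = {
    "hist_item_id": torch.randint(1, 1000, (batch_size, seq_len)),  # 0 is reserved for padding
    "item_id": torch.randint(1, 1000, (batch_size,)),               # one positive item per user
    "neg_items": torch.randint(1, 1000, (batch_size, 3)),           # 3 sampled negatives per user
}
scores = model(x)
print(scores.shape)  # torch.Size([4, 4]) = [batch_size, 1 + n_neg_items]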
torch_rechub/models/matching/stamp.py
@@ -0,0 +1,83 @@
+ """
+ Date: created on 17/09/2022
+ References:
+     paper: STAMP: Short-Term Attention/Memory Priority Model for Session-based Recommendation
+     url: https://dl.acm.org/doi/10.1145/3219819.3219950
+     official Tensorflow implementation: https://github.com/uestcnlp/STAMP
+ Authors: Bo Kang, klinux@live.com
+ """
+
+ import torch
+ import torch.nn.functional as F
+ import torch.nn as nn
+
+
+ class STAMP(nn.Module):
+     def __init__(self, item_history_feature, weight_std, emb_std):
+         super(STAMP, self).__init__()
+
+         # item embedding layer
+         self.item_history_feature = item_history_feature
+         n_items, item_emb_dim = item_history_feature.vocab_size, item_history_feature.embed_dim
+         self.item_emb = nn.Embedding(n_items, item_emb_dim, padding_idx=0)
+
+         # weights and biases for attention computation
+         self.w_0 = nn.Parameter(torch.zeros(item_emb_dim, 1))
+         self.w_1_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+         self.w_2_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+         self.w_3_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+         self.b_a = nn.Parameter(torch.zeros(item_emb_dim))
+         self._init_parameter_weights(weight_std)
+
+         # mlp layers
+         self.f_s = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
+         self.f_t = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
+         self.emb_std = emb_std
+         self.apply(self._init_module_weights)
+
+     def _init_parameter_weights(self, weight_std):
+         nn.init.normal_(self.w_0, std=weight_std)
+         nn.init.normal_(self.w_1_t, std=weight_std)
+         nn.init.normal_(self.w_2_t, std=weight_std)
+         nn.init.normal_(self.w_3_t, std=weight_std)
+
+     def _init_module_weights(self, module):
+         if isinstance(module, nn.Linear):
+             module.weight.data.normal_(std=self.emb_std)
+             if module.bias is not None:
+                 module.bias.data.zero_()
+         elif isinstance(module, nn.Embedding):
+             module.weight.data.normal_(std=self.emb_std)
+
+     def forward(self, input_dict):
+         # Index the embeddings for the items in the session
+         input = input_dict[self.item_history_feature.name]
+         value_mask = (input != 0).unsqueeze(-1)
+         value_counts = value_mask.sum(dim=1, keepdim=True).squeeze(-1)
+         item_emb_batch = self.item_emb(input) * value_mask
+
+         # Index the embeddings of the latest clicked items
+         x_t = self.item_emb(torch.gather(input, 1, value_counts - 1))
+
+         # Eq. 2, user's general interest in the current session
+         m_s = (item_emb_batch.sum(1) / value_counts).unsqueeze(1)
+
+         # Eq. 7, compute attention coefficients
+         a = F.normalize(torch.exp(torch.sigmoid(item_emb_batch @ self.w_1_t + x_t @ self.w_2_t + m_s @ self.w_3_t + self.b_a) @ self.w_0) * value_mask, p=1, dim=1)
+
+         # Eq. 8, compute user's attention-based interests
+         m_a = (a * item_emb_batch).sum(1) + m_s.squeeze(1)
+
+         # Eq. 3, compute the output state of the general interest
+         h_s = self.f_s(m_a)
+
+         # Eq. 9, compute the output state of the short-term interest
+         h_t = self.f_t(x_t).squeeze(1)
+
+         # Eq. 4, compute candidate scores
+         z = (h_s * h_t) @ self.item_emb.weight.T
+
+         return z
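A quick shape check (illustrative, not from the diff): STAMP only reads .name, .vocab_size and .embed_dim from its feature argument, so a SimpleNamespace stand-in is enough here; in real use this would be one of the package's sequence feature classes from torch_rechub/basic/features.py.

import torch
from types import SimpleNamespace
from torch_rechub.models.matching import STAMP

feat = SimpleNamespace(name="hist_item_id", vocab_size=1000, embed_dim=16)
model = STAMP(item_history_feature=feat, weight_std=0.05, emb_std=0.002)

sessions = torch.randint(1, 1000, (4, 10))  # item ids, 0 is the padding id
sessions[:, 5:] = 0                         # right-padded sessions of length 5
z = model({"hist_item_id": sessions})
print(z.shape)  # torch.Size([4, 1000]) = [batch_size, n_items] candidate scores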
torch_rechub/models/matching/youtube_dnn.py
@@ -0,0 +1,71 @@
+ """
+ Date: created on 23/05/2022
+ References:
+     paper: (RecSys'2016) Deep Neural Networks for YouTube Recommendations
+     url: https://dl.acm.org/doi/10.1145/2959100.2959190
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+
+ import torch
+ import torch.nn.functional as F
+ from ...basic.layers import MLP, EmbeddingLayer
+
+
+ class YoutubeDNN(torch.nn.Module):
+     """The match model described in the `Deep Neural Networks for YouTube Recommendations` paper.
+     It's a DSSM-style match model trained with a global softmax loss on list-wise samples.
+     Note that the original paper has no item DNN tower; the item embeddings are trained directly.
+
+     Args:
+         user_features (list[Feature Class]): features fed to the user tower module.
+         item_features (list[Feature Class]): features fed to the embedding table; this is the item id feature.
+         neg_item_feature (list[Feature Class]): features fed to the embedding table; this is the negative item id feature.
+         user_params (dict): the params of the user tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`.
+         temperature (float): temperature factor for the similarity score, default to 1.0.
+     """
+
+     def __init__(self, user_features, item_features, neg_item_feature, user_params, temperature=1.0):
+         super().__init__()
+         self.user_features = user_features
+         self.item_features = item_features
+         self.neg_item_feature = neg_item_feature
+         self.temperature = temperature
+         self.user_dims = sum([fea.embed_dim for fea in user_features])
+         self.embedding = EmbeddingLayer(user_features + item_features)
+         self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
+         self.mode = None
+
+     def forward(self, x):
+         user_embedding = self.user_tower(x)
+         item_embedding = self.item_tower(x)
+         if self.mode == "user":
+             return user_embedding
+         if self.mode == "item":
+             return item_embedding
+
+         # calculate cosine score
+         y = torch.mul(user_embedding, item_embedding).sum(dim=2)
+         y = y / self.temperature
+         return y
+
+     def user_tower(self, x):
+         if self.mode == "item":
+             return None
+         input_user = self.embedding(x, self.user_features, squeeze_dim=True)  # [batch_size, num_features*deep_dims]
+         user_embedding = self.user_mlp(input_user).unsqueeze(1)  # [batch_size, 1, embed_dim]
+         user_embedding = F.normalize(user_embedding, p=2, dim=2)
+         if self.mode == "user":
+             return user_embedding.squeeze(1)  # inference embedding mode -> [batch_size, embed_dim]
+         return user_embedding
+
+     def item_tower(self, x):
+         if self.mode == "user":
+             return None
+         pos_embedding = self.embedding(x, self.item_features, squeeze_dim=False)  # [batch_size, 1, embed_dim]
+         pos_embedding = F.normalize(pos_embedding, p=2, dim=2)
+         if self.mode == "item":  # inference embedding mode
+             return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
+         neg_embeddings = self.embedding(x, self.neg_item_feature,
+                                         squeeze_dim=False).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
+         neg_embeddings = F.normalize(neg_embeddings, p=2, dim=2)
+         return torch.cat((pos_embedding, neg_embeddings), dim=1)  # [batch_size, 1+n_neg_items, embed_dim]
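Because both towers L2-normalize their outputs, forward() returns temperature-scaled cosine scores of shape [batch_size, 1 + n_neg_items] with the positive item in column 0. A sketch of the list-wise softmax loss this pairs with (the actual trainer wiring lives in match_trainer.py and is an assumption here, not shown in this hunk):

import torch
import torch.nn.functional as F

scores = torch.randn(8, 1 + 4)  # stand-in for model(x): column 0 is the positive item
labels = torch.zeros(scores.size(0), dtype=torch.long)  # the positive is always index 0
loss = F.cross_entropy(scores, labels)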
torch_rechub/models/matching/youtube_sbc.py
@@ -0,0 +1,98 @@
+ """
+ Date: created on 23/05/2022
+ References:
+     paper: (RecSys'2019) Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations
+     url: https://dl.acm.org/doi/10.1145/3298689.3346996
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ from ...basic.layers import MLP, EmbeddingLayer
+
+
+ class YoutubeSBC(torch.nn.Module):
+     """Sampling-Bias-Corrected Neural Modeling for Matching by Youtube.
+     It's a DSSM-style match model trained with an in-batch softmax loss on list-wise samples, plus a sampling-bias-correction module.
+
+     Args:
+         user_features (list[Feature Class]): features fed to the user tower module.
+         item_features (list[Feature Class]): features fed to the item tower module.
+         sample_weight_feature (list[Feature Class]): used for sampling-bias correction during training.
+         user_params (dict): the params of the user tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`.
+         item_params (dict): the params of the item tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`.
+         batch_size (int): same as the batch size of the DataLoader, used for in-batch sampling.
+         n_neg (int): the number of negative samples for every positive sample, default to 3. Note that it must be smaller than batch_size.
+         temperature (float): temperature factor for the similarity score, default to 1.0.
+     """
+
+     def __init__(self,
+                  user_features,
+                  item_features,
+                  sample_weight_feature,
+                  user_params,
+                  item_params,
+                  batch_size,
+                  n_neg=3,
+                  temperature=1.0):
+         super().__init__()
+         self.user_features = user_features
+         self.item_features = item_features
+         self.sample_weight_feature = sample_weight_feature
+         self.n_neg = n_neg
+         self.temperature = temperature
+         self.user_dims = sum([fea.embed_dim for fea in user_features])
+         self.item_dims = sum([fea.embed_dim for fea in item_features])
+         self.batch_size = batch_size
+         self.embedding = EmbeddingLayer(user_features + item_features + sample_weight_feature)
+         self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
+         self.item_mlp = MLP(self.item_dims, output_layer=False, **item_params)
+         self.mode = None
+
+         # in-batch sampling indices: for user i, pick item i (positive) and items i+1..i+n_neg (negatives, with wrap-around)
+         self.index0 = np.repeat(np.arange(batch_size), n_neg + 1)
+         self.index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
+         self.index1[np.where(self.index1 >= batch_size)] -= batch_size
+
+     def forward(self, x):
+         user_embedding = self.user_tower(x)  # (batch_size, embedding_dim)
+         item_embedding = self.item_tower(x)  # (batch_size, embedding_dim)
+         if self.mode == "user":
+             return user_embedding
+         if self.mode == "item":
+             return item_embedding
+
+         # pred[i, j] is the predicted score user_i gives to item_j
+         pred = torch.cosine_similarity(user_embedding.unsqueeze(1), item_embedding, dim=2)  # (batch_size, batch_size)
+
+         # get the sample weight of the items in this batch
+         sample_weight = self.embedding(x, self.sample_weight_feature, squeeze_dim=True).squeeze(1)  # (batch_size,)
+         scores = pred - torch.log(sample_weight)  # sampling-bias correction via broadcast. (batch_size, batch_size)
+
+         if user_embedding.shape[0] * (self.n_neg + 1) != self.index0.shape[0]:  # last (smaller) batch
+             batch_size = user_embedding.shape[0]
+             # copy the slices so the in-place wrap-around below does not corrupt the cached full-batch indices
+             index0 = self.index0[:batch_size * (self.n_neg + 1)].copy()
+             index1 = self.index1[:batch_size * (self.n_neg + 1)].copy()
+             index0[np.where(index0 >= batch_size)] -= batch_size
+             index1[np.where(index1 >= batch_size)] -= batch_size
+             scores = scores[index0, index1]  # (batch_size * (1 + n_neg),)
+         else:
+             scores = scores[self.index0, self.index1]  # (batch_size * (1 + n_neg),)
+
+         scores = scores / self.temperature
+         return scores.view(-1, self.n_neg + 1)  # (batch_size, 1 + n_neg)
+
+     def user_tower(self, x):
+         if self.mode == "item":
+             return None
+         input_user = self.embedding(x, self.user_features, squeeze_dim=True)  # [batch_size, num_features*deep_dims]
+         user_embedding = self.user_mlp(input_user)  # [batch_size, user_params["dims"][-1]]
+         return user_embedding
+
+     def item_tower(self, x):
+         if self.mode == "user":
+             return None
+         input_item = self.embedding(x, self.item_features, squeeze_dim=True)  # [batch_size, num_features*embed_dim]
+         item_embedding = self.item_mlp(input_item)  # [batch_size, item_params["dims"][-1]]
+         return item_embedding
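The cached indices implement the in-batch sampling: for user i they select scores[i, i] (the diagonal positive) followed by the next n_neg columns with wrap-around. A standalone illustration of the construction above, for batch_size=4 and n_neg=2:

import numpy as np

batch_size, n_neg = 4, 2
index0 = np.repeat(np.arange(batch_size), n_neg + 1)
index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
index1[index1 >= batch_size] -= batch_size
print(index0)  # [0 0 0 1 1 1 2 2 2 3 3 3]
print(index1)  # [0 1 2 1 2 3 2 3 0 3 0 1]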
torch_rechub/models/multi_task/__init__.py
@@ -1,4 +1,5 @@
- from .shared_bottom import SharedBottom
- from .esmm import ESMM
- from .mmoe import MMOE
- from .ple import PLE
+ from .shared_bottom import SharedBottom
+ from .esmm import ESMM
+ from .mmoe import MMOE
+ from .ple import PLE
+ from .aitm import AITM
torch_rechub/models/multi_task/aitm.py
@@ -0,0 +1,84 @@
+ """
+ Date: created on 14/05/2022
+ References:
+     paper: (KDD'2021) Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising
+     url: https://arxiv.org/abs/2105.08489
+     code: https://github.com/adtalos/AITM-torch
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+
+ import torch
+ import torch.nn as nn
+
+ from ...basic.layers import MLP, EmbeddingLayer
+
+
+ class AITM(nn.Module):
+     """Adaptive Information Transfer Multi-task (AITM) framework.
+     All tasks must be binary classification.
+
+     Args:
+         features (list[Feature Class]): features used by the whole module.
+         n_task (int): the number of binary classification tasks.
+         bottom_params (dict): the params of each task's bottom MLP module, keys include:`{"dims":list, "activation":str, "dropout":float}`.
+         tower_params_list (list): the list of tower params dicts, with the same keys as bottom_params.
+     """
+
+     def __init__(self, features, n_task, bottom_params, tower_params_list):
+         super().__init__()
+         self.features = features
+         self.n_task = n_task
+         self.input_dims = sum([fea.embed_dim for fea in features])
+         self.embedding = EmbeddingLayer(features)
+
+         self.bottoms = nn.ModuleList(
+             MLP(self.input_dims, output_layer=False, **bottom_params) for i in range(self.n_task))
+         self.towers = nn.ModuleList(MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
+
+         self.info_gates = nn.ModuleList(
+             MLP(bottom_params["dims"][-1], output_layer=False, dims=[bottom_params["dims"][-1]])
+             for i in range(self.n_task - 1))
+         self.aits = nn.ModuleList(AttentionLayer(bottom_params["dims"][-1]) for _ in range(self.n_task - 1))
+
+     def forward(self, x):
+         embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, input_dims]
+         input_towers = [self.bottoms[i](embed_x) for i in range(self.n_task)]  # [i]: [batch_size, bottom_dims[-1]]
+         for i in range(1, self.n_task):  # for tasks 1..n-1
+             info = self.info_gates[i - 1](input_towers[i - 1]).unsqueeze(1)  # [batch_size, 1, bottom_dims[-1]]
+             ait_input = torch.cat([input_towers[i].unsqueeze(1), info], dim=1)  # [batch_size, 2, bottom_dims[-1]]
+             input_towers[i] = self.aits[i - 1](ait_input)
+
+         ys = []
+         for input_tower, tower in zip(input_towers, self.towers):
+             y = tower(input_tower)
+             ys.append(torch.sigmoid(y))
+         return torch.cat(ys, dim=1)
+
+
+ class AttentionLayer(nn.Module):
+     """Attention for information transfer.
+
+     Args:
+         dim (int): attention dim
+
+     Shape:
+         Input: (batch_size, 2, dim)
+         Output: (batch_size, dim)
+     """
+
+     def __init__(self, dim=32):
+         super().__init__()
+         self.dim = dim
+         self.q_layer = nn.Linear(dim, dim, bias=False)
+         self.k_layer = nn.Linear(dim, dim, bias=False)
+         self.v_layer = nn.Linear(dim, dim, bias=False)
+         self.softmax = nn.Softmax(dim=1)
+
+     def forward(self, x):
+         Q = self.q_layer(x)
+         K = self.k_layer(x)
+         V = self.v_layer(x)
+         a = torch.sum(torch.mul(Q, K), -1) / (self.dim ** 0.5)  # scaled dot-product over the 2 positions
+         a = self.softmax(a)
+         outputs = torch.sum(torch.mul(torch.unsqueeze(a, -1), V), dim=1)
+         return outputs
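A shape check for the information-transfer attention (illustrative): it attends over the two "token" positions, the current task's bottom output and the transferred info, and returns one fused vector per example. The import path assumes the file lands at torch_rechub/models/multi_task/aitm.py as listed above.

import torch
from torch_rechub.models.multi_task.aitm import AttentionLayer

att = AttentionLayer(dim=32)
x = torch.randn(8, 2, 32)  # [batch_size, 2, dim]
out = att(x)
print(out.shape)  # torch.Size([8, 32])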
torch_rechub/models/multi_task/esmm.py
@@ -1,45 +1,55 @@
- """
- Date: create on 04/05/2022
- References:
-     paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
-     url: https://arxiv.org/abs/1804.07931
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
- """
-
- import torch
- import torch.nn as nn
-
- from ...basic.layers import MLP, EmbeddingLayer
-
-
- class ESMM(nn.Module):
-     """Entire Space Multi-Task Model
-
-     Args:
-         user_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the user features.
-         item_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the item features.
-         cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
-         ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
-     """
-
-     def __init__(self, user_features, item_features, cvr_params, ctr_params):
-         super().__init__()
-         self.user_features = user_features
-         self.item_features = item_features
-         self.embedding = EmbeddingLayer(user_features + item_features)
-         self.tower_dims = user_features[0].embed_dim + item_features[0].embed_dim  #the dims after Field-wise Pooling Layer
-         self.tower_cvr = MLP(self.tower_dims, **cvr_params)
-         self.tower_ctr = MLP(self.tower_dims, **ctr_params)
-
-     def forward(self, x):
-         embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
-         embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
-         input_tower = torch.cat((embed_user_features, embed_item_features), dim=1)
-         cvr_logit = self.tower_cvr(input_tower)
-         ctr_logit = self.tower_ctr(input_tower)
-         cvr_pred = torch.sigmoid(cvr_logit)
-         ctr_pred = torch.sigmoid(ctr_logit)
-         ctcvr_pred = torch.mul(cvr_pred, cvr_pred)
-
-         ys = [cvr_pred, ctr_pred, ctcvr_pred]
-         return torch.cat(ys, dim=1)
+ """
+ Date: created on 04/05/2022
+ References:
+     paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
+     url: https://arxiv.org/abs/1804.07931
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+
+ import torch
+ import torch.nn as nn
+
+ from ...basic.layers import MLP, EmbeddingLayer
+
+
+ class ESMM(nn.Module):
+     """Entire Space Multi-Task Model
+
+     Args:
+         user_features (list): the list of `Feature Class` trained by the shared bottom and tower modules; these are the user features.
+         item_features (list): the list of `Feature Class` trained by the shared bottom and tower modules; these are the item features.
+         cvr_params (dict): the params of the CVR tower module, keys include:`{"dims":list, "activation":str, "dropout":float}`
+         ctr_params (dict): the params of the CTR tower module, keys include:`{"dims":list, "activation":str, "dropout":float}`
+     """
+
+     def __init__(self, user_features, item_features, cvr_params, ctr_params):
+         super().__init__()
+         self.user_features = user_features
+         self.item_features = item_features
+         self.embedding = EmbeddingLayer(user_features + item_features)
+         self.tower_dims = len(user_features) * user_features[0].embed_dim + len(item_features) * item_features[0].embed_dim
+         self.tower_cvr = MLP(self.tower_dims, **cvr_params)
+         self.tower_ctr = MLP(self.tower_dims, **ctr_params)
+
+     def forward(self, x):
+         # # Field-wise Pooling Layer for user and item
+         # embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
+         # embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).sum(dim=1)  #[batch_size, embed_dim]
+
+         # Here we concat all the features instead of field-wise pooling them
+         # [batch_size, num_features, embed_dim] --> [batch_size, num_features * embed_dim]
+         embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False)
+         _batch_size = embed_user_features.shape[0]
+         embed_user_features = embed_user_features.reshape(_batch_size, -1)
+         embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).reshape(_batch_size, -1)
+
+         input_tower = torch.cat((embed_user_features, embed_item_features), dim=1)
+         cvr_logit = self.tower_cvr(input_tower)
+         ctr_logit = self.tower_ctr(input_tower)
+         cvr_pred = torch.sigmoid(cvr_logit)
+         ctr_pred = torch.sigmoid(ctr_logit)
+         ctcvr_pred = torch.mul(ctr_pred, cvr_pred)
+
+         ys = [cvr_pred, ctr_pred, ctcvr_pred]
+         return torch.cat(ys, dim=1)
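The substantive fix in this hunk is the CTCVR head: 0.0.1 computed torch.mul(cvr_pred, cvr_pred), i.e. squared pCVR, while 0.0.3 computes pCTCVR = pCTR × pCVR as in the paper. In miniature, with stand-in tower outputs:

import torch

cvr_pred = torch.rand(8, 1)  # stand-in post-sigmoid tower outputs
ctr_pred = torch.rand(8, 1)
ctcvr_pred = torch.mul(ctr_pred, cvr_pred)   # 0.0.3: pCTCVR = pCTR * pCVR
# ctcvr_pred = torch.mul(cvr_pred, cvr_pred) # 0.0.1 bug: squared pCVR instead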