torch-rechub 0.0.1__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. torch_rechub/__init__.py +14 -0
  2. torch_rechub/basic/activation.py +3 -1
  3. torch_rechub/basic/callback.py +2 -2
  4. torch_rechub/basic/features.py +38 -8
  5. torch_rechub/basic/initializers.py +92 -0
  6. torch_rechub/basic/layers.py +800 -46
  7. torch_rechub/basic/loss_func.py +223 -0
  8. torch_rechub/basic/metaoptimizer.py +76 -0
  9. torch_rechub/basic/metric.py +251 -0
  10. torch_rechub/models/generative/__init__.py +6 -0
  11. torch_rechub/models/generative/hllm.py +249 -0
  12. torch_rechub/models/generative/hstu.py +189 -0
  13. torch_rechub/models/matching/__init__.py +13 -0
  14. torch_rechub/models/matching/comirec.py +193 -0
  15. torch_rechub/models/matching/dssm.py +72 -0
  16. torch_rechub/models/matching/dssm_facebook.py +77 -0
  17. torch_rechub/models/matching/dssm_senet.py +87 -0
  18. torch_rechub/models/matching/gru4rec.py +85 -0
  19. torch_rechub/models/matching/mind.py +103 -0
  20. torch_rechub/models/matching/narm.py +82 -0
  21. torch_rechub/models/matching/sasrec.py +143 -0
  22. torch_rechub/models/matching/sine.py +148 -0
  23. torch_rechub/models/matching/stamp.py +81 -0
  24. torch_rechub/models/matching/youtube_dnn.py +75 -0
  25. torch_rechub/models/matching/youtube_sbc.py +98 -0
  26. torch_rechub/models/multi_task/__init__.py +5 -2
  27. torch_rechub/models/multi_task/aitm.py +83 -0
  28. torch_rechub/models/multi_task/esmm.py +19 -8
  29. torch_rechub/models/multi_task/mmoe.py +18 -12
  30. torch_rechub/models/multi_task/ple.py +41 -29
  31. torch_rechub/models/multi_task/shared_bottom.py +3 -2
  32. torch_rechub/models/ranking/__init__.py +13 -2
  33. torch_rechub/models/ranking/afm.py +65 -0
  34. torch_rechub/models/ranking/autoint.py +102 -0
  35. torch_rechub/models/ranking/bst.py +61 -0
  36. torch_rechub/models/ranking/dcn.py +38 -0
  37. torch_rechub/models/ranking/dcn_v2.py +59 -0
  38. torch_rechub/models/ranking/deepffm.py +131 -0
  39. torch_rechub/models/ranking/deepfm.py +8 -7
  40. torch_rechub/models/ranking/dien.py +191 -0
  41. torch_rechub/models/ranking/din.py +31 -19
  42. torch_rechub/models/ranking/edcn.py +101 -0
  43. torch_rechub/models/ranking/fibinet.py +42 -0
  44. torch_rechub/models/ranking/widedeep.py +6 -6
  45. torch_rechub/trainers/__init__.py +4 -2
  46. torch_rechub/trainers/ctr_trainer.py +191 -0
  47. torch_rechub/trainers/match_trainer.py +239 -0
  48. torch_rechub/trainers/matching.md +3 -0
  49. torch_rechub/trainers/mtl_trainer.py +137 -23
  50. torch_rechub/trainers/seq_trainer.py +293 -0
  51. torch_rechub/utils/__init__.py +0 -0
  52. torch_rechub/utils/data.py +492 -0
  53. torch_rechub/utils/hstu_utils.py +198 -0
  54. torch_rechub/utils/match.py +457 -0
  55. torch_rechub/utils/mtl.py +136 -0
  56. torch_rechub/utils/onnx_export.py +353 -0
  57. torch_rechub-0.0.4.dist-info/METADATA +391 -0
  58. torch_rechub-0.0.4.dist-info/RECORD +62 -0
  59. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info}/WHEEL +1 -2
  60. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info/licenses}/LICENSE +1 -1
  61. torch_rechub/basic/utils.py +0 -168
  62. torch_rechub/trainers/trainer.py +0 -111
  63. torch_rechub-0.0.1.dist-info/METADATA +0 -105
  64. torch_rechub-0.0.1.dist-info/RECORD +0 -26
  65. torch_rechub-0.0.1.dist-info/top_level.txt +0 -1
@@ -0,0 +1,148 @@
+ """
+ Date: created on 03/07/2022
+ References:
+     paper: Sparse-Interest Network for Sequential Recommendation
+     url: https://arxiv.org/abs/2102.09267
+     code: https://github.com/Qiaoyut/SINE/blob/master/model.py
+ Authors: Bo Kang, klinux@live.com
+ """
+ 
+ import torch
+ import torch.nn.functional as F
+ from torch import einsum
+ 
+ 
+ class SINE(torch.nn.Module):
+     """The match model proposed in the `Sparse-Interest Network for Sequential Recommendation` paper.
+ 
+     Args:
+         history_features (list[str]): history feature names, used to index the historical item sequences in the input dictionary
+         item_features (list[str]): item feature names, used to index the items in the input dictionary
+         neg_item_features (list[str]): negative item feature names, used to index the negative items in the input dictionary
+         num_items (int): number of items in the data
+         embedding_dim (int): dimensionality of the embeddings
+         hidden_dim (int): dimensionality of the hidden layer in the self-attention modules
+         num_concept (int): number of concepts, also called conceptual prototypes
+         num_intention (int): number of (user-specific) intentions selected from the concepts
+         seq_max_len (int): max length of the input item sequence
+         num_heads (int): number of attention heads in the self-attention modules, default to 1
+         temperature (float): temperature factor in the similarity measure, default to 1.0
+     """
+ 
+     def __init__(self, history_features, item_features, neg_item_features, num_items, embedding_dim, hidden_dim, num_concept, num_intention, seq_max_len, num_heads=1, temperature=1.0):
+         super().__init__()
+         self.item_features = item_features
+         self.history_features = history_features
+         self.neg_item_features = neg_item_features
+         self.temperature = temperature
+         self.num_concept = num_concept
+         self.num_intention = num_intention
+         self.seq_max_len = seq_max_len
+ 
+         std = 1e-4
+         self.item_embedding = torch.nn.Embedding(num_items, embedding_dim)
+         torch.nn.init.normal_(self.item_embedding.weight, 0, std)
+         self.concept_embedding = torch.nn.Embedding(num_concept, embedding_dim)
+         torch.nn.init.normal_(self.concept_embedding.weight, 0, std)
+         self.position_embedding = torch.nn.Embedding(seq_max_len, embedding_dim)
+         torch.nn.init.normal_(self.position_embedding.weight, 0, std)
+ 
+         self.w_1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+         self.w_2 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)
+ 
+         self.w_3 = torch.nn.Parameter(torch.rand(embedding_dim, embedding_dim), requires_grad=True)
+ 
+         self.w_k1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+         self.w_k2 = torch.nn.Parameter(torch.rand(hidden_dim, num_intention), requires_grad=True)
+ 
+         self.w_4 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
+         self.w_5 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)
+ 
+         self.mode = None
+ 
+     def forward(self, x):
+         user_embedding = self.user_tower(x)
+         item_embedding = self.item_tower(x)
+         if self.mode == "user":
+             return user_embedding
+         if self.mode == "item":
+             return item_embedding
+ 
+         y = torch.mul(user_embedding, item_embedding).sum(dim=-1)
+ 
+         # # compute covariance regularizer
+         # M = torch.cov(self.concept_embedding.weight, correction=0)
+         # l_c = (torch.norm(M, p='fro')**2 - torch.norm(torch.diag(M), p='fro')**2) / 2
+ 
+         return y
+ 
+     def user_tower(self, x):
+         if self.mode == "item":
+             return None
+ 
+         # sparse interest extraction
+         # # user-specific historical item embeddings X^u
+         hist_item = x[self.history_features[0]]
+         x_u = self.item_embedding(hist_item) + \
+             self.position_embedding.weight.unsqueeze(0)
+         x_u_mask = (hist_item > 0).long()
+ 
+         # # user-specific conceptual prototypes C^u
+         # ## attention a
+         h_1 = einsum('bse, ed -> bsd', x_u, self.w_1).tanh()
+         a_hist = F.softmax(einsum('bsd, dh -> bsh', h_1, self.w_2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)
+ 
+         # ## virtual concept vector z_u
+         z_u = einsum("bse, bsh -> be", x_u, a_hist)
+ 
+         # ## similarity between the user's concept vector and all conceptual prototypes s^u
+         s_u = einsum("be, te -> bt", z_u, self.concept_embedding.weight)
+         s_u_top_k = torch.topk(s_u, self.num_intention)
+ 
+         # ## final C^u
+         c_u = einsum("bk, bke -> bke", torch.sigmoid(s_u_top_k.values), self.concept_embedding(s_u_top_k.indices))
+ 
+         # # user intention assignment P_{k|t}
+         p_u = F.softmax(einsum("bse, bke -> bks", F.normalize(x_u @ self.w_3, dim=-1), F.normalize(c_u, p=2, dim=-1)), dim=1)
+ 
+         # # attention weighting P_{t|k}
+         h_2 = einsum('bse, ed -> bsd', x_u, self.w_k1).tanh()
+         a_concept_k = F.softmax(einsum('bsd, dk -> bsk', h_2, self.w_k2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)
+ 
+         # # multiple interest encoding \phi_\theta^k(x^u)
+         phi_u = einsum("bks, bse -> bke", p_u * a_concept_k.permute(0, 2, 1), x_u)
+ 
+         # adaptive interest aggregation
+         # # intention-aware input behavior \hat{X^u}
+         x_u_hat = einsum('bks, bke -> bse', p_u, c_u)
+ 
+         # # user's next intention C^u_{apt}
+         h_3 = einsum('bse, ed -> bsd', x_u_hat, self.w_4).tanh()
+         c_u_apt = F.normalize(einsum("bs, bse -> be", F.softmax(einsum('bsd, dh -> bsh', h_3, self.w_5).reshape(-1, self.seq_max_len) + -1.e9 * (1 - x_u_mask.float()), dim=1), x_u_hat), dim=-1)
+ 
+         # # aggregation weights e_k^u
+         e_u = F.softmax(einsum('be, bke -> bk', c_u_apt, phi_u) / self.temperature, dim=1)
+ 
+         # final user representation v^u
+         v_u = einsum('bk, bke -> be', e_u, phi_u)
+ 
+         if self.mode == "user":
+             return v_u
+         return v_u.unsqueeze(1)
+ 
+     def item_tower(self, x):
+         if self.mode == "user":
+             return None
+         pos_embedding = self.item_embedding(x[self.item_features[0]]).unsqueeze(1)
+         if self.mode == "item":  # inference embedding mode
+             return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
+         neg_embeddings = self.item_embedding(x[self.neg_item_features[0]]).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
+ 
+         # [batch_size, 1+n_neg_items, embed_dim]
+         return torch.cat((pos_embedding, neg_embeddings), dim=1)
+ 
+     def gen_mask(self, x):
+         his_list = x[self.history_features[0]]  # history_features hold plain string keys, matching user_tower
+         mask = (his_list > 0).long()
+         return mask
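
A minimal usage sketch for the SINE model added above. The feature names and sizes below are hypothetical, and it assumes SINE is exported from torch_rechub.models.matching (as the matching/__init__.py entry in the file list suggests):

    import torch
    from torch_rechub.models.matching import SINE

    model = SINE(
        history_features=["hist_item_id"],  # hypothetical key into the input dict
        item_features=["item_id"],
        neg_item_features=["neg_item_ids"],
        num_items=1000, embedding_dim=16, hidden_dim=32,
        num_concept=8, num_intention=4, seq_max_len=20)

    x = {
        "hist_item_id": torch.randint(1, 1000, (4, 20)),  # [batch, seq_max_len], 0 is padding
        "item_id": torch.randint(1, 1000, (4,)),          # one positive item per user
        "neg_item_ids": torch.randint(1, 1000, (4, 3)),   # 3 sampled negatives per user
    }
    scores = model(x)  # [batch, 1 + n_neg], positive score first, then negatives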
@@ -0,0 +1,81 @@
+ """
+ Date: created on 17/09/2022
+ References:
+     paper: STAMP: Short-Term Attention/Memory Priority Model for Session-based Recommendation
+     url: https://dl.acm.org/doi/10.1145/3219819.3219950
+     official TensorFlow implementation: https://github.com/uestcnlp/STAMP
+ Authors: Bo Kang, klinux@live.com
+ """
+ 
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ 
+ 
+ class STAMP(nn.Module):
+ 
+     def __init__(self, item_history_feature, weight_std, emb_std):
+         super(STAMP, self).__init__()
+ 
+         # item embedding layer
+         self.item_history_feature = item_history_feature
+         n_items, item_emb_dim = item_history_feature.vocab_size, item_history_feature.embed_dim
+         self.item_emb = nn.Embedding(n_items, item_emb_dim, padding_idx=0)
+ 
+         # weights and biases for attention computation
+         self.w_0 = nn.Parameter(torch.zeros(item_emb_dim, 1))
+         self.w_1_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+         self.w_2_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+         self.w_3_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
+         self.b_a = nn.Parameter(torch.zeros(item_emb_dim))
+         self._init_parameter_weights(weight_std)
+ 
+         # mlp layers
+         self.f_s = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
+         self.f_t = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
+         self.emb_std = emb_std
+         self.apply(self._init_module_weights)
+ 
+     def _init_parameter_weights(self, weight_std):
+         nn.init.normal_(self.w_0, std=weight_std)
+         nn.init.normal_(self.w_1_t, std=weight_std)
+         nn.init.normal_(self.w_2_t, std=weight_std)
+         nn.init.normal_(self.w_3_t, std=weight_std)
+ 
+     def _init_module_weights(self, module):
+         if isinstance(module, nn.Linear):
+             module.weight.data.normal_(std=self.emb_std)
+             if module.bias is not None:
+                 module.bias.data.zero_()
+         elif isinstance(module, nn.Embedding):
+             module.weight.data.normal_(std=self.emb_std)
+ 
+     def forward(self, input_dict):
+         # Index the embeddings for the items in the session
+         input = input_dict[self.item_history_feature.name]
+         value_mask = (input != 0).unsqueeze(-1)
+         value_counts = value_mask.sum(dim=1, keepdim=True).squeeze(-1)
+         item_emb_batch = self.item_emb(input) * value_mask
+ 
+         # Index the embeddings of the latest clicked items
+         x_t = self.item_emb(torch.gather(input, 1, value_counts - 1))
+ 
+         # Eq. 2, user's general interest in the current session
+         m_s = (item_emb_batch.sum(1) / value_counts).unsqueeze(1)
+ 
+         # Eq. 7, compute attention coefficients
+         a = F.normalize(torch.exp(torch.sigmoid(item_emb_batch @ self.w_1_t + x_t @ self.w_2_t + m_s @ self.w_3_t + self.b_a) @ self.w_0) * value_mask, p=1, dim=1)
+ 
+         # Eq. 8, compute user's attention-based interests
+         m_a = (a * item_emb_batch).sum(1) + m_s.squeeze(1)
+ 
+         # Eq. 3, compute the output state of the general interest
+         h_s = self.f_s(m_a)
+ 
+         # Eq. 9, compute the output state of the short-term interest
+         h_t = self.f_t(x_t).squeeze(1)
+ 
+         # Eq. 4, compute candidate scores
+         z = h_s * h_t @ self.item_emb.weight.T
+ 
+         return z
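
A minimal usage sketch for STAMP. The model only reads .name, .vocab_size and .embed_dim from its feature argument; the SequenceFeature class from torch_rechub.basic.features is used here on the assumption that it exposes those attributes:

    import torch
    from torch_rechub.basic.features import SequenceFeature
    from torch_rechub.models.matching import STAMP

    hist = SequenceFeature("hist_item_id", vocab_size=1000, embed_dim=16)
    model = STAMP(hist, weight_std=0.05, emb_std=0.002)

    # id 0 is the padding index; every session needs at least one real item,
    # since the last click is gathered at position value_counts - 1
    x = {"hist_item_id": torch.randint(1, 1000, (4, 20))}
    z = model(x)  # [batch, vocab_size], one score per candidate item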
@@ -0,0 +1,75 @@
+ """
+ Date: created on 23/05/2022
+ References:
+     paper: (RecSys'2016) Deep Neural Networks for YouTube Recommendations
+     url: https://dl.acm.org/doi/10.1145/2959100.2959190
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+ 
+ import torch
+ import torch.nn.functional as F
+ 
+ from ...basic.layers import MLP, EmbeddingLayer
+ 
+ 
+ class YoutubeDNN(torch.nn.Module):
+     """The match model described in the `Deep Neural Networks for YouTube Recommendations` paper.
+     It is a DSSM-style match model trained with a global softmax loss on list-wise samples.
+     Note that the original paper has no item DNN tower; it trains the item embeddings directly.
+ 
+     Args:
+         user_features (list[Feature Class]): features fed to the user tower module.
+         item_features (list[Feature Class]): features fed to the embedding table, i.e. the item id feature.
+         neg_item_feature (list[Feature Class]): features fed to the embedding table, i.e. the negative item id feature.
+         user_params (dict): the params of the user tower module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`.
+         temperature (float): temperature factor for the similarity score, default to 1.0.
+     """
+ 
+     def __init__(self, user_features, item_features, neg_item_feature, user_params, temperature=1.0):
+         super().__init__()
+         self.user_features = user_features
+         self.item_features = item_features
+         self.neg_item_feature = neg_item_feature
+         self.temperature = temperature
+         self.user_dims = sum([fea.embed_dim for fea in user_features])
+         self.embedding = EmbeddingLayer(user_features + item_features)
+         self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
+         self.mode = None
+ 
+     def forward(self, x):
+         user_embedding = self.user_tower(x)
+         item_embedding = self.item_tower(x)
+         if self.mode == "user":
+             return user_embedding
+         if self.mode == "item":
+             return item_embedding
+ 
+         # calculate cosine score
+         y = torch.mul(user_embedding, item_embedding).sum(dim=2)
+         y = y / self.temperature
+         return y
+ 
+     def user_tower(self, x):
+         if self.mode == "item":
+             return None
+         # [batch_size, num_features*deep_dims]
+         input_user = self.embedding(x, self.user_features, squeeze_dim=True)
+         user_embedding = self.user_mlp(input_user).unsqueeze(1)  # [batch_size, 1, embed_dim]
+         user_embedding = F.normalize(user_embedding, p=2, dim=2)
+         if self.mode == "user":
+             # inference embedding mode -> [batch_size, embed_dim]
+             return user_embedding.squeeze(1)
+         return user_embedding
+ 
+     def item_tower(self, x):
+         if self.mode == "user":
+             return None
+         pos_embedding = self.embedding(x, self.item_features, squeeze_dim=False)  # [batch_size, 1, embed_dim]
+         pos_embedding = F.normalize(pos_embedding, p=2, dim=2)
+         if self.mode == "item":  # inference embedding mode
+             return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
+         neg_embeddings = self.embedding(x, self.neg_item_feature, squeeze_dim=False).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
+         neg_embeddings = F.normalize(neg_embeddings, p=2, dim=2)
+         # [batch_size, 1+n_neg_items, embed_dim]
+         return torch.cat((pos_embedding, neg_embeddings), dim=1)
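
A minimal construction sketch for YoutubeDNN, following the pattern of the project's matching examples; the feature names and dims are hypothetical, and the negative-item feature is assumed to share the item_id embedding table via shared_with:

    from torch_rechub.basic.features import SparseFeature, SequenceFeature
    from torch_rechub.models.matching import YoutubeDNN

    user_features = [SparseFeature("user_id", vocab_size=100, embed_dim=16)]
    item_features = [SparseFeature("item_id", vocab_size=1000, embed_dim=16)]
    neg_item_feature = [SequenceFeature("neg_items", vocab_size=1000, embed_dim=16,
                                        pooling="concat", shared_with="item_id")]

    # the last user MLP dim must equal embed_dim so the dot product is well defined
    model = YoutubeDNN(user_features, item_features, neg_item_feature,
                       user_params={"dims": [128, 64, 16]}, temperature=0.02)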
@@ -0,0 +1,98 @@
+ """
+ Date: created on 23/05/2022
+ References:
+     paper: (RecSys'2019) Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations
+     url: https://dl.acm.org/doi/10.1145/3298689.3346996
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+ 
+ import numpy as np
+ import torch
+ import torch.nn.functional as F
+ 
+ from ...basic.layers import MLP, EmbeddingLayer
+ 
+ 
+ class YoutubeSBC(torch.nn.Module):
+     """Sampling-Bias-Corrected Neural Modeling for Matching, by YouTube.
+     It is a DSSM-style match model trained with an in-batch softmax loss on list-wise samples, plus a sampling-bias-correction module.
+ 
+     Args:
+         user_features (list[Feature Class]): features fed to the user tower module.
+         item_features (list[Feature Class]): features fed to the item tower module.
+         sample_weight_feature (list[Feature Class]): used for sampling-bias correction during training.
+         user_params (dict): the params of the user tower module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`.
+         item_params (dict): the params of the item tower module, keys include: `{"dims": list, "activation": str, "dropout": float, "output_layer": bool}`.
+         batch_size (int): same as the batch size of the DataLoader, used for in-batch sampling.
+         n_neg (int): the number of negative samples for every positive sample, default to 3. Note it must be smaller than batch_size.
+         temperature (float): temperature factor for the similarity score, default to 1.0.
+     """
+ 
+     def __init__(self, user_features, item_features, sample_weight_feature, user_params, item_params, batch_size, n_neg=3, temperature=1.0):
+         super().__init__()
+         self.user_features = user_features
+         self.item_features = item_features
+         self.sample_weight_feature = sample_weight_feature
+         self.n_neg = n_neg
+         self.temperature = temperature
+         self.user_dims = sum([fea.embed_dim for fea in user_features])
+         self.item_dims = sum([fea.embed_dim for fea in item_features])
+         self.batch_size = batch_size
+         self.embedding = EmbeddingLayer(user_features + item_features + sample_weight_feature)
+         self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
+         self.item_mlp = MLP(self.item_dims, output_layer=False, **item_params)
+         self.mode = None
+ 
+         # in-batch sampling index
+         self.index0 = np.repeat(np.arange(batch_size), n_neg + 1)
+         self.index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
+         self.index1[np.where(self.index1 >= batch_size)] -= batch_size
+ 
+     def forward(self, x):
+         user_embedding = self.user_tower(x)  # (batch_size, embedding_dim)
+         item_embedding = self.item_tower(x)  # (batch_size, embedding_dim)
+         if self.mode == "user":
+             return user_embedding
+         if self.mode == "item":
+             return item_embedding
+ 
+         # pred[i, j] is the predicted score that user_i gives to item_j
+         pred = torch.cosine_similarity(user_embedding.unsqueeze(1), item_embedding, dim=2)  # (batch_size, batch_size)
+ 
+         # get the sample weights of the items in this batch
+         sample_weight = self.embedding(x, self.sample_weight_feature, squeeze_dim=True).squeeze(1)  # (batch_size)
+         # sampling-bias correction, using broadcast. (batch_size, batch_size)
+         scores = pred - torch.log(sample_weight)
+ 
+         if user_embedding.shape[0] * (self.n_neg + 1) != self.index0.shape[0]:  # last batch
+             batch_size = user_embedding.shape[0]
+             index0 = self.index0[:batch_size * (self.n_neg + 1)].copy()  # copy: the slices are views, and the
+             index1 = self.index1[:batch_size * (self.n_neg + 1)].copy()  # in-place -= below must not mutate the cached index
+             index0[np.where(index0 >= batch_size)] -= batch_size
+             index1[np.where(index1 >= batch_size)] -= batch_size
+             scores = scores[index0, index1]  # (batch_size, 1 + self.n_neg)
+         else:
+             # (batch_size, 1 + self.n_neg)
+             scores = scores[self.index0, self.index1]
+ 
+         scores = scores / self.temperature
+         return scores.view(-1, self.n_neg + 1)  # (batch_size, 1 + self.n_neg)
+ 
+     def user_tower(self, x):
+         if self.mode == "item":
+             return None
+         # [batch_size, num_features*deep_dims]
+         input_user = self.embedding(x, self.user_features, squeeze_dim=True)
+         # [batch_size, user_params["dims"][-1]]
+         user_embedding = self.user_mlp(input_user)
+         return user_embedding
+ 
+     def item_tower(self, x):
+         if self.mode == "user":
+             return None
+         # [batch_size, num_features*embed_dim]
+         input_item = self.embedding(x, self.item_features, squeeze_dim=True)
+         # [batch_size, item_params["dims"][-1]]
+         item_embedding = self.item_mlp(input_item)
+         return item_embedding
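
The least obvious part of YoutubeSBC is the in-batch sampling index built in __init__. A small numeric trace (batch_size=4, n_neg=2) shows what it selects:

    import numpy as np

    batch_size, n_neg = 4, 2
    index0 = np.repeat(np.arange(batch_size), n_neg + 1)
    # index0 -> [0 0 0 1 1 1 2 2 2 3 3 3]
    index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
    index1[index1 >= batch_size] -= batch_size
    # index1 -> [0 1 2 1 2 3 2 3 0 3 0 1]

    # scores[index0, index1] keeps scores[i, i] as the positive for row i and the
    # next n_neg in-batch items (wrapping at the batch boundary) as its negatives,
    # giving the (batch_size, 1 + n_neg) matrix the softmax loss expects.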
@@ -1,4 +1,7 @@
- from .shared_bottom import SharedBottom
+ __all__ = ['SharedBottom', 'ESMM', 'MMOE', 'PLE', 'AITM']
+ 
+ from .aitm import AITM
  from .esmm import ESMM
  from .mmoe import MMOE
- from .ple import PLE
+ from .ple import PLE
+ from .shared_bottom import SharedBottom
@@ -0,0 +1,83 @@
+ """
+ Date: created on 14/05/2022
+ References:
+     paper: (KDD'2021) Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising
+     url: https://arxiv.org/abs/2105.08489
+     code: https://github.com/adtalos/AITM-torch
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
+ """
+ 
+ import torch
+ import torch.nn as nn
+ 
+ from ...basic.layers import MLP, EmbeddingLayer
+ 
+ 
+ class AITM(nn.Module):
+     """Adaptive Information Transfer Multi-task (AITM) framework.
+     All tasks must be binary classification tasks.
+ 
+     Args:
+         features (list[Feature Class]): features fed to the whole module.
+         n_task (int): the number of binary classification tasks.
+         bottom_params (dict): the params of every bottom MLP module, keys include: `{"dims": list, "activation": str, "dropout": float}`.
+         tower_params_list (list): the list of tower params dicts; the keys are the same as in bottom_params.
+     """
+ 
+     def __init__(self, features, n_task, bottom_params, tower_params_list):
+         super().__init__()
+         self.features = features
+         self.n_task = n_task
+         self.input_dims = sum([fea.embed_dim for fea in features])
+         self.embedding = EmbeddingLayer(features)
+ 
+         self.bottoms = nn.ModuleList(MLP(self.input_dims, output_layer=False, **bottom_params) for i in range(self.n_task))
+         self.towers = nn.ModuleList(MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
+ 
+         self.info_gates = nn.ModuleList(MLP(bottom_params["dims"][-1], output_layer=False, dims=[bottom_params["dims"][-1]]) for i in range(self.n_task - 1))
+         self.aits = nn.ModuleList(AttentionLayer(bottom_params["dims"][-1]) for _ in range(self.n_task - 1))
+ 
+     def forward(self, x):
+         embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, *]
+         input_towers = [self.bottoms[i](embed_x) for i in range(self.n_task)]  # [i]: [batch_size, bottom_dims[-1]]
+         for i in range(1, self.n_task):  # for tasks 1..n-1
+             # [batch_size, 1, bottom_dims[-1]]
+             info = self.info_gates[i - 1](input_towers[i - 1]).unsqueeze(1)
+             # [batch_size, 2, bottom_dims[-1]]
+             ait_input = torch.cat([input_towers[i].unsqueeze(1), info], dim=1)
+             input_towers[i] = self.aits[i - 1](ait_input)
+ 
+         ys = []
+         for input_tower, tower in zip(input_towers, self.towers):
+             y = tower(input_tower)
+             ys.append(torch.sigmoid(y))
+         return torch.cat(ys, dim=1)
+ 
+ 
+ class AttentionLayer(nn.Module):
+     """Attention for information transfer.
+ 
+     Args:
+         dim (int): attention dim
+ 
+     Shape:
+         Input: (batch_size, 2, dim)
+         Output: (batch_size, dim)
+     """
+ 
+     def __init__(self, dim=32):
+         super().__init__()
+         self.dim = dim
+         self.q_layer = nn.Linear(dim, dim, bias=False)
+         self.k_layer = nn.Linear(dim, dim, bias=False)
+         self.v_layer = nn.Linear(dim, dim, bias=False)
+         self.softmax = nn.Softmax(dim=1)
+ 
+     def forward(self, x):
+         Q = self.q_layer(x)
+         K = self.k_layer(x)
+         V = self.v_layer(x)
+         # scaled dot-product; self.dim ** 0.5 avoids calling torch.sqrt on an integer tensor
+         a = torch.sum(torch.mul(Q, K), -1) / (self.dim ** 0.5)
+         a = self.softmax(a)
+         outputs = torch.sum(torch.mul(torch.unsqueeze(a, -1), V), dim=1)
+         return outputs
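
A minimal usage sketch for AITM with two sequentially dependent tasks (e.g. click then purchase); the feature names and dims are hypothetical, and SparseFeature from torch_rechub.basic.features is assumed:

    import torch
    from torch_rechub.basic.features import SparseFeature
    from torch_rechub.models.multi_task import AITM

    features = [SparseFeature("user_id", vocab_size=100, embed_dim=16),
                SparseFeature("item_id", vocab_size=1000, embed_dim=16)]
    model = AITM(features, n_task=2,
                 bottom_params={"dims": [128, 64]},
                 tower_params_list=[{"dims": [32]}, {"dims": [32]}])

    x = {"user_id": torch.randint(0, 100, (8,)),
         "item_id": torch.randint(0, 1000, (8,))}
    ys = model(x)  # [batch, n_task], a sigmoid probability per task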
@@ -1,7 +1,7 @@
  """
  Date: create on 04/05/2022
- References:
-     paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
+ References:
+     paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
  url: https://arxiv.org/abs/1804.07931
  Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
  """
@@ -18,8 +18,8 @@ class ESMM(nn.Module):
      Args:
          user_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the user features.
          item_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the item features.
-         cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
-         ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
+         cvr_params (dict): the params of the CVR Tower module, keys include: `{"dims": list, "activation": str, "dropout": float}`
+         ctr_params (dict): the params of the CTR Tower module, keys include: `{"dims": list, "activation": str, "dropout": float}`
      """
 
      def __init__(self, user_features, item_features, cvr_params, ctr_params):
@@ -27,19 +27,30 @@ class ESMM(nn.Module):
          self.user_features = user_features
          self.item_features = item_features
          self.embedding = EmbeddingLayer(user_features + item_features)
-         self.tower_dims = user_features[0].embed_dim + item_features[0].embed_dim #the dims after Field-wise Pooling Layer
+         self.tower_dims = len(user_features) * user_features[0].embed_dim + len(item_features) * item_features[0].embed_dim
          self.tower_cvr = MLP(self.tower_dims, **cvr_params)
          self.tower_ctr = MLP(self.tower_dims, **ctr_params)
 
      def forward(self, x):
-         embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1) #[batch_size, embed_dim]
-         embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).sum(dim=1) #[batch_size, embed_dim]
+         # # Field-wise Pooling Layer for user and item
+         # embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1)  # [batch_size, embed_dim]
+         # embed_item_features = self.embedding(x, self.item_features,
+         #     squeeze_dim=False).sum(dim=1)  # [batch_size, embed_dim]
+ 
+         # Here we concat all the features instead of field-wise pooling them
+         # [batch_size, num_features, embed_dim] --> [batch_size, num_features * embed_dim]
+         _batch_size = self.embedding(x, self.user_features, squeeze_dim=False).shape[0]
+         embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).reshape(_batch_size, -1)
+         embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).reshape(_batch_size, -1)
+ 
          input_tower = torch.cat((embed_user_features, embed_item_features), dim=1)
          cvr_logit = self.tower_cvr(input_tower)
          ctr_logit = self.tower_ctr(input_tower)
          cvr_pred = torch.sigmoid(cvr_logit)
          ctr_pred = torch.sigmoid(ctr_logit)
-         ctcvr_pred = torch.mul(cvr_pred, cvr_pred)
+         ctcvr_pred = torch.mul(ctr_pred, cvr_pred)
 
          ys = [cvr_pred, ctr_pred, ctcvr_pred]
          return torch.cat(ys, dim=1)
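
The ctcvr fix above is the heart of ESMM: it factorizes over the entire exposure space as pCTCVR(x) = pCTR(x) * pCVR(x). A toy check of the corrected line:

    import torch

    ctr_pred = torch.tensor([[0.10]])  # p(click | impression)
    cvr_pred = torch.tensor([[0.20]])  # p(conversion | click)
    ctcvr_pred = torch.mul(ctr_pred, cvr_pred)  # p(click and conversion | impression) = 0.02
    # the old torch.mul(cvr_pred, cvr_pred) would have yielded 0.04, ignoring CTR entirely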
@@ -1,6 +1,6 @@
  """
  Date: create on 04/05/2022
- References:
+ References:
  paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
  url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
  Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
@@ -19,7 +19,7 @@ class MMOE(nn.Module):
      features (list): the list of `Feature Class`, training by the expert and tower module.
      task_types (list): types of tasks, only support `["classification", "regression"]`.
      n_expert (int): the number of expert nets.
-     expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
+     expert_params (dict): the params of all the expert modules, keys include: `{"dims": list, "activation": str, "dropout": float}`.
      tower_params_list (list): the list of tower params dicts, with the same keys as expert_params.
@@ -31,22 +31,28 @@ class MMOE(nn.Module):
          self.n_expert = n_expert
          self.embedding = EmbeddingLayer(features)
          self.input_dims = sum([fea.embed_dim for fea in features])
-         self.experts = nn.ModuleList(MLP(self.input_dims, **{**expert_params, **{"output_layer": False}}) for i in range(self.n_expert))
-         self.gates = nn.ModuleList(MLP(self.input_dims, **{"dims": [self.n_expert], "activation": "softmax", "output_layer": False}) for i in range(self.n_task)) #n_gate = n_task
+         self.experts = nn.ModuleList(MLP(self.input_dims, output_layer=False, **expert_params) for i in range(self.n_expert))
+         self.gates = nn.ModuleList(MLP(self.input_dims, output_layer=False, **{"dims": [self.n_expert], "activation": "softmax"}) for i in range(self.n_task))  # n_gate = n_task
          self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
          self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
 
      def forward(self, x):
-         embed_x = self.embedding(x, self.features, squeeze_dim=True) #[batch_size, input_dims]
-         expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts] #expert_out[i]: [batch_size, 1, expert_dims[-1]]
-         expert_outs = torch.cat(expert_outs, dim=1) #[batch_size, n_expert, expert_dims[-1]]
-         gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates] #gate_out[i]: [batch_size, n_expert, 1]
+         # [batch_size, input_dims]
+         embed_x = self.embedding(x, self.features, squeeze_dim=True)
+         # expert_out[i]: [batch_size, 1, expert_dims[-1]]
+         expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts]
+         # [batch_size, n_expert, expert_dims[-1]]
+         expert_outs = torch.cat(expert_outs, dim=1)
+         # gate_out[i]: [batch_size, n_expert, 1]
+         gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates]
 
          ys = []
          for gate_out, tower, predict_layer in zip(gate_outs, self.towers, self.predict_layers):
-             expert_weight = torch.mul(gate_out, expert_outs) #[batch_size, n_expert, expert_dims[-1]]
-             expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
-             tower_out = tower(expert_pooling) #[batch_size, 1]
-             y = predict_layer(tower_out) #logit -> proba
+             # [batch_size, n_expert, expert_dims[-1]]
+             expert_weight = torch.mul(gate_out, expert_outs)
+             # [batch_size, expert_dims[-1]]
+             expert_pooling = torch.sum(expert_weight, dim=1)
+             tower_out = tower(expert_pooling)  # [batch_size, 1]
+             y = predict_layer(tower_out)  # logit -> proba
              ys.append(y)
          return torch.cat(ys, dim=1)
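
A minimal usage sketch for the refactored MMOE; the feature names and dims are hypothetical, and SparseFeature from torch_rechub.basic.features is assumed:

    import torch
    from torch_rechub.basic.features import SparseFeature
    from torch_rechub.models.multi_task import MMOE

    features = [SparseFeature("user_id", vocab_size=100, embed_dim=16),
                SparseFeature("item_id", vocab_size=1000, embed_dim=16)]
    model = MMOE(features,
                 task_types=["classification", "classification"],
                 n_expert=4,
                 expert_params={"dims": [64, 32]},  # output_layer=False is now set internally
                 tower_params_list=[{"dims": [16]}, {"dims": [16]}])

    x = {"user_id": torch.randint(0, 100, (8,)),
         "item_id": torch.randint(0, 1000, (8,))}
    ys = model(x)  # [batch, n_task], one prediction per task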