torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. torch_rechub/__init__.py +14 -0
  2. torch_rechub/basic/activation.py +54 -54
  3. torch_rechub/basic/callback.py +33 -33
  4. torch_rechub/basic/features.py +87 -94
  5. torch_rechub/basic/initializers.py +92 -92
  6. torch_rechub/basic/layers.py +994 -720
  7. torch_rechub/basic/loss_func.py +223 -34
  8. torch_rechub/basic/metaoptimizer.py +76 -72
  9. torch_rechub/basic/metric.py +251 -250
  10. torch_rechub/models/generative/__init__.py +6 -0
  11. torch_rechub/models/generative/hllm.py +249 -0
  12. torch_rechub/models/generative/hstu.py +189 -0
  13. torch_rechub/models/matching/__init__.py +13 -11
  14. torch_rechub/models/matching/comirec.py +193 -188
  15. torch_rechub/models/matching/dssm.py +72 -66
  16. torch_rechub/models/matching/dssm_facebook.py +77 -79
  17. torch_rechub/models/matching/dssm_senet.py +28 -16
  18. torch_rechub/models/matching/gru4rec.py +85 -87
  19. torch_rechub/models/matching/mind.py +103 -101
  20. torch_rechub/models/matching/narm.py +82 -76
  21. torch_rechub/models/matching/sasrec.py +143 -140
  22. torch_rechub/models/matching/sine.py +148 -151
  23. torch_rechub/models/matching/stamp.py +81 -83
  24. torch_rechub/models/matching/youtube_dnn.py +75 -71
  25. torch_rechub/models/matching/youtube_sbc.py +98 -98
  26. torch_rechub/models/multi_task/__init__.py +7 -5
  27. torch_rechub/models/multi_task/aitm.py +83 -84
  28. torch_rechub/models/multi_task/esmm.py +56 -55
  29. torch_rechub/models/multi_task/mmoe.py +58 -58
  30. torch_rechub/models/multi_task/ple.py +116 -130
  31. torch_rechub/models/multi_task/shared_bottom.py +45 -45
  32. torch_rechub/models/ranking/__init__.py +14 -11
  33. torch_rechub/models/ranking/afm.py +65 -63
  34. torch_rechub/models/ranking/autoint.py +102 -0
  35. torch_rechub/models/ranking/bst.py +61 -63
  36. torch_rechub/models/ranking/dcn.py +38 -38
  37. torch_rechub/models/ranking/dcn_v2.py +59 -69
  38. torch_rechub/models/ranking/deepffm.py +131 -123
  39. torch_rechub/models/ranking/deepfm.py +43 -42
  40. torch_rechub/models/ranking/dien.py +191 -191
  41. torch_rechub/models/ranking/din.py +93 -91
  42. torch_rechub/models/ranking/edcn.py +101 -117
  43. torch_rechub/models/ranking/fibinet.py +42 -50
  44. torch_rechub/models/ranking/widedeep.py +41 -41
  45. torch_rechub/trainers/__init__.py +4 -3
  46. torch_rechub/trainers/ctr_trainer.py +288 -128
  47. torch_rechub/trainers/match_trainer.py +336 -170
  48. torch_rechub/trainers/matching.md +3 -0
  49. torch_rechub/trainers/mtl_trainer.py +356 -207
  50. torch_rechub/trainers/seq_trainer.py +427 -0
  51. torch_rechub/utils/data.py +492 -360
  52. torch_rechub/utils/hstu_utils.py +198 -0
  53. torch_rechub/utils/match.py +457 -274
  54. torch_rechub/utils/model_utils.py +233 -0
  55. torch_rechub/utils/mtl.py +136 -126
  56. torch_rechub/utils/onnx_export.py +220 -0
  57. torch_rechub/utils/visualization.py +271 -0
  58. torch_rechub-0.0.5.dist-info/METADATA +402 -0
  59. torch_rechub-0.0.5.dist-info/RECORD +64 -0
  60. {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info}/WHEEL +1 -2
  61. {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info/licenses}/LICENSE +21 -21
  62. torch_rechub-0.0.3.dist-info/METADATA +0 -177
  63. torch_rechub-0.0.3.dist-info/RECORD +0 -55
  64. torch_rechub-0.0.3.dist-info/top_level.txt +0 -1
@@ -1,151 +1,148 @@
1
- """
2
- Date: created on 03/07/2022
3
- References:
4
- paper: Sparse-Interest Network for Sequential Recommendation
5
- url: https://arxiv.org/abs/2102.09267
6
- code: https://github.com/Qiaoyut/SINE/blob/master/model.py
7
- Authors: Bo Kang, klinux@live.com
8
- """
9
-
10
- import torch
11
- import torch.nn.functional as F
12
- from torch import einsum
13
-
14
class SINE(torch.nn.Module):
    """The match model proposed in `Sparse-Interest Network for Sequential Recommendation`
    (https://arxiv.org/abs/2102.09267).

    Args:
        history_features (list[str]): history feature names, used to index the historical
            item-id sequence from the input dict (0 is treated as padding).
        item_features (list[str]): item feature names, used to index the positive items from the input dict.
        neg_item_features (list[str]): negative item feature names, used to index the negative items from the input dict.
        num_items (int): number of items in the data.
        embedding_dim (int): dimensionality of the embeddings.
        hidden_dim (int): dimensionality of the hidden layer in the self-attention modules.
        num_concept (int): number of conceptual prototypes shared across users.
        num_intention (int): number of user-specific intentions selected out of the concepts.
        seq_max_len (int): max sequence length of the input item sequence.
        num_heads (int): number of attention heads in the self-attention modules, default to 1.
            NOTE(review): `user_tower` reshapes the head dimension assuming num_heads == 1 —
            confirm before using a larger value.
        temperature (float): temperature factor in the similarity measure, default to 1.0.
    """

    def __init__(self, history_features, item_features, neg_item_features, num_items, embedding_dim, hidden_dim, num_concept, num_intention, seq_max_len, num_heads=1, temperature=1.0):
        super().__init__()
        self.item_features = item_features
        self.history_features = history_features
        self.neg_item_features = neg_item_features
        self.temperature = temperature
        self.num_concept = num_concept
        self.num_intention = num_intention
        self.seq_max_len = seq_max_len

        # near-zero init keeps the initial concept similarities small
        std = 1e-4
        self.item_embedding = torch.nn.Embedding(num_items, embedding_dim)
        torch.nn.init.normal_(self.item_embedding.weight, 0, std)
        self.concept_embedding = torch.nn.Embedding(num_concept, embedding_dim)
        torch.nn.init.normal_(self.concept_embedding.weight, 0, std)
        self.position_embedding = torch.nn.Embedding(seq_max_len, embedding_dim)
        torch.nn.init.normal_(self.position_embedding.weight, 0, std)

        # self-attention weights producing the virtual concept vector z_u
        self.w_1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
        self.w_2 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)

        # projection used in the intention-assignment similarity
        self.w_3 = torch.nn.Parameter(torch.rand(embedding_dim, embedding_dim), requires_grad=True)

        # per-intention attention weights
        self.w_k1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
        self.w_k2 = torch.nn.Parameter(torch.rand(hidden_dim, num_intention), requires_grad=True)

        # attention weights for the next-intention aggregation
        self.w_4 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
        self.w_5 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)

        # None -> training (scores); "user"/"item" -> inference embedding mode
        self.mode = None

    def forward(self, x):
        """Return the user/item embeddings in inference mode, otherwise the
        [batch_size, 1 + n_neg_items] similarity scores."""
        user_embedding = self.user_tower(x)
        item_embedding = self.item_tower(x)
        if self.mode == "user":
            return user_embedding
        if self.mode == "item":
            return item_embedding

        y = torch.mul(user_embedding, item_embedding).sum(dim=-1)

        # NOTE: the paper's covariance regularizer on concept_embedding
        # (||cov(C)||_F^2 off-diagonal penalty) is not implemented here.
        return y

    def user_tower(self, x):
        """Compute the user representation v^u, shape [batch_size, 1, embedding_dim]
        (squeezed to [batch_size, embedding_dim] in "user" inference mode)."""
        if self.mode == "item":
            return None

        # sparse-interest extraction
        # user-specific historical item embedding X^u
        hist_item = x[self.history_features[0]]
        x_u = self.item_embedding(hist_item) + self.position_embedding.weight.unsqueeze(0)
        x_u_mask = (hist_item > 0).long()  # 0 marks padding positions

        # attention `a` over the history (masked softmax over the sequence dim)
        h_1 = einsum('bse, ed -> bsd', x_u, self.w_1).tanh()
        a_hist = F.softmax(einsum('bsd, dh -> bsh', h_1, self.w_2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)

        # virtual concept vector z_u (sums over attention heads; a weighted mean
        # only when num_heads == 1)
        z_u = einsum("bse, bsh -> be", x_u, a_hist)

        # similarity between the user's concept vector and all prototypes s^u
        s_u = einsum("be, te -> bt", z_u, self.concept_embedding.weight)
        s_u_top_k = torch.topk(s_u, self.num_intention)

        # user-specific conceptual prototypes C^u
        c_u = einsum("bk, bke -> bke", torch.sigmoid(s_u_top_k.values), self.concept_embedding(s_u_top_k.indices))

        # intention assignment P_{k|t}
        p_u = F.softmax(einsum("bse, bke -> bks", F.normalize(x_u @ self.w_3, dim=-1), F.normalize(c_u, p=2, dim=-1)), dim=1)

        # attention weighting P_{t|k}
        h_2 = einsum('bse, ed -> bsd', x_u, self.w_k1).tanh()
        a_concept_k = F.softmax(einsum('bsd, dk -> bsk', h_2, self.w_k2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)

        # multiple-interest encoding \phi_\theta^k(x^u)
        phi_u = einsum("bks, bse -> bke", p_u * a_concept_k.permute(0, 2, 1), x_u)

        # adaptive interest aggregation
        # intention-aware input behavior \hat{X^u}
        x_u_hat = einsum('bks, bke -> bse', p_u, c_u)

        # user's next intention C^u_{apt}; the reshape assumes num_heads == 1
        h_3 = einsum('bse, ed -> bsd', x_u_hat, self.w_4).tanh()
        c_u_apt = F.normalize(einsum("bs, bse -> be", F.softmax(einsum('bsd, dh -> bsh', h_3, self.w_5).reshape(-1, self.seq_max_len) + -1.e9 * (1 - x_u_mask.float()), dim=1), x_u_hat), -1)

        # aggregation weights e_k^u
        e_u = F.softmax(einsum('be, bke -> bk', c_u_apt, phi_u) / self.temperature, dim=1)

        # final user representation v^u
        v_u = einsum('bk, bke -> be', e_u, phi_u)

        if self.mode == "user":
            return v_u
        return v_u.unsqueeze(1)

    def item_tower(self, x):
        """Positive (and, during training, negative) item embeddings."""
        if self.mode == "user":
            return None
        pos_embedding = self.item_embedding(x[self.item_features[0]]).unsqueeze(1)
        if self.mode == "item":  # inference embedding mode
            return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
        neg_embeddings = self.item_embedding(x[self.neg_item_features[0]]).squeeze(1)  # [batch_size, n_neg_items, embed_dim]

        return torch.cat((pos_embedding, neg_embeddings), dim=1)  # [batch_size, 1 + n_neg_items, embed_dim]

    def gen_mask(self, x):
        """Return a [batch_size, seq_len] 0/1 mask of non-padding history positions.

        Fixed to index with the plain feature name, consistent with `user_tower`:
        the original used `self.history_features[0].name`, which raises
        AttributeError when `history_features` holds plain strings as documented.
        """
        his_list = x[self.history_features[0]]
        mask = (his_list > 0).long()
        return mask
1
+ """
2
+ Date: created on 03/07/2022
3
+ References:
4
+ paper: Sparse-Interest Network for Sequential Recommendation
5
+ url: https://arxiv.org/abs/2102.09267
6
+ code: https://github.com/Qiaoyut/SINE/blob/master/model.py
7
+ Authors: Bo Kang, klinux@live.com
8
+ """
9
+
10
+ import torch
11
+ import torch.nn.functional as F
12
+ from torch import einsum
13
+
14
+
15
class SINE(torch.nn.Module):
    """The match model proposed in `Sparse-Interest Network for Sequential Recommendation`
    (https://arxiv.org/abs/2102.09267).

    Args:
        history_features (list[str]): history feature names, used to index the historical
            item-id sequence from the input dict (0 is treated as padding).
        item_features (list[str]): item feature names, used to index the positive items from the input dict.
        neg_item_features (list[str]): negative item feature names, used to index the negative items from the input dict.
        num_items (int): number of items in the data.
        embedding_dim (int): dimensionality of the embeddings.
        hidden_dim (int): dimensionality of the hidden layer in the self-attention modules.
        num_concept (int): number of conceptual prototypes shared across users.
        num_intention (int): number of user-specific intentions selected out of the concepts.
        seq_max_len (int): max sequence length of the input item sequence.
        num_heads (int): number of attention heads in the self-attention modules, default to 1.
            NOTE(review): `user_tower` reshapes the head dimension assuming num_heads == 1 —
            confirm before using a larger value.
        temperature (float): temperature factor in the similarity measure, default to 1.0.
    """

    def __init__(self, history_features, item_features, neg_item_features, num_items, embedding_dim, hidden_dim, num_concept, num_intention, seq_max_len, num_heads=1, temperature=1.0):
        super().__init__()
        self.item_features = item_features
        self.history_features = history_features
        self.neg_item_features = neg_item_features
        self.temperature = temperature
        self.num_concept = num_concept
        self.num_intention = num_intention
        self.seq_max_len = seq_max_len

        # near-zero init keeps the initial concept similarities small
        std = 1e-4
        self.item_embedding = torch.nn.Embedding(num_items, embedding_dim)
        torch.nn.init.normal_(self.item_embedding.weight, 0, std)
        self.concept_embedding = torch.nn.Embedding(num_concept, embedding_dim)
        torch.nn.init.normal_(self.concept_embedding.weight, 0, std)
        self.position_embedding = torch.nn.Embedding(seq_max_len, embedding_dim)
        torch.nn.init.normal_(self.position_embedding.weight, 0, std)

        # self-attention weights producing the virtual concept vector z_u
        self.w_1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
        self.w_2 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)

        # projection used in the intention-assignment similarity
        self.w_3 = torch.nn.Parameter(torch.rand(embedding_dim, embedding_dim), requires_grad=True)

        # per-intention attention weights
        self.w_k1 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
        self.w_k2 = torch.nn.Parameter(torch.rand(hidden_dim, num_intention), requires_grad=True)

        # attention weights for the next-intention aggregation
        self.w_4 = torch.nn.Parameter(torch.rand(embedding_dim, hidden_dim), requires_grad=True)
        self.w_5 = torch.nn.Parameter(torch.rand(hidden_dim, num_heads), requires_grad=True)

        # None -> training (scores); "user"/"item" -> inference embedding mode
        self.mode = None

    def forward(self, x):
        """Return the user/item embeddings in inference mode, otherwise the
        [batch_size, 1 + n_neg_items] similarity scores."""
        user_embedding = self.user_tower(x)
        item_embedding = self.item_tower(x)
        if self.mode == "user":
            return user_embedding
        if self.mode == "item":
            return item_embedding

        y = torch.mul(user_embedding, item_embedding).sum(dim=-1)

        # NOTE: the paper's covariance regularizer on concept_embedding
        # (||cov(C)||_F^2 off-diagonal penalty) is not implemented here.
        return y

    def user_tower(self, x):
        """Compute the user representation v^u, shape [batch_size, 1, embedding_dim]
        (squeezed to [batch_size, embedding_dim] in "user" inference mode)."""
        if self.mode == "item":
            return None

        # sparse-interest extraction
        # user-specific historical item embedding X^u
        hist_item = x[self.history_features[0]]
        x_u = self.item_embedding(hist_item) + self.position_embedding.weight.unsqueeze(0)
        x_u_mask = (hist_item > 0).long()  # 0 marks padding positions

        # attention `a` over the history (masked softmax over the sequence dim)
        h_1 = einsum('bse, ed -> bsd', x_u, self.w_1).tanh()
        a_hist = F.softmax(einsum('bsd, dh -> bsh', h_1, self.w_2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)

        # virtual concept vector z_u (sums over attention heads; a weighted mean
        # only when num_heads == 1)
        z_u = einsum("bse, bsh -> be", x_u, a_hist)

        # similarity between the user's concept vector and all prototypes s^u
        s_u = einsum("be, te -> bt", z_u, self.concept_embedding.weight)
        s_u_top_k = torch.topk(s_u, self.num_intention)

        # user-specific conceptual prototypes C^u
        c_u = einsum("bk, bke -> bke", torch.sigmoid(s_u_top_k.values), self.concept_embedding(s_u_top_k.indices))

        # intention assignment P_{k|t}
        p_u = F.softmax(einsum("bse, bke -> bks", F.normalize(x_u @ self.w_3, dim=-1), F.normalize(c_u, p=2, dim=-1)), dim=1)

        # attention weighting P_{t|k}
        h_2 = einsum('bse, ed -> bsd', x_u, self.w_k1).tanh()
        a_concept_k = F.softmax(einsum('bsd, dk -> bsk', h_2, self.w_k2) + -1.e9 * (1 - x_u_mask.unsqueeze(-1).float()), dim=1)

        # multiple-interest encoding \phi_\theta^k(x^u)
        phi_u = einsum("bks, bse -> bke", p_u * a_concept_k.permute(0, 2, 1), x_u)

        # adaptive interest aggregation
        # intention-aware input behavior \hat{X^u}
        x_u_hat = einsum('bks, bke -> bse', p_u, c_u)

        # user's next intention C^u_{apt}; the reshape assumes num_heads == 1
        h_3 = einsum('bse, ed -> bsd', x_u_hat, self.w_4).tanh()
        c_u_apt = F.normalize(einsum("bs, bse -> be", F.softmax(einsum('bsd, dh -> bsh', h_3, self.w_5).reshape(-1, self.seq_max_len) + -1.e9 * (1 - x_u_mask.float()), dim=1), x_u_hat), -1)

        # aggregation weights e_k^u
        e_u = F.softmax(einsum('be, bke -> bk', c_u_apt, phi_u) / self.temperature, dim=1)

        # final user representation v^u
        v_u = einsum('bk, bke -> be', e_u, phi_u)

        if self.mode == "user":
            return v_u
        return v_u.unsqueeze(1)

    def item_tower(self, x):
        """Positive (and, during training, negative) item embeddings."""
        if self.mode == "user":
            return None
        pos_embedding = self.item_embedding(x[self.item_features[0]]).unsqueeze(1)
        if self.mode == "item":  # inference embedding mode
            return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
        neg_embeddings = self.item_embedding(x[self.neg_item_features[0]]).squeeze(1)  # [batch_size, n_neg_items, embed_dim]

        return torch.cat((pos_embedding, neg_embeddings), dim=1)  # [batch_size, 1 + n_neg_items, embed_dim]

    def gen_mask(self, x):
        """Return a [batch_size, seq_len] 0/1 mask of non-padding history positions.

        Fixed to index with the plain feature name, consistent with `user_tower`:
        the original used `self.history_features[0].name`, which raises
        AttributeError when `history_features` holds plain strings as documented.
        """
        his_list = x[self.history_features[0]]
        mask = (his_list > 0).long()
        return mask
@@ -1,83 +1,81 @@
1
- """
2
- Date: created on 17/09/2022
3
- References:
4
- paper: STAMP: Short-Term Attention/Memory Priority Model for Session-based Recommendation
5
- url: https://dl.acm.org/doi/10.1145/3219819.3219950
6
- official Tensorflow implementation: https://github.com/uestcnlp/STAMP
7
- Authors: Bo Kang, klinux@live.com
8
- """
9
-
10
- import torch
11
- import torch.nn.functional as F
12
- import torch.nn as nn
13
-
14
-
15
class STAMP(nn.Module):
    """STAMP: Short-Term Attention/Memory Priority model for session-based
    recommendation (https://dl.acm.org/doi/10.1145/3219819.3219950).

    Args:
        item_history_feature: session sequence feature; must expose `vocab_size`,
            `embed_dim` and `name` (the input-dict key of the padded item-id
            sequence, 0 = padding, items assumed left-aligned).
        weight_std (float): std of the normal init for the attention parameters.
        emb_std (float): std of the normal init for linear/embedding weights.
    """

    def __init__(self, item_history_feature, weight_std, emb_std):
        super(STAMP, self).__init__()

        # item embedding layer (index 0 reserved for padding)
        self.item_history_feature = item_history_feature
        n_items, item_emb_dim = item_history_feature.vocab_size, item_history_feature.embed_dim
        self.item_emb = nn.Embedding(n_items, item_emb_dim, padding_idx=0)

        # weights and biases for the attention computation (Eq. 7)
        self.w_0 = nn.Parameter(torch.zeros(item_emb_dim, 1))
        self.w_1_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
        self.w_2_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
        self.w_3_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
        self.b_a = nn.Parameter(torch.zeros(item_emb_dim))
        self._init_parameter_weights(weight_std)

        # output MLPs for the general (f_s) and short-term (f_t) interests
        self.f_s = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
        self.f_t = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
        self.emb_std = emb_std
        self.apply(self._init_module_weights)

    def _init_parameter_weights(self, weight_std):
        """Initialize the raw attention parameters with a normal distribution."""
        nn.init.normal_(self.w_0, std=weight_std)
        nn.init.normal_(self.w_1_t, std=weight_std)
        nn.init.normal_(self.w_2_t, std=weight_std)
        nn.init.normal_(self.w_3_t, std=weight_std)

    def _init_module_weights(self, module):
        """Initialize linear/embedding submodules; keep the padding row at zero."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(std=self.emb_std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(std=self.emb_std)
            # re-zero the padding embedding: normal_ above overwrote the row
            # that nn.Embedding(padding_idx=0) zeroed at construction time
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()

    def forward(self, input_dict):
        """Score every item in the vocabulary for each session.

        Args:
            input_dict (dict): maps `item_history_feature.name` to a LongTensor
                of padded item ids, shape [batch_size, seq_len].

        Returns:
            Tensor: candidate scores, shape [batch_size, n_items].
        """
        # Embeddings of the session items, with padded positions zeroed
        seq = input_dict[self.item_history_feature.name]
        value_mask = (seq != 0).unsqueeze(-1)
        value_counts = value_mask.sum(dim=1, keepdim=True).squeeze(-1)
        item_emb_batch = self.item_emb(seq) * value_mask

        # Embedding of the latest clicked item; the clamps guard against an
        # all-padding session (index -1 / division by zero in the original)
        x_t = self.item_emb(torch.gather(seq, 1, (value_counts - 1).clamp(min=0)))

        # Eq. 2, user's general interest in the current session
        m_s = (item_emb_batch.sum(1) / value_counts.clamp(min=1)).unsqueeze(1)

        # Eq. 7, compute attention coefficients (L1-normalized over the sequence)
        a = F.normalize(torch.exp(torch.sigmoid(item_emb_batch @ self.w_1_t + x_t @ self.w_2_t + m_s @ self.w_3_t + self.b_a) @ self.w_0) * value_mask, p=1, dim=1)

        # Eq. 8, compute user's attention-based interests
        m_a = (a * item_emb_batch).sum(1) + m_s.squeeze(1)

        # Eq. 3, compute the output state of the general interest
        h_s = self.f_s(m_a)

        # Eq. 9, compute the output state of the short-term interest
        h_t = self.f_t(x_t).squeeze(1)

        # Eq. 4, compute candidate scores against all item embeddings
        z = h_s * h_t @ self.item_emb.weight.T

        return z
1
+ """
2
+ Date: created on 17/09/2022
3
+ References:
4
+ paper: STAMP: Short-Term Attention/Memory Priority Model for Session-based Recommendation
5
+ url: https://dl.acm.org/doi/10.1145/3219819.3219950
6
+ official Tensorflow implementation: https://github.com/uestcnlp/STAMP
7
+ Authors: Bo Kang, klinux@live.com
8
+ """
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+ import torch.nn.functional as F
13
+
14
+
15
class STAMP(nn.Module):
    """STAMP: Short-Term Attention/Memory Priority model for session-based
    recommendation (https://dl.acm.org/doi/10.1145/3219819.3219950).

    Args:
        item_history_feature: session sequence feature; must expose `vocab_size`,
            `embed_dim` and `name` (the input-dict key of the padded item-id
            sequence, 0 = padding, items assumed left-aligned).
        weight_std (float): std of the normal init for the attention parameters.
        emb_std (float): std of the normal init for linear/embedding weights.
    """

    def __init__(self, item_history_feature, weight_std, emb_std):
        super(STAMP, self).__init__()

        # item embedding layer (index 0 reserved for padding)
        self.item_history_feature = item_history_feature
        n_items, item_emb_dim = item_history_feature.vocab_size, item_history_feature.embed_dim
        self.item_emb = nn.Embedding(n_items, item_emb_dim, padding_idx=0)

        # weights and biases for the attention computation (Eq. 7)
        self.w_0 = nn.Parameter(torch.zeros(item_emb_dim, 1))
        self.w_1_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
        self.w_2_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
        self.w_3_t = nn.Parameter(torch.zeros(item_emb_dim, item_emb_dim))
        self.b_a = nn.Parameter(torch.zeros(item_emb_dim))
        self._init_parameter_weights(weight_std)

        # output MLPs for the general (f_s) and short-term (f_t) interests
        self.f_s = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
        self.f_t = nn.Sequential(nn.Tanh(), nn.Linear(item_emb_dim, item_emb_dim))
        self.emb_std = emb_std
        self.apply(self._init_module_weights)

    def _init_parameter_weights(self, weight_std):
        """Initialize the raw attention parameters with a normal distribution."""
        nn.init.normal_(self.w_0, std=weight_std)
        nn.init.normal_(self.w_1_t, std=weight_std)
        nn.init.normal_(self.w_2_t, std=weight_std)
        nn.init.normal_(self.w_3_t, std=weight_std)

    def _init_module_weights(self, module):
        """Initialize linear/embedding submodules; keep the padding row at zero."""
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(std=self.emb_std)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(std=self.emb_std)
            # re-zero the padding embedding: normal_ above overwrote the row
            # that nn.Embedding(padding_idx=0) zeroed at construction time
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()

    def forward(self, input_dict):
        """Score every item in the vocabulary for each session.

        Args:
            input_dict (dict): maps `item_history_feature.name` to a LongTensor
                of padded item ids, shape [batch_size, seq_len].

        Returns:
            Tensor: candidate scores, shape [batch_size, n_items].
        """
        # Embeddings of the session items, with padded positions zeroed
        seq = input_dict[self.item_history_feature.name]
        value_mask = (seq != 0).unsqueeze(-1)
        value_counts = value_mask.sum(dim=1, keepdim=True).squeeze(-1)
        item_emb_batch = self.item_emb(seq) * value_mask

        # Embedding of the latest clicked item; the clamps guard against an
        # all-padding session (index -1 / division by zero in the original)
        x_t = self.item_emb(torch.gather(seq, 1, (value_counts - 1).clamp(min=0)))

        # Eq. 2, user's general interest in the current session
        m_s = (item_emb_batch.sum(1) / value_counts.clamp(min=1)).unsqueeze(1)

        # Eq. 7, compute attention coefficients (L1-normalized over the sequence)
        a = F.normalize(torch.exp(torch.sigmoid(item_emb_batch @ self.w_1_t + x_t @ self.w_2_t + m_s @ self.w_3_t + self.b_a) @ self.w_0) * value_mask, p=1, dim=1)

        # Eq. 8, compute user's attention-based interests
        m_a = (a * item_emb_batch).sum(1) + m_s.squeeze(1)

        # Eq. 3, compute the output state of the general interest
        h_s = self.f_s(m_a)

        # Eq. 9, compute the output state of the short-term interest
        h_t = self.f_t(x_t).squeeze(1)

        # Eq. 4, compute candidate scores against all item embeddings
        z = h_s * h_t @ self.item_emb.weight.T

        return z
@@ -1,71 +1,75 @@
1
- """
2
- Date: create on 23/05/2022
3
- References:
4
- paper: (RecSys'2016) Deep Neural Networks for YouTube Recommendations
5
- url: https://dl.acm.org/doi/10.1145/2959100.2959190
6
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
- """
8
-
9
- import torch
10
- import torch.nn.functional as F
11
- from ...basic.layers import MLP, EmbeddingLayer
12
-
13
-
14
class YoutubeDNN(torch.nn.Module):
    """The match model mentioned in `Deep Neural Networks for YouTube Recommendations` paper.
    It's a DSSM match model trained by global softmax loss on list-wise samples.
    Note in origin paper, it's without item dnn tower and train item embedding directly.

    Args:
        user_features (list[Feature Class]): training by the user tower module.
        item_features (list[Feature Class]): training by the embedding table, it's the item id feature.
        neg_item_feature (list[Feature Class]): training by the embedding table, it's the negative items id feature.
        user_params (dict): the params of the User Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
        temperature (float): temperature factor for similarity score, default to 1.0.
    """

    def __init__(self, user_features, item_features, neg_item_feature, user_params, temperature=1.0):
        super().__init__()
        self.user_features = user_features
        self.item_features = item_features
        self.neg_item_feature = neg_item_feature
        self.temperature = temperature
        # generator avoids building an intermediate list
        self.user_dims = sum(fea.embed_dim for fea in user_features)
        self.embedding = EmbeddingLayer(user_features + item_features)
        self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
        # None -> training (scores); "user"/"item" -> inference embedding mode
        self.mode = None

    def forward(self, x):
        """Return user/item embeddings in inference mode, otherwise the
        temperature-scaled cosine scores, shape [batch_size, 1 + n_neg_items]."""
        user_embedding = self.user_tower(x)
        item_embedding = self.item_tower(x)
        if self.mode == "user":
            return user_embedding
        if self.mode == "item":
            return item_embedding

        # both towers are L2-normalized, so the dot product is a cosine score
        y = torch.mul(user_embedding, item_embedding).sum(dim=2)
        y = y / self.temperature
        return y

    def user_tower(self, x):
        """L2-normalized user embedding from the user MLP tower."""
        if self.mode == "item":
            return None
        input_user = self.embedding(x, self.user_features, squeeze_dim=True)  # [batch_size, num_features*deep_dims]
        user_embedding = self.user_mlp(input_user).unsqueeze(1)  # [batch_size, 1, embed_dim]
        user_embedding = F.normalize(user_embedding, p=2, dim=2)
        if self.mode == "user":
            return user_embedding.squeeze(1)  # inference embedding mode -> [batch_size, embed_dim]
        return user_embedding

    def item_tower(self, x):
        """L2-normalized positive (and, during training, negative) item embeddings."""
        if self.mode == "user":
            return None
        pos_embedding = self.embedding(x, self.item_features, squeeze_dim=False)  # [batch_size, 1, embed_dim]
        pos_embedding = F.normalize(pos_embedding, p=2, dim=2)
        if self.mode == "item":  # inference embedding mode
            return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
        neg_embeddings = self.embedding(x, self.neg_item_feature, squeeze_dim=False).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
        neg_embeddings = F.normalize(neg_embeddings, p=2, dim=2)
        return torch.cat((pos_embedding, neg_embeddings), dim=1)  # [batch_size, 1+n_neg_items, embed_dim]
1
+ """
2
+ Date: create on 23/05/2022
3
+ References:
4
+ paper: (RecSys'2016) Deep Neural Networks for YouTube Recommendations
5
+ url: https://dl.acm.org/doi/10.1145/2959100.2959190
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import torch
10
+ import torch.nn.functional as F
11
+
12
+ from ...basic.layers import MLP, EmbeddingLayer
13
+
14
+
15
class YoutubeDNN(torch.nn.Module):
    """The match model mentioned in `Deep Neural Networks for YouTube Recommendations` paper.
    It's a DSSM match model trained by global softmax loss on list-wise samples.
    Note in origin paper, it's without item dnn tower and train item embedding directly.

    Args:
        user_features (list[Feature Class]): training by the user tower module.
        item_features (list[Feature Class]): training by the embedding table, it's the item id feature.
        neg_item_feature (list[Feature Class]): training by the embedding table, it's the negative items id feature.
        user_params (dict): the params of the User Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
        temperature (float): temperature factor for similarity score, default to 1.0.
    """

    def __init__(self, user_features, item_features, neg_item_feature, user_params, temperature=1.0):
        super().__init__()
        self.user_features = user_features
        self.item_features = item_features
        self.neg_item_feature = neg_item_feature
        self.temperature = temperature
        # generator avoids building an intermediate list
        self.user_dims = sum(fea.embed_dim for fea in user_features)
        self.embedding = EmbeddingLayer(user_features + item_features)
        self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
        # None -> training (scores); "user"/"item" -> inference embedding mode
        self.mode = None

    def forward(self, x):
        """Return user/item embeddings in inference mode, otherwise the
        temperature-scaled cosine scores, shape [batch_size, 1 + n_neg_items]."""
        user_embedding = self.user_tower(x)
        item_embedding = self.item_tower(x)
        if self.mode == "user":
            return user_embedding
        if self.mode == "item":
            return item_embedding

        # both towers are L2-normalized, so the dot product is a cosine score
        y = torch.mul(user_embedding, item_embedding).sum(dim=2)
        y = y / self.temperature
        return y

    def user_tower(self, x):
        """L2-normalized user embedding from the user MLP tower."""
        if self.mode == "item":
            return None
        input_user = self.embedding(x, self.user_features, squeeze_dim=True)  # [batch_size, num_features*deep_dims]
        user_embedding = self.user_mlp(input_user).unsqueeze(1)  # [batch_size, 1, embed_dim]
        user_embedding = F.normalize(user_embedding, p=2, dim=2)
        if self.mode == "user":
            return user_embedding.squeeze(1)  # inference embedding mode -> [batch_size, embed_dim]
        return user_embedding

    def item_tower(self, x):
        """L2-normalized positive (and, during training, negative) item embeddings."""
        if self.mode == "user":
            return None
        pos_embedding = self.embedding(x, self.item_features, squeeze_dim=False)  # [batch_size, 1, embed_dim]
        pos_embedding = F.normalize(pos_embedding, p=2, dim=2)
        if self.mode == "item":  # inference embedding mode
            return pos_embedding.squeeze(1)  # [batch_size, embed_dim]
        neg_embeddings = self.embedding(x, self.neg_item_feature, squeeze_dim=False).squeeze(1)  # [batch_size, n_neg_items, embed_dim]
        neg_embeddings = F.normalize(neg_embeddings, p=2, dim=2)
        return torch.cat((pos_embedding, neg_embeddings), dim=1)  # [batch_size, 1+n_neg_items, embed_dim]