torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. torch_rechub/__init__.py +14 -0
  2. torch_rechub/basic/activation.py +54 -54
  3. torch_rechub/basic/callback.py +33 -33
  4. torch_rechub/basic/features.py +87 -94
  5. torch_rechub/basic/initializers.py +92 -92
  6. torch_rechub/basic/layers.py +994 -720
  7. torch_rechub/basic/loss_func.py +223 -34
  8. torch_rechub/basic/metaoptimizer.py +76 -72
  9. torch_rechub/basic/metric.py +251 -250
  10. torch_rechub/models/generative/__init__.py +6 -0
  11. torch_rechub/models/generative/hllm.py +249 -0
  12. torch_rechub/models/generative/hstu.py +189 -0
  13. torch_rechub/models/matching/__init__.py +13 -11
  14. torch_rechub/models/matching/comirec.py +193 -188
  15. torch_rechub/models/matching/dssm.py +72 -66
  16. torch_rechub/models/matching/dssm_facebook.py +77 -79
  17. torch_rechub/models/matching/dssm_senet.py +28 -16
  18. torch_rechub/models/matching/gru4rec.py +85 -87
  19. torch_rechub/models/matching/mind.py +103 -101
  20. torch_rechub/models/matching/narm.py +82 -76
  21. torch_rechub/models/matching/sasrec.py +143 -140
  22. torch_rechub/models/matching/sine.py +148 -151
  23. torch_rechub/models/matching/stamp.py +81 -83
  24. torch_rechub/models/matching/youtube_dnn.py +75 -71
  25. torch_rechub/models/matching/youtube_sbc.py +98 -98
  26. torch_rechub/models/multi_task/__init__.py +7 -5
  27. torch_rechub/models/multi_task/aitm.py +83 -84
  28. torch_rechub/models/multi_task/esmm.py +56 -55
  29. torch_rechub/models/multi_task/mmoe.py +58 -58
  30. torch_rechub/models/multi_task/ple.py +116 -130
  31. torch_rechub/models/multi_task/shared_bottom.py +45 -45
  32. torch_rechub/models/ranking/__init__.py +14 -11
  33. torch_rechub/models/ranking/afm.py +65 -63
  34. torch_rechub/models/ranking/autoint.py +102 -0
  35. torch_rechub/models/ranking/bst.py +61 -63
  36. torch_rechub/models/ranking/dcn.py +38 -38
  37. torch_rechub/models/ranking/dcn_v2.py +59 -69
  38. torch_rechub/models/ranking/deepffm.py +131 -123
  39. torch_rechub/models/ranking/deepfm.py +43 -42
  40. torch_rechub/models/ranking/dien.py +191 -191
  41. torch_rechub/models/ranking/din.py +93 -91
  42. torch_rechub/models/ranking/edcn.py +101 -117
  43. torch_rechub/models/ranking/fibinet.py +42 -50
  44. torch_rechub/models/ranking/widedeep.py +41 -41
  45. torch_rechub/trainers/__init__.py +4 -3
  46. torch_rechub/trainers/ctr_trainer.py +288 -128
  47. torch_rechub/trainers/match_trainer.py +336 -170
  48. torch_rechub/trainers/matching.md +3 -0
  49. torch_rechub/trainers/mtl_trainer.py +356 -207
  50. torch_rechub/trainers/seq_trainer.py +427 -0
  51. torch_rechub/utils/data.py +492 -360
  52. torch_rechub/utils/hstu_utils.py +198 -0
  53. torch_rechub/utils/match.py +457 -274
  54. torch_rechub/utils/model_utils.py +233 -0
  55. torch_rechub/utils/mtl.py +136 -126
  56. torch_rechub/utils/onnx_export.py +220 -0
  57. torch_rechub/utils/visualization.py +271 -0
  58. torch_rechub-0.0.5.dist-info/METADATA +402 -0
  59. torch_rechub-0.0.5.dist-info/RECORD +64 -0
  60. {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info}/WHEEL +1 -2
  61. {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info/licenses}/LICENSE +21 -21
  62. torch_rechub-0.0.3.dist-info/METADATA +0 -177
  63. torch_rechub-0.0.3.dist-info/RECORD +0 -55
  64. torch_rechub-0.0.3.dist-info/top_level.txt +0 -1
@@ -1,98 +1,98 @@
1
- """
2
- Date: create on 23/05/2022
3
- References:
4
- paper: (RecSys'2019) Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations
5
- url: https://dl.acm.org/doi/10.1145/3298689.3346996
6
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
- """
8
-
9
- import torch
10
- import torch.nn.functional as F
11
- from ...basic.layers import MLP, EmbeddingLayer
12
- import numpy as np
13
-
14
-
15
- class YoutubeSBC(torch.nn.Module):
16
- """Sampling-Bias-Corrected Neural Modeling for Matching by Youtube.
17
- It's a DSSM match model trained by In-batch softmax loss on list-wise samples, and add sample debias module.
18
-
19
- Args:
20
- user_features (list[Feature Class]): training by the user tower module.
21
- item_features (list[Feature Class]): training by the item tower module.
22
- sample_weight_feature (list[Feature Class]): used for sampling bias corrected in training.
23
- user_params (dict): the params of the User Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
24
- item_params (dict): the params of the Item Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}.
25
- batch_size (int): same as batch size of DataLoader, used in in-batch sampling
26
- n_neg (int): the number of negative sample for every positive sample, default to 3. Note it's must smaller than batch_size.
27
- temperature (float): temperature factor for similarity score, default to 1.0.
28
- """
29
-
30
- def __init__(self,
31
- user_features,
32
- item_features,
33
- sample_weight_feature,
34
- user_params,
35
- item_params,
36
- batch_size,
37
- n_neg=3,
38
- temperature=1.0):
39
- super().__init__()
40
- self.user_features = user_features
41
- self.item_features = item_features
42
- self.sample_weight_feature = sample_weight_feature
43
- self.n_neg = n_neg
44
- self.temperature = temperature
45
- self.user_dims = sum([fea.embed_dim for fea in user_features])
46
- self.item_dims = sum([fea.embed_dim for fea in item_features])
47
- self.batch_size = batch_size
48
- self.embedding = EmbeddingLayer(user_features + item_features + sample_weight_feature)
49
- self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
50
- self.item_mlp = MLP(self.item_dims, output_layer=False, **item_params)
51
- self.mode = None
52
-
53
- # in-batch sampling index
54
- self.index0 = np.repeat(np.arange(batch_size), n_neg + 1)
55
- self.index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
56
- self.index1[np.where(self.index1 >= batch_size)] -= batch_size
57
-
58
- def forward(self, x):
59
- user_embedding = self.user_tower(x) # (batch_size, embedding_dim)
60
- item_embedding = self.item_tower(x) # (batch_size, embedding_dim)
61
- if self.mode == "user":
62
- return user_embedding
63
- if self.mode == "item":
64
- return item_embedding
65
-
66
- # pred[i, j] means predicted score that user_i give to item_j
67
- pred = torch.cosine_similarity(user_embedding.unsqueeze(1), item_embedding, dim=2) # (batch_size, batch_size)
68
-
69
- # get sample weight of items in this batch
70
- sample_weight = self.embedding(x, self.sample_weight_feature, squeeze_dim=True).squeeze(1) # (batch_size)
71
- scores = pred - torch.log(sample_weight) #Sampling Bias Corrected, using broadcast. (batch_size, batch_size)
72
-
73
- if user_embedding.shape[0] * (self.n_neg + 1) != self.index0.shape[0]: # last batch
74
- batch_size = user_embedding.shape[0]
75
- index0 = self.index0[:batch_size * (self.n_neg + 1)]
76
- index1 = self.index1[:batch_size * (self.n_neg + 1)]
77
- index0[np.where(index0 >= batch_size)] -= batch_size
78
- index1[np.where(index1 >= batch_size)] -= batch_size
79
- scores = scores[index0, index1] # (batch_size, 1 + self.n_neg)
80
- else:
81
- scores = scores[self.index0, self.index1] # (batch_size, 1 + self.n_neg)
82
-
83
- scores = scores / self.temperature
84
- return scores.view(-1, self.n_neg + 1) #(batch_size, 1 + self.n_neg)
85
-
86
- def user_tower(self, x):
87
- if self.mode == "item":
88
- return None
89
- input_user = self.embedding(x, self.user_features, squeeze_dim=True) #[batch_size, num_features*deep_dims]
90
- user_embedding = self.user_mlp(input_user) #[batch_size, user_params["dims"][-1]]
91
- return user_embedding
92
-
93
- def item_tower(self, x):
94
- if self.mode == "user":
95
- return None
96
- input_item = self.embedding(x, self.item_features, squeeze_dim=True) #[batch_size, num_features*embed_dim]
97
- item_embedding = self.item_mlp(input_item) #[batch_size, item_params["dims"][-1]]
98
- return item_embedding
1
+ """
2
+ Date: create on 23/05/2022
3
+ References:
4
+ paper: (RecSys'2019) Sampling-Bias-Corrected Neural Modeling for Large Corpus Item Recommendations
5
+ url: https://dl.acm.org/doi/10.1145/3298689.3346996
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import numpy as np
10
+ import torch
11
+ import torch.nn.functional as F
12
+
13
+ from ...basic.layers import MLP, EmbeddingLayer
14
+
15
+
16
class YoutubeSBC(torch.nn.Module):
    """Sampling-Bias-Corrected Neural Modeling for Matching by Youtube.
    It's a DSSM match model trained by in-batch softmax loss on list-wise samples, with a sampling-debias module.

    Args:
        user_features (list[Feature Class]): training by the user tower module.
        item_features (list[Feature Class]): training by the item tower module.
        sample_weight_feature (list[Feature Class]): used for sampling bias correction in training.
        user_params (dict): the params of the User Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`.
        item_params (dict): the params of the Item Tower module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`.
        batch_size (int): same as batch size of DataLoader, used in in-batch sampling.
        n_neg (int): the number of negative samples for every positive sample, default to 3. Note it must be smaller than batch_size.
        temperature (float): temperature factor for similarity score, default to 1.0.
    """

    def __init__(self, user_features, item_features, sample_weight_feature, user_params, item_params, batch_size, n_neg=3, temperature=1.0):
        super().__init__()
        self.user_features = user_features
        self.item_features = item_features
        self.sample_weight_feature = sample_weight_feature
        self.n_neg = n_neg
        self.temperature = temperature
        self.user_dims = sum([fea.embed_dim for fea in user_features])
        self.item_dims = sum([fea.embed_dim for fea in item_features])
        self.batch_size = batch_size
        self.embedding = EmbeddingLayer(user_features + item_features + sample_weight_feature)
        self.user_mlp = MLP(self.user_dims, output_layer=False, **user_params)
        self.item_mlp = MLP(self.item_dims, output_layer=False, **item_params)
        self.mode = None

        # Precomputed in-batch sampling indices: row i keeps item i (the positive)
        # plus the next n_neg items (wrapping around the batch) as negatives.
        self.index0 = np.repeat(np.arange(batch_size), n_neg + 1)
        self.index1 = np.concatenate([np.arange(i, i + n_neg + 1) for i in range(batch_size)])
        self.index1[np.where(self.index1 >= batch_size)] -= batch_size

    def forward(self, x):
        user_embedding = self.user_tower(x)  # (batch_size, embedding_dim)
        item_embedding = self.item_tower(x)  # (batch_size, embedding_dim)
        if self.mode == "user":
            return user_embedding
        if self.mode == "item":
            return item_embedding

        # pred[i, j] means predicted score that user_i gives to item_j
        pred = torch.cosine_similarity(user_embedding.unsqueeze(1), item_embedding, dim=2)  # (batch_size, batch_size)

        # get sample weight of items in this batch
        sample_weight = self.embedding(x, self.sample_weight_feature, squeeze_dim=True).squeeze(1)  # (batch_size)
        # Sampling Bias Corrected, using broadcast. (batch_size, batch_size)
        scores = pred - torch.log(sample_weight)

        if user_embedding.shape[0] * (self.n_neg + 1) != self.index0.shape[0]:  # last (short) batch
            batch_size = user_embedding.shape[0]
            # BUGFIX: slicing a numpy array yields a *view*; the in-place
            # subtraction below previously wrote through the view and corrupted
            # the cached full-batch indices for every later epoch. Copy first.
            index0 = self.index0[:batch_size * (self.n_neg + 1)].copy()
            index1 = self.index1[:batch_size * (self.n_neg + 1)].copy()
            index0[np.where(index0 >= batch_size)] -= batch_size
            index1[np.where(index1 >= batch_size)] -= batch_size
            scores = scores[index0, index1]
        else:
            scores = scores[self.index0, self.index1]

        scores = scores / self.temperature
        return scores.view(-1, self.n_neg + 1)  # (batch_size, 1 + n_neg)

    def user_tower(self, x):
        if self.mode == "item":
            return None
        # [batch_size, num_features * embed_dim]
        input_user = self.embedding(x, self.user_features, squeeze_dim=True)
        user_embedding = self.user_mlp(input_user)  # [batch_size, user_params["dims"][-1]]
        return user_embedding

    def item_tower(self, x):
        if self.mode == "user":
            return None
        # [batch_size, num_features * embed_dim]
        input_item = self.embedding(x, self.item_features, squeeze_dim=True)
        item_embedding = self.item_mlp(input_item)  # [batch_size, item_params["dims"][-1]]
        return item_embedding
@@ -1,5 +1,7 @@
1
- from .shared_bottom import SharedBottom
2
- from .esmm import ESMM
3
- from .mmoe import MMOE
4
- from .ple import PLE
5
- from .aitm import AITM
1
+ __all__ = ['SharedBottom', 'ESMM', 'MMOE', 'PLE', 'AITM']
2
+
3
+ from .aitm import AITM
4
+ from .esmm import ESMM
5
+ from .mmoe import MMOE
6
+ from .ple import PLE
7
+ from .shared_bottom import SharedBottom
@@ -1,84 +1,83 @@
1
- """
2
- Date: create on 14/05/2022
3
- References:
4
- paper: (KDD'2021) Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising
5
- url: https://arxiv.org/abs/2105.08489
6
- code: https://github.com/adtalos/AITM-torch
7
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
8
- """
9
-
10
- import torch
11
- import torch.nn as nn
12
-
13
- from ...basic.layers import MLP, EmbeddingLayer
14
-
15
-
16
- class AITM(nn.Module):
17
- """ Adaptive Information Transfer Multi-task (AITM) framework.
18
- all the task type must be binary classificatioon.
19
-
20
- Args:
21
- features (list[Feature Class]): training by the whole module.
22
- n_task (int): the number of binary classificatioon task.
23
- bottom_params (dict): the params of all the botwer expert module, keys include:`{"dims":list, "activation":str, "dropout":float}`.
24
- tower_params_list (list): the list of tower params dict, the keys same as expert_params.
25
- """
26
-
27
- def __init__(self, features, n_task, bottom_params, tower_params_list):
28
- super().__init__()
29
- self.features = features
30
- self.n_task = n_task
31
- self.input_dims = sum([fea.embed_dim for fea in features])
32
- self.embedding = EmbeddingLayer(features)
33
-
34
- self.bottoms = nn.ModuleList(
35
- MLP(self.input_dims, output_layer=False, **bottom_params) for i in range(self.n_task))
36
- self.towers = nn.ModuleList(MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
37
-
38
- self.info_gates = nn.ModuleList(
39
- MLP(bottom_params["dims"][-1], output_layer=False, dims=[bottom_params["dims"][-1]])
40
- for i in range(self.n_task - 1))
41
- self.aits = nn.ModuleList(AttentionLayer(bottom_params["dims"][-1]) for _ in range(self.n_task - 1))
42
-
43
- def forward(self, x):
44
- embed_x = self.embedding(x, self.features, squeeze_dim=True) #[batch_size, *]
45
- input_towers = [self.bottoms[i](embed_x) for i in range(self.n_task)] #[i]:[batch_size, bottom_dims[-1]]
46
- for i in range(1, self.n_task): #for task 1:n-1
47
- info = self.info_gates[i - 1](input_towers[i - 1]).unsqueeze(1) #[batch_size,1,bottom_dims[-1]]
48
- ait_input = torch.cat([input_towers[i].unsqueeze(1), info], dim=1) #[batch_size, 2, bottom_dims[-1]]
49
- input_towers[i] = self.aits[i - 1](ait_input)
50
-
51
- ys = []
52
- for input_tower, tower in zip(input_towers, self.towers):
53
- y = tower(input_tower)
54
- ys.append(torch.sigmoid(y))
55
- return torch.cat(ys, dim=1)
56
-
57
-
58
- class AttentionLayer(nn.Module):
59
- """attention for info tranfer
60
-
61
- Args:
62
- dim (int): attention dim
63
-
64
- Shape:
65
- Input: (batch_size, 2, dim)
66
- Output: (batch_size, dim)
67
- """
68
-
69
- def __init__(self, dim=32):
70
- super().__init__()
71
- self.dim = dim
72
- self.q_layer = nn.Linear(dim, dim, bias=False)
73
- self.k_layer = nn.Linear(dim, dim, bias=False)
74
- self.v_layer = nn.Linear(dim, dim, bias=False)
75
- self.softmax = nn.Softmax(dim=1)
76
-
77
- def forward(self, x):
78
- Q = self.q_layer(x)
79
- K = self.k_layer(x)
80
- V = self.v_layer(x)
81
- a = torch.sum(torch.mul(Q, K), -1) / torch.sqrt(torch.tensor(self.dim))
82
- a = self.softmax(a)
83
- outputs = torch.sum(torch.mul(torch.unsqueeze(a, -1), V), dim=1)
84
- return outputs
1
+ """
2
+ Date: create on 14/05/2022
3
+ References:
4
+ paper: (KDD'2021) Modeling the Sequential Dependence among Audience Multi-step Conversions with Multi-task Learning in Targeted Display Advertising
5
+ url: https://arxiv.org/abs/2105.08489
6
+ code: https://github.com/adtalos/AITM-torch
7
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
8
+ """
9
+
10
+ import torch
11
+ import torch.nn as nn
12
+
13
+ from ...basic.layers import MLP, EmbeddingLayer
14
+
15
+
16
class AITM(nn.Module):
    """Adaptive Information Transfer Multi-task (AITM) framework.

    Every task must be a binary classification task.

    Args:
        features (list[Feature Class]): features consumed by the whole model.
        n_task (int): the number of binary classification tasks.
        bottom_params (dict): params shared by every bottom MLP, keys include:
            `{"dims":list, "activation":str, "dropout":float}`.
        tower_params_list (list): one tower params dict per task, same keys as bottom_params.
    """

    def __init__(self, features, n_task, bottom_params, tower_params_list):
        super().__init__()
        self.features = features
        self.n_task = n_task
        self.input_dims = sum(fea.embed_dim for fea in features)
        self.embedding = EmbeddingLayer(features)

        bottom_out_dim = bottom_params["dims"][-1]
        self.bottoms = nn.ModuleList(MLP(self.input_dims, output_layer=False, **bottom_params) for _ in range(self.n_task))
        self.towers = nn.ModuleList(MLP(bottom_out_dim, **tower_params_list[i]) for i in range(self.n_task))

        # one info gate + one attention unit per task transition (task i-1 -> i)
        self.info_gates = nn.ModuleList(MLP(bottom_out_dim, output_layer=False, dims=[bottom_out_dim]) for _ in range(self.n_task - 1))
        self.aits = nn.ModuleList(AttentionLayer(bottom_out_dim) for _ in range(self.n_task - 1))

    def forward(self, x):
        embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, input_dims]
        # one bottom representation per task: [batch_size, bottom_dims[-1]]
        task_inputs = [bottom(embed_x) for bottom in self.bottoms]

        # sequentially transfer information from task i-1 into task i
        for i in range(1, self.n_task):
            info = self.info_gates[i - 1](task_inputs[i - 1]).unsqueeze(1)  # [batch_size, 1, bottom_dims[-1]]
            stacked = torch.cat([task_inputs[i].unsqueeze(1), info], dim=1)  # [batch_size, 2, bottom_dims[-1]]
            task_inputs[i] = self.aits[i - 1](stacked)

        probs = [torch.sigmoid(tower(rep)) for rep, tower in zip(task_inputs, self.towers)]
        return torch.cat(probs, dim=1)
55
+
56
+
57
class AttentionLayer(nn.Module):
    """Attention for information transfer between adjacent AITM tasks.

    Args:
        dim (int): attention dim

    Shape:
        Input: (batch_size, 2, dim)
        Output: (batch_size, dim)
    """

    def __init__(self, dim=32):
        super().__init__()
        self.dim = dim
        self.q_layer = nn.Linear(dim, dim, bias=False)
        self.k_layer = nn.Linear(dim, dim, bias=False)
        self.v_layer = nn.Linear(dim, dim, bias=False)
        self.softmax = nn.Softmax(dim=1)
        # FIX: precompute the attention scaling factor once. The previous code
        # built a fresh integer tensor via torch.sqrt(torch.tensor(self.dim))
        # on every forward call, relying on int->float dtype promotion and
        # paying a useless tensor allocation per call.
        self.scale = dim ** 0.5

    def forward(self, x):
        Q = self.q_layer(x)
        K = self.k_layer(x)
        V = self.v_layer(x)
        # scaled dot-product attention weights over the 2 stacked representations
        a = torch.sum(torch.mul(Q, K), -1) / self.scale
        a = self.softmax(a)
        # weighted sum over the pair dimension -> (batch_size, dim)
        outputs = torch.sum(torch.mul(torch.unsqueeze(a, -1), V), dim=1)
        return outputs
@@ -1,55 +1,56 @@
1
- """
2
- Date: create on 04/05/2022
3
- References:
4
- paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
5
- url: https://arxiv.org/abs/1804.07931
6
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
- """
8
-
9
- import torch
10
- import torch.nn as nn
11
-
12
- from ...basic.layers import MLP, EmbeddingLayer
13
-
14
-
15
- class ESMM(nn.Module):
16
- """Entire Space Multi-Task Model
17
-
18
- Args:
19
- user_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the user features.
20
- item_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the item features.
21
- cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
22
- ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
23
- """
24
-
25
- def __init__(self, user_features, item_features, cvr_params, ctr_params):
26
- super().__init__()
27
- self.user_features = user_features
28
- self.item_features = item_features
29
- self.embedding = EmbeddingLayer(user_features + item_features)
30
- self.tower_dims = len(user_features) * user_features[0].embed_dim + len(item_features) * item_features[0].embed_dim
31
- self.tower_cvr = MLP(self.tower_dims, **cvr_params)
32
- self.tower_ctr = MLP(self.tower_dims, **ctr_params)
33
-
34
- def forward(self, x):
35
- # # Field-wise Pooling Layer for user and item
36
- # embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1) #[batch_size, embed_dim]
37
- # embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).sum(dim=1) #[batch_size, embed_dim]
38
-
39
- # Here we concat all the features instead of field-wise pooling them
40
- # [batch_size, num_features, embed_dim] --> [batch_size, num_features * embed_dim]
41
- _batch_size = self.embedding(x, self.user_features, squeeze_dim=False).shape[0]
42
- embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).reshape(_batch_size, -1)
43
- embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).reshape(_batch_size, -1)
44
-
45
- # print('embed_user_features', embed_user_features.shape)
46
-
47
- input_tower = torch.cat((embed_user_features, embed_item_features), dim=1)
48
- cvr_logit = self.tower_cvr(input_tower)
49
- ctr_logit = self.tower_ctr(input_tower)
50
- cvr_pred = torch.sigmoid(cvr_logit)
51
- ctr_pred = torch.sigmoid(ctr_logit)
52
- ctcvr_pred = torch.mul(ctr_pred, cvr_pred)
53
-
54
- ys = [cvr_pred, ctr_pred, ctcvr_pred]
55
- return torch.cat(ys, dim=1)
1
+ """
2
+ Date: create on 04/05/2022
3
+ References:
4
+ paper: (SIGIR'2018) Entire Space Multi-Task Model: An Effective Approach for Estimating Post-Click Conversion Rate
5
+ url: https://arxiv.org/abs/1804.07931
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+
12
+ from ...basic.layers import MLP, EmbeddingLayer
13
+
14
+
15
+ class ESMM(nn.Module):
16
+ """Entire Space Multi-Task Model
17
+
18
+ Args:
19
+ user_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the user features.
20
+ item_features (list): the list of `Feature Class`, training by shared bottom and tower module. It means the item features.
21
+ cvr_params (dict): the params of the CVR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
22
+ ctr_params (dict): the params of the CTR Tower module, keys include:`{"dims":list, "activation":str, "dropout":float`}
23
+ """
24
+
25
+ def __init__(self, user_features, item_features, cvr_params, ctr_params):
26
+ super().__init__()
27
+ self.user_features = user_features
28
+ self.item_features = item_features
29
+ self.embedding = EmbeddingLayer(user_features + item_features)
30
+ self.tower_dims = len(user_features) * user_features[0].embed_dim + len(item_features) * item_features[0].embed_dim
31
+ self.tower_cvr = MLP(self.tower_dims, **cvr_params)
32
+ self.tower_ctr = MLP(self.tower_dims, **ctr_params)
33
+
34
+ def forward(self, x):
35
+ # # Field-wise Pooling Layer for user and item
36
+ # embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).sum(dim=1) #[batch_size, embed_dim]
37
+ # embed_item_features = self.embedding(x, self.item_features,
38
+ # squeeze_dim=False).sum(dim=1) #[batch_size, embed_dim]
39
+
40
+ # Here we concat all the features instead of field-wise pooling them
41
+ # [batch_size, num_features, embed_dim] --> [batch_size, num_features * embed_dim]
42
+ _batch_size = self.embedding(x, self.user_features, squeeze_dim=False).shape[0]
43
+ embed_user_features = self.embedding(x, self.user_features, squeeze_dim=False).reshape(_batch_size, -1)
44
+ embed_item_features = self.embedding(x, self.item_features, squeeze_dim=False).reshape(_batch_size, -1)
45
+
46
+ # print('embed_user_features', embed_user_features.shape)
47
+
48
+ input_tower = torch.cat((embed_user_features, embed_item_features), dim=1)
49
+ cvr_logit = self.tower_cvr(input_tower)
50
+ ctr_logit = self.tower_ctr(input_tower)
51
+ cvr_pred = torch.sigmoid(cvr_logit)
52
+ ctr_pred = torch.sigmoid(ctr_logit)
53
+ ctcvr_pred = torch.mul(ctr_pred, cvr_pred)
54
+
55
+ ys = [cvr_pred, ctr_pred, ctcvr_pred]
56
+ return torch.cat(ys, dim=1)
@@ -1,58 +1,58 @@
1
- """
2
- Date: create on 04/05/2022
3
- References:
4
- paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
5
- url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
6
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
- """
8
-
9
- import torch
10
- import torch.nn as nn
11
-
12
- from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
-
14
-
15
- class MMOE(nn.Module):
16
- """Multi-gate Mixture-of-Experts model.
17
-
18
- Args:
19
- features (list): the list of `Feature Class`, training by the expert and tower module.
20
- task_types (list): types of tasks, only support `["classfication", "regression"]`.
21
- n_expert (int): the number of expert net.
22
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
23
- tower_params_list (list): the list of tower params dict, the keys same as expert_params.
24
- """
25
-
26
- def __init__(self, features, task_types, n_expert, expert_params, tower_params_list):
27
- super().__init__()
28
- self.features = features
29
- self.task_types = task_types
30
- self.n_task = len(task_types)
31
- self.n_expert = n_expert
32
- self.embedding = EmbeddingLayer(features)
33
- self.input_dims = sum([fea.embed_dim for fea in features])
34
- self.experts = nn.ModuleList(
35
- MLP(self.input_dims, output_layer=False, **expert_params) for i in range(self.n_expert))
36
- self.gates = nn.ModuleList(
37
- MLP(self.input_dims, output_layer=False, **{
38
- "dims": [self.n_expert],
39
- "activation": "softmax"
40
- }) for i in range(self.n_task)) #n_gate = n_task
41
- self.towers = nn.ModuleList(MLP(expert_params["dims"][-1], **tower_params_list[i]) for i in range(self.n_task))
42
- self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
43
-
44
- def forward(self, x):
45
- embed_x = self.embedding(x, self.features, squeeze_dim=True) #[batch_size, input_dims]
46
- expert_outs = [expert(embed_x).unsqueeze(1) for expert in self.experts
47
- ] #expert_out[i]: [batch_size, 1, expert_dims[-1]]
48
- expert_outs = torch.cat(expert_outs, dim=1) #[batch_size, n_expert, expert_dims[-1]]
49
- gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates] #gate_out[i]: [batch_size, n_expert, 1]
50
-
51
- ys = []
52
- for gate_out, tower, predict_layer in zip(gate_outs, self.towers, self.predict_layers):
53
- expert_weight = torch.mul(gate_out, expert_outs) #[batch_size, n_expert, expert_dims[-1]]
54
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
55
- tower_out = tower(expert_pooling) #[batch_size, 1]
56
- y = predict_layer(tower_out) #logit -> proba
57
- ys.append(y)
58
- return torch.cat(ys, dim=1)
1
+ """
2
+ Date: create on 04/05/2022
3
+ References:
4
+ paper: (KDD'2018) Modeling Task Relationships in Multi-task Learning with Multi-gate Mixture-of-Experts
5
+ url: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+
12
+ from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
+
14
+
15
class MMOE(nn.Module):
    """Multi-gate Mixture-of-Experts model.

    Args:
        features (list): the list of `Feature Class`, training by the expert and tower module.
        task_types (list): types of tasks, only support `["classfication", "regression"]`.
        n_expert (int): the number of expert nets.
        expert_params (dict): the params of all the expert modules, keys include:`{"dims":list, "activation":str, "dropout":float}`.
        tower_params_list (list): the list of tower params dicts, the keys same as expert_params.
    """

    def __init__(self, features, task_types, n_expert, expert_params, tower_params_list):
        super().__init__()
        self.features = features
        self.task_types = task_types
        self.n_task = len(task_types)
        self.n_expert = n_expert
        self.embedding = EmbeddingLayer(features)
        self.input_dims = sum(fea.embed_dim for fea in features)

        expert_out_dim = expert_params["dims"][-1]
        self.experts = nn.ModuleList(MLP(self.input_dims, output_layer=False, **expert_params) for _ in range(self.n_expert))
        # one softmax gate per task (n_gate == n_task)
        gate_params = {"dims": [self.n_expert], "activation": "softmax"}
        self.gates = nn.ModuleList(MLP(self.input_dims, output_layer=False, **gate_params) for _ in range(self.n_task))
        self.towers = nn.ModuleList(MLP(expert_out_dim, **tower_params_list[i]) for i in range(self.n_task))
        self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)

    def forward(self, x):
        embed_x = self.embedding(x, self.features, squeeze_dim=True)  # [batch_size, input_dims]
        # stacked expert outputs: [batch_size, n_expert, expert_dims[-1]]
        expert_outs = torch.stack([expert(embed_x) for expert in self.experts], dim=1)
        # per-task mixture weights: each [batch_size, n_expert, 1]
        gate_outs = [gate(embed_x).unsqueeze(-1) for gate in self.gates]

        outputs = []
        for weights, tower, predict_layer in zip(gate_outs, self.towers, self.predict_layers):
            mixed = (weights * expert_outs).sum(dim=1)  # [batch_size, expert_dims[-1]]
            tower_out = tower(mixed)  # [batch_size, 1]
            outputs.append(predict_layer(tower_out))  # logit -> proba
        return torch.cat(outputs, dim=1)