torch-rechub 0.0.3__py3-none-any.whl → 0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. torch_rechub/__init__.py +14 -0
  2. torch_rechub/basic/activation.py +54 -54
  3. torch_rechub/basic/callback.py +33 -33
  4. torch_rechub/basic/features.py +87 -94
  5. torch_rechub/basic/initializers.py +92 -92
  6. torch_rechub/basic/layers.py +994 -720
  7. torch_rechub/basic/loss_func.py +223 -34
  8. torch_rechub/basic/metaoptimizer.py +76 -72
  9. torch_rechub/basic/metric.py +251 -250
  10. torch_rechub/models/generative/__init__.py +6 -0
  11. torch_rechub/models/generative/hllm.py +249 -0
  12. torch_rechub/models/generative/hstu.py +189 -0
  13. torch_rechub/models/matching/__init__.py +13 -11
  14. torch_rechub/models/matching/comirec.py +193 -188
  15. torch_rechub/models/matching/dssm.py +72 -66
  16. torch_rechub/models/matching/dssm_facebook.py +77 -79
  17. torch_rechub/models/matching/dssm_senet.py +28 -16
  18. torch_rechub/models/matching/gru4rec.py +85 -87
  19. torch_rechub/models/matching/mind.py +103 -101
  20. torch_rechub/models/matching/narm.py +82 -76
  21. torch_rechub/models/matching/sasrec.py +143 -140
  22. torch_rechub/models/matching/sine.py +148 -151
  23. torch_rechub/models/matching/stamp.py +81 -83
  24. torch_rechub/models/matching/youtube_dnn.py +75 -71
  25. torch_rechub/models/matching/youtube_sbc.py +98 -98
  26. torch_rechub/models/multi_task/__init__.py +7 -5
  27. torch_rechub/models/multi_task/aitm.py +83 -84
  28. torch_rechub/models/multi_task/esmm.py +56 -55
  29. torch_rechub/models/multi_task/mmoe.py +58 -58
  30. torch_rechub/models/multi_task/ple.py +116 -130
  31. torch_rechub/models/multi_task/shared_bottom.py +45 -45
  32. torch_rechub/models/ranking/__init__.py +14 -11
  33. torch_rechub/models/ranking/afm.py +65 -63
  34. torch_rechub/models/ranking/autoint.py +102 -0
  35. torch_rechub/models/ranking/bst.py +61 -63
  36. torch_rechub/models/ranking/dcn.py +38 -38
  37. torch_rechub/models/ranking/dcn_v2.py +59 -69
  38. torch_rechub/models/ranking/deepffm.py +131 -123
  39. torch_rechub/models/ranking/deepfm.py +43 -42
  40. torch_rechub/models/ranking/dien.py +191 -191
  41. torch_rechub/models/ranking/din.py +93 -91
  42. torch_rechub/models/ranking/edcn.py +101 -117
  43. torch_rechub/models/ranking/fibinet.py +42 -50
  44. torch_rechub/models/ranking/widedeep.py +41 -41
  45. torch_rechub/trainers/__init__.py +4 -3
  46. torch_rechub/trainers/ctr_trainer.py +288 -128
  47. torch_rechub/trainers/match_trainer.py +336 -170
  48. torch_rechub/trainers/matching.md +3 -0
  49. torch_rechub/trainers/mtl_trainer.py +356 -207
  50. torch_rechub/trainers/seq_trainer.py +427 -0
  51. torch_rechub/utils/data.py +492 -360
  52. torch_rechub/utils/hstu_utils.py +198 -0
  53. torch_rechub/utils/match.py +457 -274
  54. torch_rechub/utils/model_utils.py +233 -0
  55. torch_rechub/utils/mtl.py +136 -126
  56. torch_rechub/utils/onnx_export.py +220 -0
  57. torch_rechub/utils/visualization.py +271 -0
  58. torch_rechub-0.0.5.dist-info/METADATA +402 -0
  59. torch_rechub-0.0.5.dist-info/RECORD +64 -0
  60. {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info}/WHEEL +1 -2
  61. {torch_rechub-0.0.3.dist-info → torch_rechub-0.0.5.dist-info/licenses}/LICENSE +21 -21
  62. torch_rechub-0.0.3.dist-info/METADATA +0 -177
  63. torch_rechub-0.0.3.dist-info/RECORD +0 -55
  64. torch_rechub-0.0.3.dist-info/top_level.txt +0 -1
@@ -1,130 +1,116 @@
1
- """
2
- Date: create on 05/05/2022
3
- References:
4
- paper: (RecSys'2020) Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations
5
- url: https://dl.acm.org/doi/abs/10.1145/3383313.3412236
6
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
- """
8
-
9
- import torch
10
- import torch.nn as nn
11
-
12
- from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
-
14
-
15
- class PLE(nn.Module):
16
- """Progressive Layered Extraction model.
17
-
18
- Args:
19
- features (list): the list of `Feature Class`, training by the expert and tower module.
20
- task_types (list): types of tasks, only support `["classfication", "regression"]`.
21
- n_level (int): the number of CGC layer.
22
- n_expert_specific (int): the number of task-specific expert net.
23
- n_expert_shared (int): the number of task-shared expert net.
24
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
25
- tower_params_list (list): the list of tower params dict, the keys same as expert_params.
26
- """
27
-
28
- def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params,
29
- tower_params_list):
30
- super().__init__()
31
- self.features = features
32
- self.n_task = len(task_types)
33
- self.task_types = task_types
34
- self.n_level = n_level
35
- self.input_dims = sum([fea.embed_dim for fea in features])
36
- self.embedding = EmbeddingLayer(features)
37
- self.cgc_layers = nn.ModuleList(
38
- CGC(i + 1, n_level, self.n_task, n_expert_specific, n_expert_shared, self.input_dims, expert_params)
39
- for i in range(n_level))
40
- self.towers = nn.ModuleList(
41
- MLP(expert_params["dims"][-1], output_layer=False, **tower_params_list[i]) for i in range(self.n_task))
42
- self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
43
-
44
- def forward(self, x):
45
- embed_x = self.embedding(x, self.features, squeeze_dim=True) #[batch_size, input_dims]
46
- ple_inputs = [embed_x] * (self.n_task + 1)
47
- ple_outs = []
48
- for i in range(self.n_level):
49
- ple_outs = self.cgc_layers[i](ple_inputs) #ple_outs[i]: [batch_size, expert_dims[-1]]
50
- ple_inputs = ple_outs
51
- #predict
52
- ys = []
53
- for ple_out, tower, predict_layer in zip(ple_outs, self.towers, self.predict_layers):
54
- tower_out = tower(ple_out) #[batch_size, 1]
55
- y = predict_layer(tower_out) #logit -> proba
56
- ys.append(y)
57
- return torch.cat(ys, dim=1)
58
-
59
-
60
- class CGC(nn.Module):
61
- """Customized Gate Control (CGC) Model mentioned in PLE paper.
62
-
63
- Args:
64
- cur_level (int): the current level of CGC in PLE.
65
- n_level (int): the number of CGC layer.
66
- n_task (int): the number of tasks.
67
- n_expert_specific (int): the number of task-specific expert net.
68
- n_expert_shared (int): the number of task-shared expert net.
69
- input_dims (int): the input dims of the xpert module in current CGC layer.
70
- expert_params (dict): the params of all the expert module, keys include:`{"dims":list, "activation":str, "dropout":float}.
71
- """
72
-
73
- def __init__(self, cur_level, n_level, n_task, n_expert_specific, n_expert_shared, input_dims, expert_params):
74
- super().__init__()
75
- self.cur_level = cur_level # the CGC level of PLE
76
- self.n_level = n_level
77
- self.n_task = n_task
78
- self.n_expert_specific = n_expert_specific
79
- self.n_expert_shared = n_expert_shared
80
- self.n_expert_all = n_expert_specific * self.n_task + n_expert_shared
81
- input_dims = input_dims if cur_level == 1 else expert_params["dims"][
82
- -1] #the first layer expert dim is the input data dim other expert dim
83
- self.experts_specific = nn.ModuleList(
84
- MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_task * self.n_expert_specific))
85
- self.experts_shared = nn.ModuleList(
86
- MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_expert_shared))
87
- self.gates_specific = nn.ModuleList(
88
- MLP(
89
- input_dims, **{
90
- "dims": [self.n_expert_specific + self.n_expert_shared],
91
- "activation": "softmax",
92
- "output_layer": False
93
- }) for _ in range(self.n_task)) #n_gate_specific = n_task
94
- if cur_level < n_level:
95
- self.gate_shared = MLP(input_dims, **{
96
- "dims": [self.n_expert_all],
97
- "activation": "softmax",
98
- "output_layer": False
99
- }) #n_gate_specific = n_task
100
-
101
- def forward(self, x_list):
102
- expert_specific_outs = [] #expert_out[i]: [batch_size, 1, expert_dims[-1]]
103
- for i in range(self.n_task):
104
- expert_specific_outs.extend([
105
- expert(x_list[i]).unsqueeze(1)
106
- for expert in self.experts_specific[i * self.n_expert_specific:(i + 1) * self.n_expert_specific]
107
- ])
108
- expert_shared_outs = [expert(x_list[-1]).unsqueeze(1) for expert in self.experts_shared
109
- ] #x_list[-1]: the input for shared experts
110
- gate_specific_outs = [gate(x_list[i]).unsqueeze(-1) for i, gate in enumerate(self.gates_specific)
111
- ] #gate_out[i]: [batch_size, n_expert_specific+n_expert_shared, 1]
112
- cgc_outs = []
113
- for i, gate_out in enumerate(gate_specific_outs):
114
- cur_expert_list = expert_specific_outs[i * self.n_expert_specific:(i + 1) *
115
- self.n_expert_specific] + expert_shared_outs
116
- expert_concat = torch.cat(cur_expert_list,
117
- dim=1) #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
118
- expert_weight = torch.mul(gate_out,
119
- expert_concat) #[batch_size, n_expert_specific+n_expert_shared, expert_dims[-1]]
120
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
121
- cgc_outs.append(expert_pooling) #length: n_task
122
- if self.cur_level < self.n_level: #not the last layer
123
- gate_shared_out = self.gate_shared(x_list[-1]).unsqueeze(-1) #[batch_size, n_expert_all, 1]
124
- expert_concat = torch.cat(expert_specific_outs + expert_shared_outs,
125
- dim=1) #[batch_size, n_expert_all, expert_dims[-1]]
126
- expert_weight = torch.mul(gate_shared_out, expert_concat) #[batch_size, n_expert_all, expert_dims[-1]]
127
- expert_pooling = torch.sum(expert_weight, dim=1) #[batch_size, expert_dims[-1]]
128
- cgc_outs.append(expert_pooling) #length: n_task+1
129
-
130
- return cgc_outs
1
+ """
2
+ Date: create on 05/05/2022
3
+ References:
4
+ paper: (RecSys'2020) Progressive Layered Extraction (PLE): A Novel Multi-Task Learning (MTL) Model for Personalized Recommendations
5
+ url: https://dl.acm.org/doi/abs/10.1145/3383313.3412236
6
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+
12
+ from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
13
+
14
+
15
class PLE(nn.Module):
    """Progressive Layered Extraction (PLE) multi-task model.

    The feature embedding is passed through ``n_level`` stacked CGC layers;
    the outputs of the last layer are then paired, in order, with one tower
    and one prediction layer per task.

    Args:
        features (list): the list of `Feature Class`, trained by the expert and tower modules.
        task_types (list): the type of each task, only `["classification", "regression"]` are supported.
        n_level (int): the number of CGC layers.
        n_expert_specific (int): the number of task-specific expert nets.
        n_expert_shared (int): the number of task-shared expert nets.
        expert_params (dict): the params of every expert module, keys include: `{"dims":list, "activation":str, "dropout":float}`.
        tower_params_list (list): one tower params dict per task, with the same keys as expert_params.
    """

    def __init__(self, features, task_types, n_level, n_expert_specific, n_expert_shared, expert_params, tower_params_list):
        super().__init__()
        self.features = features
        self.n_task = len(task_types)
        self.task_types = task_types
        self.n_level = n_level
        self.input_dims = sum(fea.embed_dim for fea in features)
        self.embedding = EmbeddingLayer(features)

        # CGC levels are numbered from 1; level 1 consumes the raw embedding.
        cgc_stack = []
        for level in range(1, n_level + 1):
            cgc_stack.append(CGC(level, n_level, self.n_task, n_expert_specific, n_expert_shared, self.input_dims, expert_params))
        self.cgc_layers = nn.ModuleList(cgc_stack)

        # Each tower reads the last expert dimension and keeps its own hidden output
        # (output_layer=False); the prediction layer is applied on top of it.
        self.towers = nn.ModuleList([MLP(expert_params["dims"][-1], output_layer=False, **tower_params_list[task_idx]) for task_idx in range(self.n_task)])
        self.predict_layers = nn.ModuleList([PredictionLayer(task_type) for task_type in task_types])

    def forward(self, x):
        """Run the CGC stack and return the per-task predictions concatenated on dim 1."""
        # [batch_size, input_dims] according to the original shape comments
        embed_x = self.embedding(x, self.features, squeeze_dim=True)

        # The first level sees the same embedding for every task plus the shared branch.
        level_inputs = [embed_x] * (self.n_task + 1)
        ple_outs = []
        for level_idx in range(self.n_level):
            ple_outs = self.cgc_layers[level_idx](level_inputs)
            level_inputs = ple_outs

        # zip stops at the shortest sequence, so only one output per tower is consumed.
        ys = [predict_layer(tower(ple_out)) for ple_out, tower, predict_layer in zip(ple_outs, self.towers, self.predict_layers)]
        return torch.cat(ys, dim=1)
58
+
59
+
60
class CGC(nn.Module):
    """Customized Gate Control (CGC) layer described in the PLE paper.

    Args:
        cur_level (int): the current level of this CGC layer in PLE (1-based).
        n_level (int): the number of CGC layers.
        n_task (int): the number of tasks.
        n_expert_specific (int): the number of task-specific expert nets.
        n_expert_shared (int): the number of task-shared expert nets.
        input_dims (int): the input dims of the expert modules in the first CGC layer.
        expert_params (dict): the params of every expert module, keys include: `{"dims":list, "activation":str, "dropout":float}`.
    """

    def __init__(self, cur_level, n_level, n_task, n_expert_specific, n_expert_shared, input_dims, expert_params):
        super().__init__()
        self.cur_level = cur_level
        self.n_level = n_level
        self.n_task = n_task
        self.n_expert_specific = n_expert_specific
        self.n_expert_shared = n_expert_shared
        self.n_expert_all = n_expert_specific * self.n_task + n_expert_shared

        # Only the first level reads the raw input; deeper levels read the
        # previous level's expert output, whose width is expert_params["dims"][-1].
        if cur_level != 1:
            input_dims = expert_params["dims"][-1]

        self.experts_specific = nn.ModuleList([MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_task * self.n_expert_specific)])
        self.experts_shared = nn.ModuleList([MLP(input_dims, output_layer=False, **expert_params) for _ in range(self.n_expert_shared)])

        # One gate per task, weighting that task's own experts plus the shared ones.
        n_gate_out = self.n_expert_specific + self.n_expert_shared
        self.gates_specific = nn.ModuleList([MLP(input_dims, dims=[n_gate_out], activation="softmax", output_layer=False) for _ in range(self.n_task)])

        # The shared gate only exists below the last level, where a shared
        # representation is still needed as input for the next level.
        if cur_level < n_level:
            self.gate_shared = MLP(input_dims, dims=[self.n_expert_all], activation="softmax", output_layer=False)

    def forward(self, x_list):
        """Combine expert outputs per task.

        Args:
            x_list (list): one input per task followed by the shared input as the last element.

        Returns:
            list: ``n_task`` gated outputs, plus one shared output when this is not the last level.
        """
        per_task = self.n_expert_specific
        shared_input = x_list[-1]

        # Each expert output gets an extra axis at dim 1 so outputs can be concatenated along it.
        expert_specific_outs = []
        for task_idx in range(self.n_task):
            task_experts = self.experts_specific[task_idx * per_task:(task_idx + 1) * per_task]
            for expert in task_experts:
                expert_specific_outs.append(expert(x_list[task_idx]).unsqueeze(1))
        expert_shared_outs = [expert(shared_input).unsqueeze(1) for expert in self.experts_shared]

        # Gate weights get a trailing axis so they broadcast over the expert feature dim.
        gate_specific_outs = [gate(x_list[task_idx]).unsqueeze(-1) for task_idx, gate in enumerate(self.gates_specific)]

        cgc_outs = []
        for task_idx, gate_out in enumerate(gate_specific_outs):
            candidates = expert_specific_outs[task_idx * per_task:(task_idx + 1) * per_task] + expert_shared_outs
            stacked = torch.cat(candidates, dim=1)
            # Weighted sum over the expert axis.
            cgc_outs.append(torch.sum(torch.mul(gate_out, stacked), dim=1))

        if self.cur_level < self.n_level:
            # Not the last level: also emit a shared output mixing every expert.
            gate_shared_out = self.gate_shared(shared_input).unsqueeze(-1)
            stacked_all = torch.cat(expert_specific_outs + expert_shared_outs, dim=1)
            cgc_outs.append(torch.sum(torch.mul(gate_shared_out, stacked_all), dim=1))

        return cgc_outs
@@ -1,45 +1,45 @@
1
- """
2
- Date: create on 04/05/2022
3
- Reference:
4
- paper: Caruana, R. (1997). Multitask learning. Machine learning, 28(1), 41-75.
5
- Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
6
- """
7
-
8
- import torch
9
- import torch.nn as nn
10
-
11
- from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
12
-
13
-
14
- class SharedBottom(nn.Module):
15
- """Shared Bottom multi task model.
16
-
17
- Args:
18
- features (list): the list of `Feature Class`, training by the bottom and tower module.
19
- task_types (list): types of tasks, only support `["classfication", "regression"]`.
20
- bottom_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float}, keep `{"output_layer":False}`.
21
- tower_params_list (list): the list of tower params dict, the keys same as bottom_params.
22
- """
23
-
24
- def __init__(self, features, task_types, bottom_params, tower_params_list):
25
- super().__init__()
26
- self.features = features
27
- self.task_types = task_types
28
- self.embedding = EmbeddingLayer(features)
29
- self.bottom_dims = sum([fea.embed_dim for fea in features])
30
-
31
- self.bottom_mlp = MLP(self.bottom_dims, **{**bottom_params, **{"output_layer": False}})
32
- self.towers = nn.ModuleList(
33
- MLP(bottom_params["dims"][-1], **tower_params_list[i]) for i in range(len(task_types)))
34
- self.predict_layers = nn.ModuleList(PredictionLayer(task_type) for task_type in task_types)
35
-
36
- def forward(self, x):
37
- input_bottom = self.embedding(x, self.features, squeeze_dim=True)
38
- x = self.bottom_mlp(input_bottom)
39
-
40
- ys = []
41
- for tower, predict_layer in zip(self.towers, self.predict_layers):
42
- tower_out = tower(x)
43
- y = predict_layer(tower_out) #regression->keep, binary classification->sigmoid
44
- ys.append(y)
45
- return torch.cat(ys, dim=1)
1
+ """
2
+ Date: create on 04/05/2022
3
+ Reference:
4
+ paper: Caruana, R. (1997). Multitask learning. Machine learning, 28(1), 41-75.
5
+ Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
6
+ """
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+
11
+ from ...basic.layers import MLP, EmbeddingLayer, PredictionLayer
12
+
13
+
14
class SharedBottom(nn.Module):
    """Shared-Bottom multi-task model.

    A single bottom MLP is shared by all tasks; each task then has its own
    tower and prediction layer.

    Args:
        features (list): the list of `Feature Class`, trained by the bottom and tower modules.
        task_types (list): the type of each task, only `["classification", "regression"]` are supported.
        bottom_params (dict): the params of the bottom MLP module, keys include: `{"dims":list, "activation":str, "dropout":float}`; `output_layer` is always forced to `False`.
        tower_params_list (list): one tower params dict per task, with the same keys as bottom_params.
    """

    def __init__(self, features, task_types, bottom_params, tower_params_list):
        super().__init__()
        self.features = features
        self.task_types = task_types
        self.embedding = EmbeddingLayer(features)
        self.bottom_dims = sum(fea.embed_dim for fea in features)

        # Whatever the caller passed, the bottom MLP must not end in an output layer,
        # because the towers consume its last hidden dimension.
        bottom_kwargs = dict(bottom_params)
        bottom_kwargs["output_layer"] = False
        self.bottom_mlp = MLP(self.bottom_dims, **bottom_kwargs)

        self.towers = nn.ModuleList([MLP(bottom_params["dims"][-1], **tower_params_list[task_idx]) for task_idx in range(len(task_types))])
        self.predict_layers = nn.ModuleList([PredictionLayer(task_type) for task_type in task_types])

    def forward(self, x):
        """Return the per-task predictions concatenated on dim 1."""
        input_bottom = self.embedding(x, self.features, squeeze_dim=True)
        shared = self.bottom_mlp(input_bottom)

        # Per the original comment: regression outputs are kept as-is,
        # binary classification outputs go through a sigmoid.
        ys = [predict_layer(tower(shared)) for tower, predict_layer in zip(self.towers, self.predict_layers)]
        return torch.cat(ys, dim=1)
@@ -1,11 +1,14 @@
1
- from .widedeep import WideDeep
2
- from .deepfm import DeepFM
3
- from .din import DIN
4
- from .dcn import DCN
5
- from .dcn_v2 import DCNv2
6
- from .edcn import EDCN
7
- from .deepffm import DeepFFM, FatDeepFFM
8
- from .fibinet import FiBiNet
9
- from .bst import BST
10
- from .afm import AFM
11
- from .dien import DIEN
1
+ __all__ = ['WideDeep', 'DeepFM', 'DCN', 'DCNv2', 'EDCN', 'AFM', 'FiBiNet', 'DeepFFM', 'BST', 'DIN', 'DIEN', 'FatDeepFFM', 'AutoInt']
2
+
3
+ from .afm import AFM
4
+ from .autoint import AutoInt
5
+ from .bst import BST
6
+ from .dcn import DCN
7
+ from .dcn_v2 import DCNv2
8
+ from .deepffm import DeepFFM, FatDeepFFM
9
+ from .deepfm import DeepFM
10
+ from .dien import DIEN
11
+ from .din import DIN
12
+ from .edcn import EDCN
13
+ from .fibinet import FiBiNet
14
+ from .widedeep import WideDeep
@@ -1,63 +1,65 @@
1
- """
2
- Date: create on 23/04/2024
3
- References:
4
- paper: (IJCAI'2017) Attentional Factorization Machines:Learning the Weight of Feature Interactions via Attention Networks
5
- url: https://arxiv.org/abs/1708.04617
6
- Authors: Tao Fan, thisisevy@foxmail.com
7
- """
8
-
9
- import torch
10
- from ...basic.layers import FM, MLP, LR, EmbeddingLayer
11
- from torch import nn
12
- from torch.nn import Parameter, init
13
-
14
-
15
- class AFM(nn.Module):
16
- """Attentional Factorization Machine Model
17
-
18
- Args:
19
- fm_features (list): the list of `Feature Class`, training by the fm part module.
20
- embed_dim (int): the dimension of input embedding.
21
- t (int): the size of the hidden layer in the attention network.
22
- """
23
-
24
- def __init__(self, fm_features, embed_dim, t=64):
25
- super(AFM, self).__init__()
26
- self.fm_features = fm_features
27
- self.embed_dim = embed_dim
28
- self.fm_dims = sum([fea.embed_dim for fea in fm_features])
29
- self.linear = LR(self.fm_dims) # 1-odrder interaction
30
- self.fm = FM(reduce_sum=False) # 2-odrder interaction
31
- self.embedding = EmbeddingLayer(fm_features)
32
-
33
- # 注意力计算中的线性层
34
- self.attention_liner = nn.Linear(self.embed_dim, t)
35
- # AFM公式中的h
36
- self.h = init.xavier_uniform_(Parameter(torch.empty(t, 1)))
37
- # AFM公式中的p
38
- self.p = init.xavier_uniform_(Parameter(torch.empty(self.embed_dim, 1)))
39
-
40
- def attention(self, y_fm):
41
- # embs: [ batch_size, k ]
42
- # [ batch_size, t ]
43
- y_fm = self.attention_liner(y_fm)
44
- # [ batch_size, t ]
45
- y_fm = torch.relu(y_fm)
46
- # [ batch_size, 1 ]
47
- y_fm = torch.matmul(y_fm, self.h)
48
- # [ batch_size, 1 ]
49
- atts = torch.softmax(y_fm, dim=1)
50
- return atts
51
-
52
- def forward(self, x):
53
- input_fm = self.embedding(x, self.fm_features, squeeze_dim=False) # [batch_size, num_fields, embed_dim]
54
-
55
- y_linear = self.linear(input_fm.flatten(start_dim=1))
56
- y_fm = self.fm(input_fm)
57
- # 得到注意力
58
- atts = self.attention(y_fm)
59
- # [ batch_size, 1 ]
60
- outs = torch.matmul(atts * y_fm, self.p)
61
- # print(y_linear.size(), outs.size())
62
- y = y_linear + outs
63
- return torch.sigmoid(y.squeeze(1))
1
+ """
2
+ Date: create on 23/04/2024
3
+ References:
4
+ paper: (IJCAI'2017) Attentional Factorization Machines:Learning the Weight of Feature Interactions via Attention Networks
5
+ url: https://arxiv.org/abs/1708.04617
6
+ Authors: Tao Fan, thisisevy@foxmail.com
7
+ """
8
+
9
+ import torch
10
+ from torch import nn
11
+ from torch.nn import Parameter, init
12
+
13
+ from ...basic.layers import FM, LR, MLP, EmbeddingLayer
14
+
15
+
16
class AFM(nn.Module):
    """Attentional Factorization Machine model.

    Args:
        fm_features (list): the list of `Feature Class`, trained by the fm part module.
        embed_dim (int): the dimension of the input embedding.
        t (int): the size of the hidden layer in the attention network.
    """

    def __init__(self, fm_features, embed_dim, t=64):
        super().__init__()
        self.fm_features = fm_features
        self.embed_dim = embed_dim
        self.fm_dims = sum(fea.embed_dim for fea in fm_features)
        self.linear = LR(self.fm_dims)  # 1st-order interaction
        self.fm = FM(reduce_sum=False)  # 2nd-order interaction
        self.embedding = EmbeddingLayer(fm_features)

        # Linear layer used inside the attention computation.
        self.attention_liner = nn.Linear(self.embed_dim, t)
        # The vector "h" of the AFM formula: projects the attention hidden state to a score.
        self.h = Parameter(torch.empty(t, 1))
        init.xavier_uniform_(self.h)
        # The vector "p" of the AFM formula: projects the attended interaction to a logit.
        self.p = Parameter(torch.empty(self.embed_dim, 1))
        init.xavier_uniform_(self.p)

    def attention(self, y_fm):
        """Compute attention weights for the FM interaction term.

        The input goes through linear -> ReLU -> projection by ``h`` and a
        softmax over dim 1.
        """
        hidden = torch.relu(self.attention_liner(y_fm))
        score = torch.matmul(hidden, self.h)
        # NOTE(review): the original comments give this score as [batch_size, 1];
        # if so, a softmax over dim=1 always returns ones - confirm this is intended.
        return torch.softmax(score, dim=1)

    def forward(self, x):
        """Return the sigmoid of the linear term plus the attended FM term."""
        # [batch_size, num_fields, embed_dim] according to the original shape comment
        input_fm = self.embedding(x, self.fm_features, squeeze_dim=False)

        y_linear = self.linear(input_fm.flatten(start_dim=1))
        y_fm = self.fm(input_fm)

        # Weight the interaction term by its attention, then project it with p.
        attended = self.attention(y_fm) * y_fm
        logit = y_linear + torch.matmul(attended, self.p)
        return torch.sigmoid(logit.squeeze(1))
@@ -0,0 +1,102 @@
1
+ """
2
+ Date: create on 14/11/2025
3
+ References:
4
+ paper: (CIKM'2019) AutoInt: Automatic Feature Interaction Learning via Self-Attentive Neural Networks
5
+ url: https://arxiv.org/abs/1810.11921
6
+ Authors: Yang Zhou, zyaztec@gmail.com
7
+ """
8
+
9
+ import torch
10
+ import torch.nn as nn
11
+
12
+ from ...basic.layers import LR, MLP, EmbeddingLayer, InteractingLayer
13
+
14
+
15
class AutoInt(torch.nn.Module):
    """AutoInt model.

    Sparse and dense features are embedded as one field each, passed through
    a stack of interacting (self-attention) layers, and combined with a linear
    term and an optional MLP term before the final sigmoid.

    Args:
        sparse_features (list): the list of `SparseFeature` Class; must not be empty.
        dense_features (list): the list of `DenseFeature` Class, `None` is treated as an empty list.
        num_layers (int): number of interacting layers.
        num_heads (int): number of attention heads.
        dropout (float): dropout rate for attention.
        mlp_params (dict): parameters for the optional MLP, keys: `{"dims":list, "activation":str,
            "dropout":float, "output_layer":bool}`. When `None`, no MLP term is used.

    Raises:
        ValueError: if no sparse feature is given, since `embed_dim` is taken from the first one.
    """

    def __init__(self, sparse_features, dense_features, num_layers=3, num_heads=2, dropout=0.0, mlp_params=None):
        super(AutoInt, self).__init__()
        self.sparse_features = sparse_features
        self.dense_features = dense_features if dense_features is not None else []

        # Validate BEFORE reading the first feature: indexing an empty list would
        # raise IndexError and hide the explicit error below.
        if self.sparse_features is None or len(self.sparse_features) == 0:
            raise ValueError("AutoInt requires at least one sparse feature to determine embed_dim.")

        # The first sparse feature's embed_dim is used for every field; other
        # features' embed_dim values are not checked here.
        self.embed_dim = self.sparse_features[0].embed_dim

        # field nums = sparse + dense
        self.num_sparse = len(self.sparse_features)
        self.num_dense = len(self.dense_features)
        self.num_fields = self.num_sparse + self.num_dense

        # total dims = num_fields * embed_dim
        self.dims = self.num_fields * self.embed_dim
        self.num_layers = num_layers

        self.sparse_embedding = EmbeddingLayer(self.sparse_features)

        # Each dense feature is lifted from a scalar to an embed_dim vector
        # by its own bias-free linear map.
        self.dense_embeddings = nn.ModuleDict()
        for fea in self.dense_features:
            self.dense_embeddings[fea.name] = nn.Linear(1, self.embed_dim, bias=False)

        self.interacting_layers = torch.nn.ModuleList([InteractingLayer(self.embed_dim, num_heads=num_heads, dropout=dropout, residual=True) for _ in range(num_layers)])

        self.linear = LR(self.dims)

        self.attn_linear = nn.Linear(self.dims, 1)

        if mlp_params is not None:
            self.use_mlp = True
            self.mlp = MLP(self.dims, **mlp_params)
        else:
            self.use_mlp = False

    def forward(self, x):
        """Return the predicted probability for each sample in `x`.

        `x` is indexed by feature name for the dense features and is passed
        as-is to the sparse embedding layer.
        """
        # sparse feature embedding: [B, num_sparse, embed_dim]
        sparse_emb = self.sparse_embedding(x, self.sparse_features, squeeze_dim=False)

        dense_emb_list = []
        for fea in self.dense_features:
            # Reshape each dense value to [B, 1, 1] so the linear map yields [B, 1, embed_dim].
            v = x[fea.name].float().view(-1, 1, 1)
            dense_emb_list.append(self.dense_embeddings[fea.name](v))

        if len(dense_emb_list) > 0:
            dense_emb = torch.cat(dense_emb_list, dim=1)  # [B, num_dense, embed_dim]
            embed_x = torch.cat([sparse_emb, dense_emb], dim=1)  # [B, num_fields, embed_dim]
        else:
            embed_x = sparse_emb  # [B, num_sparse, embed_dim]

        embed_x_flatten = embed_x.flatten(start_dim=1)  # [B, num_fields * embed_dim]

        # Multi-head self-attention layers
        attn_out = embed_x
        for layer in self.interacting_layers:
            attn_out = layer(attn_out)  # [B, num_fields, embed_dim]

        # Attention part: project the flattened attention output to a logit.
        attn_out_flatten = attn_out.flatten(start_dim=1)  # [B, num_fields * embed_dim]
        y_attn = self.attn_linear(attn_out_flatten)  # [B, 1]

        # Linear part
        y_linear = self.linear(embed_x_flatten)  # [B, 1]

        y = y_attn + y_linear

        # Optional deep MLP part, added on top of the attention and linear logits.
        if self.use_mlp:
            y_deep = self.mlp(embed_x_flatten)  # [B, 1]
            y = y + y_deep

        return torch.sigmoid(y.squeeze(1))