torch-rechub 0.0.1__py3-none-any.whl → 0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torch_rechub/basic/activation.py +54 -52
- torch_rechub/basic/callback.py +32 -32
- torch_rechub/basic/features.py +94 -57
- torch_rechub/basic/initializers.py +92 -0
- torch_rechub/basic/layers.py +720 -240
- torch_rechub/basic/loss_func.py +34 -0
- torch_rechub/basic/metaoptimizer.py +72 -0
- torch_rechub/basic/metric.py +250 -0
- torch_rechub/models/matching/__init__.py +11 -0
- torch_rechub/models/matching/comirec.py +188 -0
- torch_rechub/models/matching/dssm.py +66 -0
- torch_rechub/models/matching/dssm_facebook.py +79 -0
- torch_rechub/models/matching/dssm_senet.py +75 -0
- torch_rechub/models/matching/gru4rec.py +87 -0
- torch_rechub/models/matching/mind.py +101 -0
- torch_rechub/models/matching/narm.py +76 -0
- torch_rechub/models/matching/sasrec.py +140 -0
- torch_rechub/models/matching/sine.py +151 -0
- torch_rechub/models/matching/stamp.py +83 -0
- torch_rechub/models/matching/youtube_dnn.py +71 -0
- torch_rechub/models/matching/youtube_sbc.py +98 -0
- torch_rechub/models/multi_task/__init__.py +5 -4
- torch_rechub/models/multi_task/aitm.py +84 -0
- torch_rechub/models/multi_task/esmm.py +55 -45
- torch_rechub/models/multi_task/mmoe.py +58 -52
- torch_rechub/models/multi_task/ple.py +130 -104
- torch_rechub/models/multi_task/shared_bottom.py +45 -44
- torch_rechub/models/ranking/__init__.py +11 -3
- torch_rechub/models/ranking/afm.py +63 -0
- torch_rechub/models/ranking/bst.py +63 -0
- torch_rechub/models/ranking/dcn.py +38 -0
- torch_rechub/models/ranking/dcn_v2.py +69 -0
- torch_rechub/models/ranking/deepffm.py +123 -0
- torch_rechub/models/ranking/deepfm.py +41 -41
- torch_rechub/models/ranking/dien.py +191 -0
- torch_rechub/models/ranking/din.py +91 -81
- torch_rechub/models/ranking/edcn.py +117 -0
- torch_rechub/models/ranking/fibinet.py +50 -0
- torch_rechub/models/ranking/widedeep.py +41 -41
- torch_rechub/trainers/__init__.py +2 -1
- torch_rechub/trainers/{trainer.py → ctr_trainer.py} +128 -111
- torch_rechub/trainers/match_trainer.py +170 -0
- torch_rechub/trainers/mtl_trainer.py +206 -144
- torch_rechub/utils/__init__.py +0 -0
- torch_rechub/utils/data.py +360 -0
- torch_rechub/utils/match.py +274 -0
- torch_rechub/utils/mtl.py +126 -0
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/LICENSE +21 -21
- torch_rechub-0.0.3.dist-info/METADATA +177 -0
- torch_rechub-0.0.3.dist-info/RECORD +55 -0
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/WHEEL +1 -1
- torch_rechub/basic/utils.py +0 -168
- torch_rechub-0.0.1.dist-info/METADATA +0 -105
- torch_rechub-0.0.1.dist-info/RECORD +0 -26
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 09/01/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: (WWW'21) Dcn v2: Improved deep & cross network and practical lessons for web-scale learning to rank systems
|
|
5
|
+
url: https://arxiv.org/abs/2008.13535
|
|
6
|
+
Authors: lailai, lailai_zxy@tju.edu.cn
|
|
7
|
+
"""
|
|
8
|
+
import torch
|
|
9
|
+
from ...basic.layers import LR, MLP,CrossNetV2, CrossNetMix, EmbeddingLayer
|
|
10
|
+
|
|
11
|
+
class DCNv2(torch.nn.Module):
    """Deep & Cross Network v2, optionally with a mixture of low-rank cross layers.

    Args:
        features (list[Feature Class]): training by the whole module.
        n_cross_layers (int): the number of feature-crossing layers.
        mlp_params (dict): the params of the last MLP module, keys include:
            `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
        model_structure (str): one of "crossnet_only", "stacked", "parallel".
        use_low_rank_mixture (bool): whether to use the mixture of low-rank cross network.
        low_rank (int): the rank size of the low-rank matrices.
        num_experts (int): the number of expert networks.
    """

    def __init__(self,
                 features,
                 n_cross_layers,
                 mlp_params,
                 model_structure="parallel",
                 use_low_rank_mixture=True,
                 low_rank=32,
                 num_experts=4,
                 **kwargs):
        super(DCNv2, self).__init__()
        self.features = features
        self.dims = sum(fea.embed_dim for fea in features)
        self.embedding = EmbeddingLayer(features)

        # pick the cross network variant
        if use_low_rank_mixture:
            self.crossnet = CrossNetMix(self.dims, n_cross_layers, low_rank=low_rank, num_experts=num_experts)
        else:
            self.crossnet = CrossNetV2(self.dims, n_cross_layers)

        self.model_structure = model_structure
        assert self.model_structure in ["crossnet_only", "stacked", "parallel"], \
            "model_structure={} not supported!".format(self.model_structure)

        # the final linear layer's input width depends on the chosen structure
        if self.model_structure == "stacked":
            self.stacked_dnn = MLP(self.dims, output_layer=False, **mlp_params)
            final_dim = mlp_params["dims"][-1]
        if self.model_structure == "parallel":
            self.parallel_dnn = MLP(self.dims, output_layer=False, **mlp_params)
            final_dim = mlp_params["dims"][-1] + self.dims
        if self.model_structure == "crossnet_only":  # only CrossNet
            final_dim = self.dims
        self.linear = LR(final_dim)

    def forward(self, x):
        embed_x = self.embedding(x, self.features, squeeze_dim=True)
        cross_out = self.crossnet(embed_x)
        if self.model_structure == "crossnet_only":
            final_out = cross_out
        elif self.model_structure == "stacked":
            final_out = self.stacked_dnn(cross_out)
        elif self.model_structure == "parallel":
            dnn_out = self.parallel_dnn(embed_x)
            final_out = torch.cat([cross_out, dnn_out], dim=1)
        logit = self.linear(final_out)
        return torch.sigmoid(logit.squeeze(1))
|
|
@@ -0,0 +1,123 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: created on 31/07/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: FAT-DeepFFM: Field Attentive Deep Field-aware Factorization Machine
|
|
5
|
+
url: https://arxiv.org/abs/1905.06336
|
|
6
|
+
Authors: Bo Kang, klinux@live.com
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
import torch.nn as nn
|
|
11
|
+
|
|
12
|
+
from ...basic.layers import CEN, EmbeddingLayer, FFM, MLP
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class DeepFFM(nn.Module):
    """The DeepFFM model, mentioned on the `webpage
    <https://cs.nju.edu.cn/31/60/c1654a209248/page.htm>` which is the first
    work that introduces the FFM model into a neural CTR system. It is also
    described in the `FAT-DeepFFM paper <https://arxiv.org/abs/1905.06336>`.

    Args:
        linear_features (list): the list of `Feature Class`, fed to the linear module.
        cross_features (list): the list of `Feature Class`, fed to the ffm module.
        embed_dim (int): the dimensionality of categorical value embedding.
        mlp_params (dict): the params of the last MLP module, keys include:
            `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
    """

    def __init__(self, linear_features, cross_features, embed_dim, mlp_params):
        super().__init__()
        self.linear_features = linear_features
        self.cross_features = cross_features

        num_fields = len(cross_features)
        self.num_fields = num_fields
        self.num_field_cross = num_fields * (num_fields - 1) // 2

        self.ffm = FFM(num_fields=num_fields, reduce_sum=False)
        self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)

        self.linear_embedding = EmbeddingLayer(linear_features)
        self.ffm_embedding = EmbeddingLayer(cross_features)

        # global bias term
        self.b = torch.nn.Parameter(torch.zeros(1))

        # registered as a buffer so the constant follows the module across devices
        # url: https://discuss.pytorch.org/t/keeping-constant-value-in-module-on-correct-device/10129
        self.register_buffer('fields_offset', torch.arange(0, num_fields, dtype=torch.long))

    def forward(self, x):
        # linear-part score over the raw features (Eq. 5, FAT-DeepFFM) -> [batch_size, 1]
        y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True)

        # each feature value owns one embedding per field; shift the ids so each
        # (value, field) pair addresses a distinct embedding row
        # output shape [batch_size, num_field, num_field, emb_dim]
        x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields + self.fields_offset
                 for fea in self.cross_features}
        input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)

        # second-order field-aware crossings -> [batch_size, num_field_cross, emb_dim]
        em = self.ffm(input_ffm)

        # ffm-part score -> [batch_size, 1]
        y_ffm = self.mlp_out(em.flatten(start_dim=1))

        # final prediction
        return torch.sigmoid((y_linear + y_ffm).squeeze(1) + self.b)
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
class FatDeepFFM(nn.Module):
    """The FAT-DeepFFM model, mentioned in the `FAT-DeepFFM paper
    <https://arxiv.org/abs/1905.06336>`. It combines DeepFFM with the
    Compose-Excitation Network (CENet) field attention mechanism
    to highlight the importance of second-order feature crosses.

    Args:
        linear_features (list): the list of `Feature Class`, fed to the linear module.
        cross_features (list): the list of `Feature Class`, fed to the ffm module.
        embed_dim (int): the dimensionality of categorical value embedding.
        reduction_ratio (int): the ratio between the input-layer and hidden-layer
            dimensions of the CEN MLP module.
        mlp_params (dict): the params of the last MLP module, keys include:
            `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
    """

    def __init__(self, linear_features, cross_features, embed_dim, reduction_ratio, mlp_params):
        super().__init__()
        self.linear_features = linear_features
        self.cross_features = cross_features

        num_fields = len(cross_features)
        self.num_fields = num_fields
        self.num_field_cross = num_fields * (num_fields - 1) // 2

        self.ffm = FFM(num_fields=num_fields, reduce_sum=False)
        self.cen = CEN(embed_dim, self.num_field_cross, reduction_ratio)
        self.mlp_out = MLP(self.num_field_cross * embed_dim, **mlp_params)

        self.linear_embedding = EmbeddingLayer(linear_features)
        self.ffm_embedding = EmbeddingLayer(cross_features)

        # global bias term
        self.b = torch.nn.Parameter(torch.zeros(1))

        # buffer keeps the constant on the module's device
        self.register_buffer('fields_offset', torch.arange(0, num_fields, dtype=torch.long))

    def forward(self, x):
        # linear-part score over the raw features (Eq. 5, FAT-DeepFFM) -> [batch_size, 1]
        y_linear = self.linear_embedding(x, self.linear_features, squeeze_dim=True).sum(1, keepdim=True)

        # each feature value owns one embedding per field; shift the ids accordingly
        # output shape [batch_size, num_field, num_field, emb_dim]
        x_ffm = {fea.name: x[fea.name].unsqueeze(1) * self.num_fields + self.fields_offset
                 for fea in self.cross_features}
        input_ffm = self.ffm_embedding(x_ffm, self.cross_features, squeeze_dim=False)

        # second-order field-aware crossings -> [batch_size, num_field_cross, emb_dim]
        em = self.ffm(input_ffm)

        # rescale crossings with field attention (Eq. 10) -> [batch_size, num_field_cross * emb_dim]
        aem = self.cen(em)

        # ffm-part score -> [batch_size, 1]
        y_ffm = self.mlp_out(aem)

        # final prediction
        return torch.sigmoid((y_linear + y_ffm).squeeze(1) + self.b)
|
|
@@ -1,42 +1,42 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Date: create on 22/04/2022
|
|
3
|
-
References:
|
|
4
|
-
paper: (IJCAI'2017) DeepFM: A Factorization-Machine based Neural Network for CTR Prediction
|
|
5
|
-
url: https://arxiv.org/abs/1703.04247
|
|
6
|
-
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
import torch
|
|
10
|
-
|
|
11
|
-
from ...basic.layers import FM, MLP, LR, EmbeddingLayer
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
class DeepFM(torch.nn.Module):
|
|
15
|
-
"""Deep Factorization Machine Model
|
|
16
|
-
|
|
17
|
-
Args:
|
|
18
|
-
deep_features (list): the list of `Feature Class`, training by the deep part module.
|
|
19
|
-
fm_features (list): the list of `Feature Class`, training by the fm part module.
|
|
20
|
-
mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
|
|
21
|
-
"""
|
|
22
|
-
|
|
23
|
-
def __init__(self, deep_features, fm_features, mlp_params):
|
|
24
|
-
super(DeepFM, self).__init__()
|
|
25
|
-
self.deep_features = deep_features
|
|
26
|
-
self.fm_features = fm_features
|
|
27
|
-
self.deep_dims = sum([fea.embed_dim for fea in deep_features])
|
|
28
|
-
self.fm_dims = sum([fea.embed_dim for fea in fm_features])
|
|
29
|
-
self.linear = LR(self.fm_dims) # 1-odrder interaction
|
|
30
|
-
self.fm = FM(reduce_sum=True) # 2-odrder interaction
|
|
31
|
-
self.embedding = EmbeddingLayer(deep_features + fm_features)
|
|
32
|
-
self.mlp = MLP(self.deep_dims, **mlp_params)
|
|
33
|
-
|
|
34
|
-
def forward(self, x):
|
|
35
|
-
input_deep = self.embedding(x, self.deep_features, squeeze_dim=True) #[batch_size, deep_dims]
|
|
36
|
-
input_fm = self.embedding(x, self.fm_features, squeeze_dim=False) #[batch_size, num_fields, embed_dim]
|
|
37
|
-
|
|
38
|
-
y_linear = self.linear(input_fm.flatten(start_dim=1))
|
|
39
|
-
y_fm = self.fm(input_fm)
|
|
40
|
-
y_deep = self.mlp(input_deep) #[batch_size, 1]
|
|
41
|
-
y = y_linear + y_fm + y_deep
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 22/04/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: (IJCAI'2017) DeepFM: A Factorization-Machine based Neural Network for CTR Prediction
|
|
5
|
+
url: https://arxiv.org/abs/1703.04247
|
|
6
|
+
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
|
|
11
|
+
from ...basic.layers import FM, MLP, LR, EmbeddingLayer
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class DeepFM(torch.nn.Module):
    """Deep Factorization Machine Model

    Args:
        deep_features (list): the list of `Feature Class`, training by the deep part module.
        fm_features (list): the list of `Feature Class`, training by the fm part module.
        mlp_params (dict): the params of the last MLP module, keys include:
            `{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
    """

    def __init__(self, deep_features, fm_features, mlp_params):
        super(DeepFM, self).__init__()
        self.deep_features = deep_features
        self.fm_features = fm_features
        self.deep_dims = sum(fea.embed_dim for fea in deep_features)
        self.fm_dims = sum(fea.embed_dim for fea in fm_features)
        self.linear = LR(self.fm_dims)  # 1st-order interaction
        self.fm = FM(reduce_sum=True)  # 2nd-order interaction
        self.embedding = EmbeddingLayer(deep_features + fm_features)
        self.mlp = MLP(self.deep_dims, **mlp_params)

    def forward(self, x):
        dense_input = self.embedding(x, self.deep_features, squeeze_dim=True)  # [batch_size, deep_dims]
        fm_input = self.embedding(x, self.fm_features, squeeze_dim=False)  # [batch_size, num_fields, embed_dim]

        first_order = self.linear(fm_input.flatten(start_dim=1))
        second_order = self.fm(fm_input)
        deep_part = self.mlp(dense_input)  # [batch_size, 1]

        logit = first_order + second_order + deep_part
        return torch.sigmoid(logit.squeeze(1))
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 01/05/2024
|
|
3
|
+
References:
|
|
4
|
+
paper: (AAAI'2019) Deep Interest Evolution Network for Click-Through Rate Prediction
|
|
5
|
+
url: https://arxiv.org/pdf/1809.03672
|
|
6
|
+
Authors: Tao Fan, thisisevy@foxmail.com
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import torch
|
|
10
|
+
from torch import nn
|
|
11
|
+
from torch.nn import Parameter, init
|
|
12
|
+
|
|
13
|
+
from ...basic.layers import MLP, EmbeddingLayer
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class AUGRU(nn.Module):
    """GRU with Attentional Update gate (AUGRU) — the interest-evolving RNN of DIEN.

    Runs an ``AUGRU_Cell`` step-by-step over a behaviour sequence; the target
    item steers the update gate of each step.
    """

    def __init__(self, embed_dim):
        """
        Args:
            embed_dim (int): dimensionality of the input (and hidden) vectors.
        """
        super(AUGRU, self).__init__()
        self.embed_dim = embed_dim
        # the per-step AUGRU cell
        self.augru_cell = AUGRU_Cell(self.embed_dim)

    def forward(self, x, item):
        """
        Args:
            x: input sequence, shape [batch_size, seq_lens, embed_dim]
            item: target item embedding, shape [batch_size, embed_dim]

        Returns:
            outs: hidden vectors of every step, shape [batch_size, seq_lens, embed_dim]
            h: hidden vector of the last step, shape [batch_size, embed_dim]
        """
        outs = []
        # Bugfix: the original lazily built the initial hidden state as a fresh
        # ``nn.Parameter(torch.rand(...))`` inside forward — it was never
        # registered/trained, was re-created on every call, and made the output
        # nondeterministic. Use the conventional all-zeros initial state.
        h = torch.zeros(x.shape[0], self.embed_dim, device=x.device, dtype=x.dtype)
        for t in range(x.shape[1]):  # iterate over the sequence length
            h = self.augru_cell(x[:, t], h, item)
            outs.append(torch.unsqueeze(h, dim=1))
        outs = torch.cat(outs, dim=1)
        return outs, h
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
# AUGRU单元
|
|
45
|
+
class AUGRU_Cell(nn.Module):
    """A single AUGRU step: a GRU cell whose update gate is rescaled by the
    attention score between the current input and the target item (DIEN)."""

    def __init__(self, embed_dim):
        """
        Args:
            embed_dim (int): dimensionality of the input (and hidden) vectors.
        """
        super(AUGRU_Cell, self).__init__()

        # update-gate parameters.
        # Fix: Wu/Uu were initialised with ``torch.rand`` (uniform on [0, 1),
        # never centred on zero) while every other weight here used Xavier;
        # use Xavier uniformly for consistency.
        self.Wu = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
        self.Uu = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
        self.bu = init.xavier_uniform_(Parameter(torch.empty(1, embed_dim)))

        # reset-gate parameters
        self.Wr = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
        self.Ur = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
        self.br = init.xavier_uniform_(Parameter(torch.empty(1, embed_dim)))

        # candidate-state (h~) parameters
        self.Wh = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
        self.Uh = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))
        self.bh = init.xavier_uniform_(Parameter(torch.empty(1, embed_dim)))

        # attention projection
        self.Wa = init.xavier_uniform_(Parameter(torch.empty(embed_dim, embed_dim)))

    def attention(self, x, item):
        """Attention score between the step input and the target item.

        Args:
            x: the t-th vector of the sequence, [batch_size, embed_dim]
            item: target item embedding, [batch_size, embed_dim]

        Returns:
            attention weight, [batch_size, 1]
        """
        hW = torch.matmul(x, self.Wa)
        hWi = torch.sum(hW * item, dim=1)
        hWi = torch.unsqueeze(hWi, 1)
        # NOTE(review): softmax over a size-1 dimension is identically 1, so this
        # weight is constant; the DIEN paper normalises over the sequence axis
        # instead. Kept as-is to preserve behaviour — confirm intent before changing.
        return torch.softmax(hWi, dim=1)

    def forward(self, x, h_1, item):
        """
        Args:
            x: the t-th item embedding of the sequence, [batch_size, embed_dim]
            h_1: hidden state of the previous step, [batch_size, embed_dim]
            item: target item embedding, [batch_size, embed_dim]

        Returns:
            h: hidden state of this step, [batch_size, embed_dim]
        """
        # update gate, [batch_size, embed_dim]
        u = torch.sigmoid(torch.matmul(x, self.Wu) + torch.matmul(h_1, self.Uu) + self.bu)
        # reset gate, [batch_size, embed_dim]
        r = torch.sigmoid(torch.matmul(x, self.Wr) + torch.matmul(h_1, self.Ur) + self.br)
        # candidate state, [batch_size, embed_dim]
        h_hat = torch.tanh(torch.matmul(x, self.Wh) + r * torch.matmul(h_1, self.Uh) + self.bh)
        # attention-scaled update gate, [batch_size, 1] broadcast over embed_dim
        a = self.attention(x, item)
        u_hat = a * u
        # convex combination of the previous and candidate states
        h = (1 - u_hat) * h_1 + u_hat * h_hat
        return h
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
class DIEN(nn.Module):
    """Deep Interest Evolution Network

    Args:
        features (list): the list of `Feature Class`. training by MLP. It means the user profile features and context features in origin paper, exclude history and target features.
        history_features (list): the list of `Feature Class`, training by ActivationUnit. It means the user behaviour sequence features, eg. item id sequence, shop id sequence.
        target_features (list): the list of `Feature Class`, training by ActivationUnit. It means the target feature which will execute target-attention with history feature.
        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
        history_labels (list): whether each history item was clicked or not. Values should be 0 or 1.
        alpha (float): the weighting of auxiliary loss.
    """

    def __init__(self, features, history_features, target_features, mlp_params, history_labels,
                 alpha=0.2):
        super().__init__()
        self.alpha = alpha  # weight of the auxiliary loss
        self.features = features
        self.history_features = history_features
        self.target_features = target_features
        self.num_history_features = len(history_features)
        self.all_dims = sum([fea.embed_dim for fea in features + history_features + target_features])

        self.embedding = EmbeddingLayer(features + history_features + target_features)
        # one interest-extractor GRU and one interest-evolving AUGRU per history field
        self.interest_extractor_layers = nn.ModuleList(
            [nn.GRU(fea.embed_dim, fea.embed_dim, batch_first=True) for fea in self.history_features])
        self.interest_evolving_layers = nn.ModuleList(
            [AUGRU(fea.embed_dim) for fea in self.history_features])

        self.mlp = MLP(self.all_dims, activation="dice", **mlp_params)
        # Fix: the labels were stored as a plain tensor attribute, which stays on
        # CPU when the module is moved to GPU and breaks the auxiliary loss;
        # register them as a buffer so they travel with the module.
        self.register_buffer("history_labels", torch.Tensor(history_labels))
        self.BCELoss = nn.BCELoss()

    def auxiliary(self, outs, history_features, history_labels):
        """Auxiliary loss of the interest-extractor layer.

        Args:
            outs: hidden states emitted by the extractor GRU, [batch_size, len_seqs, dim]
            history_features: embeddings of the history items, [batch_size, len_seqs, dim]
            history_labels: click labels of the history items, [batch_size, len_seqs, 1]

        Returns:
            the BCE auxiliary loss (scalar tensor).
        """
        # [batch_size * len_seqs, dim]
        history_features = history_features.reshape(-1, history_features.shape[2])
        # [batch_size * len_seqs, dim]
        outs = outs.reshape(-1, outs.shape[2])
        # inner product of each hidden state with the matching item embedding
        # -> [batch_size * len_seqs]
        out = torch.sum(outs * history_features, dim=1)
        # [batch_size * len_seqs, 1]
        out = torch.unsqueeze(torch.sigmoid(out), 1)
        # [batch_size * len_seqs, 1]
        history_labels = history_labels.reshape(-1, 1).float()
        return self.BCELoss(out, history_labels)

    def forward(self, x):
        embed_x_features = self.embedding(x, self.features)  # (batch_size, num_features, emb_dim)
        embed_x_history = self.embedding(
            x, self.history_features)  # (batch_size, num_history_features, seq_length, emb_dim)
        embed_x_target = self.embedding(x, self.target_features)  # (batch_size, num_target_features, emb_dim)

        interest_extractor = []
        auxi_loss = 0
        for i in range(self.num_history_features):
            outs, _ = self.interest_extractor_layers[i](embed_x_history[:, i, :, :])
            # the GRU outputs feed the auxiliary loss
            auxi_loss += self.auxiliary(outs, embed_x_history[:, i, :, :], self.history_labels)
            interest_extractor.append(outs.unsqueeze(1))  # (batch_size, 1, seq_length, emb_dim)
        interest_extractor = torch.cat(interest_extractor,
                                       dim=1)  # (batch_size, num_history_features, seq_length, emb_dim)

        interest_evolving = []
        for i in range(self.num_history_features):
            # NOTE(review): assumes target_features align index-wise with
            # history_features — confirm with callers.
            _, h = self.interest_evolving_layers[i](interest_extractor[:, i, :, :], embed_x_target[:, i, :])
            interest_evolving.append(h.unsqueeze(1))  # (batch_size, 1, emb_dim)
        interest_evolving = torch.cat(interest_evolving, dim=1)  # (batch_size, num_history_features, emb_dim)

        mlp_in = torch.cat([
            interest_evolving.flatten(start_dim=1),
            embed_x_target.flatten(start_dim=1),
            embed_x_features.flatten(start_dim=1)
        ],
                           dim=1)  # (batch_size, N)
        y = self.mlp(mlp_in)

        return torch.sigmoid(y.squeeze(1)), self.alpha * auxi_loss
|
|
@@ -1,81 +1,91 @@
|
|
|
1
|
-
"""
|
|
2
|
-
Date: create on 23/04/2022, update on 30/04/2022
|
|
3
|
-
References:
|
|
4
|
-
paper: (KDD'2018) Deep Interest Network for Click-Through Rate Prediction
|
|
5
|
-
url: https://arxiv.org/abs/1706.06978
|
|
6
|
-
code: https://github.com/huawei-noah/benchmark/blob/main/FuxiCTR/fuxictr/pytorch/models/DIN.py
|
|
7
|
-
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
8
|
-
"""
|
|
9
|
-
|
|
10
|
-
import torch
|
|
11
|
-
import torch.nn as nn
|
|
12
|
-
|
|
13
|
-
from ...basic.layers import EmbeddingLayer, MLP
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
class DIN(nn.Module):
|
|
17
|
-
"""Deep Interest Network
|
|
18
|
-
Args:
|
|
19
|
-
features (list): the list of `Feature Class`. training by MLP. It means the user profile features and context features in origin paper, exclude history and target features.
|
|
20
|
-
history_features (list): the list of `Feature Class`,training by ActivationUnit. It means the user behaviour sequence features, eg.item id sequence, shop id sequence.
|
|
21
|
-
target_features (list): the list of `Feature Class`, training by ActivationUnit. It means the target feature which will execute target-attention with history feature.
|
|
22
|
-
mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool`}
|
|
23
|
-
attention_mlp_params (dict): the params of the ActivationUnit module, keys include:`{"dims":list, "activation":str, "dropout":float, "use_softmax":bool`}
|
|
24
|
-
"""
|
|
25
|
-
|
|
26
|
-
def __init__(self, features, history_features, target_features, mlp_params
|
|
27
|
-
super().__init__()
|
|
28
|
-
self.features = features
|
|
29
|
-
self.history_features = history_features
|
|
30
|
-
self.target_features = target_features
|
|
31
|
-
self.num_history_features = len(history_features)
|
|
32
|
-
self.all_dims = sum([fea.embed_dim for fea in features + history_features + target_features])
|
|
33
|
-
|
|
34
|
-
self.embedding = EmbeddingLayer(features + history_features + target_features)
|
|
35
|
-
self.attention_layers = nn.ModuleList(
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
1
|
+
"""
|
|
2
|
+
Date: create on 23/04/2022, update on 30/04/2022
|
|
3
|
+
References:
|
|
4
|
+
paper: (KDD'2018) Deep Interest Network for Click-Through Rate Prediction
|
|
5
|
+
url: https://arxiv.org/abs/1706.06978
|
|
6
|
+
code: https://github.com/huawei-noah/benchmark/blob/main/FuxiCTR/fuxictr/pytorch/models/DIN.py
|
|
7
|
+
Authors: Mincai Lai, laimincai@shanghaitech.edu.cn
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
import torch
|
|
11
|
+
import torch.nn as nn
|
|
12
|
+
|
|
13
|
+
from ...basic.layers import EmbeddingLayer, MLP
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DIN(nn.Module):
    """Deep Interest Network

    Args:
        features (list): the list of `Feature Class`. training by MLP. It means the user profile features and context features in origin paper, exclude history and target features.
        history_features (list): the list of `Feature Class`, training by ActivationUnit. It means the user behaviour sequence features, eg. item id sequence, shop id sequence.
        target_features (list): the list of `Feature Class`, training by ActivationUnit. It means the target feature which will execute target-attention with history feature.
        mlp_params (dict): the params of the last MLP module, keys include:`{"dims":list, "activation":str, "dropout":float, "output_layer":bool}`
        attention_mlp_params (dict): the params of the ActivationUnit module, keys include:`{"dims":list, "activation":str, "dropout":float, "use_softmax":bool}`
    """

    def __init__(self, features, history_features, target_features, mlp_params, attention_mlp_params):
        super().__init__()
        self.features = features
        self.history_features = history_features
        self.target_features = target_features
        self.num_history_features = len(history_features)
        self.all_dims = sum(fea.embed_dim for fea in features + history_features + target_features)

        self.embedding = EmbeddingLayer(features + history_features + target_features)
        # one target-attention unit per history field
        self.attention_layers = nn.ModuleList(
            [ActivationUnit(fea.embed_dim, **attention_mlp_params) for fea in self.history_features])
        self.mlp = MLP(self.all_dims, activation="dice", **mlp_params)

    def forward(self, x):
        embed_x_features = self.embedding(x, self.features)  # (batch_size, num_features, emb_dim)
        embed_x_history = self.embedding(
            x, self.history_features)  # (batch_size, num_history_features, seq_length, emb_dim)
        embed_x_target = self.embedding(x, self.target_features)  # (batch_size, num_target_features, emb_dim)

        # attention-pool each history sequence against its target feature
        pooled = [
            self.attention_layers[i](embed_x_history[:, i, :, :], embed_x_target[:, i, :]).unsqueeze(1)
            for i in range(self.num_history_features)
        ]  # each (batch_size, 1, emb_dim)
        attention_pooling = torch.cat(pooled, dim=1)  # (batch_size, num_history_features, emb_dim)

        mlp_in = torch.cat([
            attention_pooling.flatten(start_dim=1),
            embed_x_target.flatten(start_dim=1),
            embed_x_features.flatten(start_dim=1),
        ], dim=1)  # (batch_size, N)

        return torch.sigmoid(self.mlp(mlp_in).squeeze(1))
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ActivationUnit(nn.Module):
    """Activation Unit Layer from the DIN paper — a target-attention mechanism.

    Scores each history step against the target item with a small MLP and
    returns the attention-weighted sum of the history sequence.

    Args:
        emb_dim (int): the length of the embedding vector.

    Shape:
        - Input: `(batch_size, seq_length, emb_dim)`
        - Output: `(batch_size, emb_dim)`
    """

    def __init__(self, emb_dim, dims=None, activation="dice", use_softmax=False):
        super(ActivationUnit, self).__init__()
        self.emb_dim = emb_dim
        self.use_softmax = use_softmax
        # the attention MLP consumes [target, history, diff, product] per step
        self.attention = MLP(4 * self.emb_dim, dims=[36] if dims is None else dims, activation=activation)

    def forward(self, history, target):
        seq_len = history.size(1)
        expanded_target = target.unsqueeze(1).expand(-1, seq_len, -1)  # (batch_size, seq_length, emb_dim)
        combined = torch.cat(
            [expanded_target, history, expanded_target - history, expanded_target * history],
            dim=-1)  # (batch_size, seq_length, 4*emb_dim)
        # score every (step, target) pair -> (batch_size, seq_length)
        scores = self.attention(combined.view(-1, 4 * self.emb_dim)).view(-1, seq_len)
        if self.use_softmax:
            scores = scores.softmax(dim=-1)
        # weighted-sum pooling over the sequence axis -> (batch_size, emb_dim)
        return (scores.unsqueeze(-1) * history).sum(dim=1)
|