torch-rechub 0.0.1__py3-none-any.whl → 0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torch_rechub/__init__.py +14 -0
- torch_rechub/basic/activation.py +3 -1
- torch_rechub/basic/callback.py +2 -2
- torch_rechub/basic/features.py +38 -8
- torch_rechub/basic/initializers.py +92 -0
- torch_rechub/basic/layers.py +800 -46
- torch_rechub/basic/loss_func.py +223 -0
- torch_rechub/basic/metaoptimizer.py +76 -0
- torch_rechub/basic/metric.py +251 -0
- torch_rechub/models/generative/__init__.py +6 -0
- torch_rechub/models/generative/hllm.py +249 -0
- torch_rechub/models/generative/hstu.py +189 -0
- torch_rechub/models/matching/__init__.py +13 -0
- torch_rechub/models/matching/comirec.py +193 -0
- torch_rechub/models/matching/dssm.py +72 -0
- torch_rechub/models/matching/dssm_facebook.py +77 -0
- torch_rechub/models/matching/dssm_senet.py +87 -0
- torch_rechub/models/matching/gru4rec.py +85 -0
- torch_rechub/models/matching/mind.py +103 -0
- torch_rechub/models/matching/narm.py +82 -0
- torch_rechub/models/matching/sasrec.py +143 -0
- torch_rechub/models/matching/sine.py +148 -0
- torch_rechub/models/matching/stamp.py +81 -0
- torch_rechub/models/matching/youtube_dnn.py +75 -0
- torch_rechub/models/matching/youtube_sbc.py +98 -0
- torch_rechub/models/multi_task/__init__.py +5 -2
- torch_rechub/models/multi_task/aitm.py +83 -0
- torch_rechub/models/multi_task/esmm.py +19 -8
- torch_rechub/models/multi_task/mmoe.py +18 -12
- torch_rechub/models/multi_task/ple.py +41 -29
- torch_rechub/models/multi_task/shared_bottom.py +3 -2
- torch_rechub/models/ranking/__init__.py +13 -2
- torch_rechub/models/ranking/afm.py +65 -0
- torch_rechub/models/ranking/autoint.py +102 -0
- torch_rechub/models/ranking/bst.py +61 -0
- torch_rechub/models/ranking/dcn.py +38 -0
- torch_rechub/models/ranking/dcn_v2.py +59 -0
- torch_rechub/models/ranking/deepffm.py +131 -0
- torch_rechub/models/ranking/deepfm.py +8 -7
- torch_rechub/models/ranking/dien.py +191 -0
- torch_rechub/models/ranking/din.py +31 -19
- torch_rechub/models/ranking/edcn.py +101 -0
- torch_rechub/models/ranking/fibinet.py +42 -0
- torch_rechub/models/ranking/widedeep.py +6 -6
- torch_rechub/trainers/__init__.py +4 -2
- torch_rechub/trainers/ctr_trainer.py +191 -0
- torch_rechub/trainers/match_trainer.py +239 -0
- torch_rechub/trainers/matching.md +3 -0
- torch_rechub/trainers/mtl_trainer.py +137 -23
- torch_rechub/trainers/seq_trainer.py +293 -0
- torch_rechub/utils/__init__.py +0 -0
- torch_rechub/utils/data.py +492 -0
- torch_rechub/utils/hstu_utils.py +198 -0
- torch_rechub/utils/match.py +457 -0
- torch_rechub/utils/mtl.py +136 -0
- torch_rechub/utils/onnx_export.py +353 -0
- torch_rechub-0.0.4.dist-info/METADATA +391 -0
- torch_rechub-0.0.4.dist-info/RECORD +62 -0
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info}/WHEEL +1 -2
- {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info/licenses}/LICENSE +1 -1
- torch_rechub/basic/utils.py +0 -168
- torch_rechub/trainers/trainer.py +0 -111
- torch_rechub-0.0.1.dist-info/METADATA +0 -105
- torch_rechub-0.0.1.dist-info/RECORD +0 -26
- torch_rechub-0.0.1.dist-info/top_level.txt +0 -1
torch_rechub/__init__.py
CHANGED
torch_rechub/basic/activation.py
CHANGED
@@ -30,7 +30,7 @@ def activation_layer(act_name):
 
     Args:
         act_name: str or nn.Module, name of activation function
-
+
     Returns:
         act_layer: activation layer
     """
@@ -45,6 +45,8 @@ def activation_layer(act_name):
            act_layer = nn.PReLU()
        elif act_name.lower() == "softmax":
            act_layer = nn.Softmax(dim=1)
+        elif act_name.lower() == 'leakyrelu':
+            act_layer = nn.LeakyReLU()
    elif issubclass(act_name, nn.Module):
        act_layer = act_name()
    else:
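
In practice, the new branch lets the layer factory resolve the string 'leakyrelu'; a minimal usage sketch (assuming activation_layer is imported from torch_rechub.basic.activation):

# Hedged usage sketch of the newly supported activation name.
from torch_rechub.basic.activation import activation_layer

act = activation_layer("leakyrelu")  # after this change, returns nn.LeakyReLU()
print(act)                           # LeakyReLU(negative_slope=0.01)
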
torch_rechub/basic/callback.py
CHANGED
@@ -3,7 +3,7 @@ import copy
 
 class EarlyStopper(object):
     """Early stops the training if validation loss doesn't improve after a given patience.
-
+
     Args:
         patience (int): How long to wait after last time validation auc improved.
     """
@@ -30,4 +30,4 @@ class EarlyStopper(object):
             self.trial_counter += 1
             return False
         else:
-            return True
+            return True
torch_rechub/basic/features.py
CHANGED
@@ -1,4 +1,5 @@
-from .
+from ..utils.data import get_auto_embedding_dim
+from .initializers import RandomNormal
 
 
 class SequenceFeature(object):
@@ -13,17 +14,29 @@ class SequenceFeature(object):
         embed_dim (int): embedding vector's length
         pooling (str): pooling method, support `["mean", "sum", "concat"]` (default=`"mean"`)
         shared_with (str): the another feature name which this feature will shared with embedding.
+        padding_idx (int, optional): If specified, the entries at padding_idx will be masked 0 in InputMask Layer.
+        initializer(Initializer): Initializer the embedding layer weight.
     """
 
-    def __init__(self, name, vocab_size, embed_dim=None, pooling="mean", shared_with=None):
+    def __init__(self, name, vocab_size, embed_dim=None, pooling="mean", shared_with=None, padding_idx=None, initializer=RandomNormal(0, 0.0001)):
         self.name = name
         self.vocab_size = vocab_size
-        if embed_dim
+        if embed_dim is None:
             self.embed_dim = get_auto_embedding_dim(vocab_size)
         else:
             self.embed_dim = embed_dim
         self.pooling = pooling
         self.shared_with = shared_with
+        self.padding_idx = padding_idx
+        self.initializer = initializer
+
+    def __repr__(self):
+        return f'<SequenceFeature {self.name} with Embedding shape ({self.vocab_size}, {self.embed_dim})>'
+
+    def get_embedding_layer(self):
+        if not hasattr(self, 'embed'):
+            self.embed = self.initializer(self.vocab_size, self.embed_dim)
+        return self.embed
 
 
 class SparseFeature(object):
@@ -33,15 +46,29 @@ class SparseFeature(object):
         name (str): feature's name.
         vocab_size (int): vocabulary size of embedding table.
         embed_dim (int): embedding vector's length
+        shared_with (str): the another feature name which this feature will shared with embedding.
+        padding_idx (int, optional): If specified, the entries at padding_idx will be masked 0 in InputMask Layer.
+        initializer(Initializer): Initializer the embedding layer weight.
     """
 
-    def __init__(self, name, vocab_size, embed_dim=None):
+    def __init__(self, name, vocab_size, embed_dim=None, shared_with=None, padding_idx=None, initializer=RandomNormal(0, 0.0001)):
         self.name = name
         self.vocab_size = vocab_size
-        if embed_dim
+        if embed_dim is None:
             self.embed_dim = get_auto_embedding_dim(vocab_size)
         else:
             self.embed_dim = embed_dim
+        self.shared_with = shared_with
+        self.padding_idx = padding_idx
+        self.initializer = initializer
+
+    def __repr__(self):
+        return f'<SparseFeature {self.name} with Embedding shape ({self.vocab_size}, {self.embed_dim})>'
+
+    def get_embedding_layer(self):
+        if not hasattr(self, 'embed'):
+            self.embed = self.initializer(self.vocab_size, self.embed_dim)
+        return self.embed
 
 
 class DenseFeature(object):
@@ -49,9 +76,12 @@ class DenseFeature(object):
 
     Args:
         name (str): feature's name.
-        embed_dim (int): embedding vector's length, the value fixed `1`.
+        embed_dim (int): embedding vector's length, the value fixed `1`. If you put a vector (torch.tensor) , replace the embed_dim with your vector dimension.
     """
 
-    def __init__(self, name):
+    def __init__(self, name, embed_dim=1):
         self.name = name
-        self.embed_dim =
+        self.embed_dim = embed_dim
+
+    def __repr__(self):
+        return f'<DenseFeature {self.name}>'
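
A minimal sketch of the extended feature constructors after this change; the feature names, vocabulary sizes and dimensions below are hypothetical, not taken from the package:

# Hedged usage sketch; 'user_id' / 'hist_item_ids' and all sizes are made-up examples.
from torch_rechub.basic.features import DenseFeature, SparseFeature, SequenceFeature
from torch_rechub.basic.initializers import XavierUniform

age = DenseFeature("age")  # embed_dim now defaults to 1 instead of being hard-coded
user_id = SparseFeature("user_id", vocab_size=10000, embed_dim=16, initializer=XavierUniform())
hist = SequenceFeature("hist_item_ids", vocab_size=5000, embed_dim=16, pooling="mean", padding_idx=0)
embedding = user_id.get_embedding_layer()  # lazily builds and caches an nn.Embedding(10000, 16)
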
torch_rechub/basic/initializers.py
ADDED
@@ -0,0 +1,92 @@
+import torch
+
+
+class RandomNormal(object):
+    """Returns an embedding initialized with a normal distribution.
+
+    Args:
+        mean (float): the mean of the normal distribution
+        std (float): the standard deviation of the normal distribution
+    """
+
+    def __init__(self, mean=0.0, std=1.0):
+        self.mean = mean
+        self.std = std
+
+    def __call__(self, vocab_size, embed_dim):
+        embed = torch.nn.Embedding(vocab_size, embed_dim)
+        torch.nn.init.normal_(embed.weight, self.mean, self.std)
+        return embed
+
+
+class RandomUniform(object):
+    """Returns an embedding initialized with a uniform distribution.
+
+    Args:
+        minval (float): Lower bound of the range of random values of the uniform distribution.
+        maxval (float): Upper bound of the range of random values of the uniform distribution.
+    """
+
+    def __init__(self, minval=0.0, maxval=1.0):
+        self.minval = minval
+        self.maxval = maxval
+
+    def __call__(self, vocab_size, embed_dim):
+        embed = torch.nn.Embedding(vocab_size, embed_dim)
+        torch.nn.init.uniform_(embed.weight, self.minval, self.maxval)
+        return embed
+
+
+class XavierNormal(object):
+    """Returns an embedding initialized with the method described in
+    `Understanding the difficulty of training deep feedforward neural networks`
+    - Glorot, X. & Bengio, Y. (2010), using a uniform distribution.
+
+    Args:
+        gain (float): stddev = gain*sqrt(2 / (fan_in + fan_out))
+    """
+
+    def __init__(self, gain=1.0):
+        self.gain = gain
+
+    def __call__(self, vocab_size, embed_dim):
+        embed = torch.nn.Embedding(vocab_size, embed_dim)
+        torch.nn.init.xavier_normal_(embed.weight, self.gain)
+        return embed
+
+
+class XavierUniform(object):
+    """Returns an embedding initialized with the method described in
+    `Understanding the difficulty of training deep feedforward neural networks`
+    - Glorot, X. & Bengio, Y. (2010), using a uniform distribution.
+
+    Args:
+        gain (float): stddev = gain*sqrt(6 / (fan_in + fan_out))
+    """
+
+    def __init__(self, gain=1.0):
+        self.gain = gain
+
+    def __call__(self, vocab_size, embed_dim):
+        embed = torch.nn.Embedding(vocab_size, embed_dim)
+        torch.nn.init.xavier_uniform_(embed.weight, self.gain)
+        return embed
+
+
+class Pretrained(object):
+    """Creates Embedding instance from given 2-dimensional FloatTensor.
+
+    Args:
+        embedding_weight(Tensor or ndarray or List[List[int]]): FloatTensor containing weights for the Embedding.
+            First dimension is being passed to Embedding as ``num_embeddings``, second as ``embedding_dim``.
+        freeze (boolean, optional): If ``True``, the tensor does not get updated in the learning process.
+    """
+
+    def __init__(self, embedding_weight, freeze=True):
+        self.embedding_weight = torch.FloatTensor(embedding_weight)
+        self.freeze = freeze
+
+    def __call__(self, vocab_size, embed_dim):
+        assert vocab_size == self.embedding_weight.shape[0] and embed_dim == self.embedding_weight.shape[1]
+        embed = torch.nn.Embedding.from_pretrained(self.embedding_weight, freeze=self.freeze)
+        return embed
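
Each initializer added above is a callable that builds a torch.nn.Embedding of the requested shape; a short, hedged usage sketch (sizes and weight values are illustrative only):

# Hedged usage sketch of the initializer classes.
from torch_rechub.basic.initializers import RandomNormal, Pretrained

emb = RandomNormal(mean=0.0, std=0.0001)(vocab_size=100, embed_dim=8)    # weights drawn from N(0, 1e-4)
weights = [[0.01] * 8 for _ in range(100)]                               # hypothetical pretrained table, shape (100, 8)
frozen = Pretrained(weights, freeze=True)(vocab_size=100, embed_dim=8)   # weights copied and not trained
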