unike 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. unike/__init__.py +5 -0
  2. unike/config/HPOTrainer.py +305 -0
  3. unike/config/Tester.py +385 -0
  4. unike/config/Trainer.py +519 -0
  5. unike/config/TrainerAccelerator.py +39 -0
  6. unike/config/__init__.py +37 -0
  7. unike/data/BernSampler.py +168 -0
  8. unike/data/CompGCNSampler.py +140 -0
  9. unike/data/CompGCNTestSampler.py +84 -0
  10. unike/data/KGEDataLoader.py +315 -0
  11. unike/data/KGReader.py +138 -0
  12. unike/data/RGCNSampler.py +261 -0
  13. unike/data/RGCNTestSampler.py +208 -0
  14. unike/data/RevSampler.py +78 -0
  15. unike/data/TestSampler.py +189 -0
  16. unike/data/TradSampler.py +122 -0
  17. unike/data/TradTestSampler.py +87 -0
  18. unike/data/UniSampler.py +145 -0
  19. unike/data/__init__.py +47 -0
  20. unike/module/BaseModule.py +130 -0
  21. unike/module/__init__.py +20 -0
  22. unike/module/loss/CompGCNLoss.py +96 -0
  23. unike/module/loss/Loss.py +26 -0
  24. unike/module/loss/MarginLoss.py +148 -0
  25. unike/module/loss/RGCNLoss.py +117 -0
  26. unike/module/loss/SigmoidLoss.py +145 -0
  27. unike/module/loss/SoftplusLoss.py +145 -0
  28. unike/module/loss/__init__.py +35 -0
  29. unike/module/model/Analogy.py +237 -0
  30. unike/module/model/CompGCN.py +562 -0
  31. unike/module/model/ComplEx.py +235 -0
  32. unike/module/model/DistMult.py +276 -0
  33. unike/module/model/HolE.py +308 -0
  34. unike/module/model/Model.py +107 -0
  35. unike/module/model/RESCAL.py +309 -0
  36. unike/module/model/RGCN.py +304 -0
  37. unike/module/model/RotatE.py +303 -0
  38. unike/module/model/SimplE.py +237 -0
  39. unike/module/model/TransD.py +458 -0
  40. unike/module/model/TransE.py +290 -0
  41. unike/module/model/TransH.py +322 -0
  42. unike/module/model/TransR.py +402 -0
  43. unike/module/model/__init__.py +60 -0
  44. unike/module/strategy/CompGCNSampling.py +140 -0
  45. unike/module/strategy/NegativeSampling.py +138 -0
  46. unike/module/strategy/RGCNSampling.py +134 -0
  47. unike/module/strategy/Strategy.py +26 -0
  48. unike/module/strategy/__init__.py +29 -0
  49. unike/utils/EarlyStopping.py +94 -0
  50. unike/utils/Timer.py +74 -0
  51. unike/utils/WandbLogger.py +46 -0
  52. unike/utils/__init__.py +26 -0
  53. unike/utils/tools.py +118 -0
  54. unike/version.py +1 -0
  55. unike-3.0.1.dist-info/METADATA +101 -0
  56. unike-3.0.1.dist-info/RECORD +59 -0
  57. unike-3.0.1.dist-info/WHEEL +4 -0
  58. unike-3.0.1.dist-info/entry_points.txt +2 -0
  59. unike-3.0.1.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,145 @@
1
+ # coding:utf-8
2
+ #
3
+ # unike/module/loss/SoftplusLoss.py
4
+ #
5
+ # git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
6
+ # updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 6, 2023
7
+ #
8
+ # 该脚本定义了 regularized logistic loss 损失函数.
9
+
10
+ """
11
+ SoftplusLoss - 损失函数类,DistMult 经常应用这种损失函数完成模型学习。
12
+ """
13
+
14
+ import torch
15
+ import numpy as np
16
+ import torch.nn as nn
17
+ from typing import Any
18
+ import torch.nn.functional as F
19
+ from .Loss import Loss
20
+
21
class SoftplusLoss(Loss):

    """Regularized logistic (softplus) loss.

    The original ``ComplEx`` :cite:`ComplEx` paper trains its model with this loss.

    .. Note:: In :py:meth:`forward`, the scores of positive samples are expected
        to be larger than the scores of negative samples.

    Example::

        from unike.module.loss import SoftplusLoss
        from unike.module.strategy import NegativeSampling

        # define the loss function
        model = NegativeSampling(
            model = distmult,
            loss = SoftplusLoss(),
            batch_size = train_dataloader.get_batch_size(),
            regul_rate = 1.0
        )
    """

    def __init__(
        self,
        adv_temperature: float | None = None):

        """Create a SoftplusLoss object.

        :param adv_temperature: Temperature of the self-adversarial negative
            sampling proposed by RotatE. ``None`` disables self-adversarial
            weighting.
        :type adv_temperature: float | None
        """

        super().__init__()
        #: Stand-in for the logistic function, a :py:class:`torch.nn.Softplus`
        #: (a smooth approximation of ReLU).
        self.criterion: torch.nn.Softplus = nn.Softplus()
        #: Whether the self-adversarial negative sampling proposed by RotatE is enabled.
        # Identity test: ``None`` is a singleton, so ``is not None`` is the
        # correct check and does not depend on the argument's ``__ne__``.
        self.adv_flag: bool = adv_temperature is not None
        if self.adv_flag:
            #: Temperature of the self-adversarial negative sampling proposed by RotatE.
            #: Stored as a frozen parameter; it only exists when ``adv_flag`` is True.
            self.adv_temperature: torch.nn.parameter.Parameter = nn.Parameter(
                torch.Tensor([adv_temperature]),
                requires_grad = False)

    def get_weights(
        self,
        n_score: torch.Tensor) -> torch.Tensor:

        """Compute the negative-sample distribution used by RotatE's
        self-adversarial negative sampling.

        ``self.adv_temperature`` is only created when a temperature was passed
        to the constructor, so this method must only be called when
        ``self.adv_flag`` is True.

        :param n_score: Scores of the negative samples.
        :type n_score: torch.Tensor
        :returns: Softmax over the last dimension of the temperature-scaled
            negative scores, detached from the autograd graph.
        :rtype: torch.Tensor
        """

        scaled_scores = n_score * self.adv_temperature
        return F.softmax(scaled_scores, dim = -1).detach()

    def forward(
        self,
        p_score: torch.Tensor,
        n_score: torch.Tensor) -> torch.Tensor:

        """Compute the softplus loss.

        Defines the computation performed at every call;
        :py:class:`torch.nn.Module` subclasses must override
        :py:meth:`torch.nn.Module.forward`.

        :param p_score: Scores of the positive samples.
        :type p_score: torch.Tensor
        :param n_score: Scores of the negative samples.
        :type n_score: torch.Tensor
        :returns: The loss value.
        :rtype: torch.Tensor
        """

        # The positive term is the same in both modes.
        positive_term = self.criterion(-p_score).mean()
        negative_loss = self.criterion(n_score)
        if self.adv_flag:
            # Weight each negative by its self-adversarial probability, then
            # sum over the last dimension before averaging.
            negative_term = (self.get_weights(n_score) * negative_loss).sum(dim = -1).mean()
        else:
            negative_term = negative_loss.mean()
        return (positive_term + negative_term) / 2

    def predict(
        self,
        p_score: torch.Tensor,
        n_score: torch.Tensor) -> np.ndarray:

        """Inference helper of SoftplusLoss: the loss as a NumPy array.

        :param p_score: Scores of the positive samples.
        :type p_score: torch.Tensor
        :param n_score: Scores of the negative samples.
        :type n_score: torch.Tensor
        :returns: The loss value.
        :rtype: numpy.ndarray
        """

        score = self.forward(p_score, n_score)
        # ``detach()`` replaces the legacy ``.data`` accessor.
        return score.detach().cpu().numpy()
116
+
117
def get_softplus_loss_hpo_config() -> dict[str, dict[str, Any]]:

    """Return the default hyper-parameter optimization configuration of
    :py:class:`SoftplusLoss`.

    The default configuration is::

        parameters_dict = {
            'loss': {
                'value': 'SoftplusLoss'
            },
            'adv_temperature': {
                'values': [1.0, 3.0, 6.0]
            }
        }

    :returns: The default hyper-parameter optimization configuration of
        :py:class:`SoftplusLoss`; a new dictionary is built on every call.
    :rtype: dict[str, dict[str, typing.Any]]
    """

    # Fixed choice of loss function.
    loss_choice: dict[str, Any] = {'value': 'SoftplusLoss'}
    # Candidate temperatures for self-adversarial negative sampling.
    temperature_grid: dict[str, Any] = {'values': [1.0, 3.0, 6.0]}

    return {'loss': loss_choice, 'adv_temperature': temperature_grid}
@@ -0,0 +1,35 @@
1
+ # coding:utf-8
2
+ #
3
+ # unike/module/loss/__init__.py
4
+ #
5
+ # git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
6
+ # updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 28, 2023
7
+ #
8
+ # 该包初始化模块(__init__.py)汇总并导出各损失函数类及其超参数优化配置函数.
9
+
10
+ """损失函数部分。"""
11
+
12
+ from __future__ import absolute_import
13
+ from __future__ import division
14
+ from __future__ import print_function
15
+
16
+ from .Loss import Loss
17
+ from .MarginLoss import MarginLoss, get_margin_loss_hpo_config
18
+ from .SigmoidLoss import SigmoidLoss, get_sigmoid_loss_hpo_config
19
+ from .SoftplusLoss import SoftplusLoss, get_softplus_loss_hpo_config
20
+ from .RGCNLoss import RGCNLoss, get_rgcn_loss_hpo_config
21
+ from .CompGCNLoss import CompGCNLoss, get_compgcn_loss_hpo_config
22
+
23
# Public API of the loss package: each loss class, followed (where one exists)
# by the function returning its default hyper-parameter optimization config.
__all__ = [
    'Loss',
    'MarginLoss',
    'get_margin_loss_hpo_config',
    'SigmoidLoss',
    'get_sigmoid_loss_hpo_config',
    'SoftplusLoss',
    'get_softplus_loss_hpo_config',
    'RGCNLoss',
    'get_rgcn_loss_hpo_config',
    'CompGCNLoss',
    'get_compgcn_loss_hpo_config'
]
@@ -0,0 +1,237 @@
1
+ # coding:utf-8
2
+ #
3
+ # unike/module/model/Analogy.py
4
+ #
5
+ # git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
6
+ # updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 31, 2024
7
+ #
8
+ # 该模块定义了 Analogy 模型.
9
+
10
+ """
11
+ Analogy 类 - DistMult、HolE 和 ComplEx 的集大成者,效果与 HolE、ComplEx 差不多。
12
+ """
13
+
14
+ import torch
15
+ import typing
16
+ import torch.nn as nn
17
+ from .Model import Model
18
+ from typing_extensions import override
19
+
20
class Analogy(Model):

    r"""
    ``Analogy`` :cite:`ANALOGY`, proposed in 2017, unifies :py:class:`unike.module.model.DistMult`,
    :py:class:`unike.module.model.HolE` and :py:class:`unike.module.model.ComplEx`;
    its results are about the same as :py:class:`unike.module.model.HolE` and :py:class:`unike.module.model.ComplEx`.

    The scoring function is:

    .. math::

        <\operatorname{Re}(\mathbf{h_c}),\operatorname{Re}(\mathbf{r_c}),\operatorname{Re}(\mathbf{t_c})>
            +<\operatorname{Re}(\mathbf{h_c}),\operatorname{Im}(\mathbf{r_c}),\operatorname{Im}(\mathbf{t_c})>
            +<\operatorname{Im}(\mathbf{h_c}),\operatorname{Re}(\mathbf{r_c}),\operatorname{Im}(\mathbf{t_c})>
            -<\operatorname{Im}(\mathbf{h_c}),\operatorname{Im}(\mathbf{r_c}),\operatorname{Re}(\mathbf{t_c})>
            +<\mathbf{h_d}, \mathbf{r_d}, \mathbf{t_d}>

    It is the sum of the scoring functions of :py:class:`unike.module.model.DistMult` and
    :py:class:`unike.module.model.ComplEx`. :math:`< \mathbf{a}, \mathbf{b}, \mathbf{c} >` is the
    element-wise multi-linear dot product. The score of a positive triple should be as large as
    possible and that of a negative triple as small as possible. See :ref:`ANALOGY <analogy>` for details.

    Example::

        from unike.config import Trainer, Tester
        from unike.module.model import Analogy
        from unike.module.loss import SoftplusLoss
        from unike.module.strategy import NegativeSampling

        # define the model
        analogy = Analogy(
            ent_tol = train_dataloader.get_ent_tol(),
            rel_tol = train_dataloader.get_rel_tol(),
            dim = 200
        )

        # define the loss function
        model = NegativeSampling(
            model = analogy,
            loss = SoftplusLoss(),
            batch_size = train_dataloader.get_batch_size(),
            regul_rate = 1.0
        )

        # test the model
        tester = Tester(model = analogy, data_loader = test_dataloader, use_gpu = True, device = 'cuda:1')

        # train the model
        trainer = Trainer(model = model, data_loader = train_dataloader,
            epochs = 2000, lr = 0.5, opt_method = "adagrad", use_gpu = True, device = 'cuda:1',
            tester = tester, test = True, valid_interval = 100,
            log_interval = 100, save_interval = 100,
            save_path = '../../checkpoint/analogy.pth', delta = 0.01)
        trainer.run()
    """

    def __init__(
        self,
        ent_tol: int,
        rel_tol: int,
        dim: int = 100):

        """Create an Analogy object.

        :param ent_tol: Number of entities.
        :type ent_tol: int
        :param rel_tol: Number of relations.
        :type rel_tol: int
        :param dim: Base dimension of the entity and relation embeddings; the
            embedding tables are created with width ``dim * 4``.
        :type dim: int
        """

        super().__init__(ent_tol, rel_tol)

        #: Base dimension of the entity and relation embeddings.
        self.dim: int = dim
        #: Entity embedding table, one row of width ``dim * 4`` per entity.
        self.ent_embeddings: torch.nn.Embedding = nn.Embedding(self.ent_tol, self.dim * 4)
        #: Relation embedding table, one row of width ``dim * 4`` per relation.
        self.rel_embeddings: torch.nn.Embedding = nn.Embedding(self.rel_tol, self.dim * 4)

        nn.init.xavier_uniform_(self.ent_embeddings.weight.data)
        nn.init.xavier_uniform_(self.rel_embeddings.weight.data)

    @override
    def forward(
        self,
        triples: torch.Tensor,
        negs: typing.Optional[torch.Tensor] = None,
        mode: str = 'single') -> torch.Tensor:

        """
        Define the computation performed at every call.
        :py:class:`torch.nn.Module` subclasses must override :py:meth:`torch.nn.Module.forward`.

        :param triples: The correct (positive) triples.
        :type triples: torch.Tensor
        :param negs: Negative samples, forwarded to ``tri2emb``; may be ``None``.
        :type negs: torch.Tensor | None
        :param mode: Mode, forwarded to ``tri2emb``.
        :type mode: str
        :returns: Scores of the triples.
        :rtype: torch.Tensor
        """

        head_emb, relation_emb, tail_emb = self.tri2emb(triples, negs, mode)
        return self._calc(head_emb, relation_emb, tail_emb)

    def _calc(
        self,
        head_emb: torch.Tensor,
        relation_emb: torch.Tensor,
        tail_emb: torch.Tensor) -> torch.Tensor:

        """Compute the Analogy scoring function.

        :param head_emb: Head entity embeddings.
        :type head_emb: torch.Tensor
        :param relation_emb: Relation embeddings.
        :type relation_emb: torch.Tensor
        :param tail_emb: Tail entity embeddings.
        :type tail_emb: torch.Tensor
        :returns: Scores of the triples.
        :rtype: torch.Tensor
        """

        # Split the last dimension in half: the first half feeds the
        # ComplEx-style term, the second half the DistMult-style term.
        head_complex, head_real = torch.chunk(head_emb, 2, dim=-1)
        rel_complex, rel_real = torch.chunk(relation_emb, 2, dim=-1)
        tail_complex, tail_real = torch.chunk(tail_emb, 2, dim=-1)

        # Split the complex half again into real and imaginary components.
        h_re, h_im = torch.chunk(head_complex, 2, dim=-1)
        r_re, r_im = torch.chunk(rel_complex, 2, dim=-1)
        t_re, t_im = torch.chunk(tail_complex, 2, dim=-1)

        complex_score = torch.sum(r_re * h_re * t_re +
                                  r_re * h_im * t_im +
                                  r_im * h_re * t_im -
                                  r_im * h_im * t_re, -1)
        real_score = torch.sum(head_real * tail_real * rel_real, -1)

        return complex_score + real_score

    @override
    def predict(
        self,
        data: dict[str, typing.Union[torch.Tensor,str]],
        mode: str) -> torch.Tensor:

        """Inference method of Analogy.

        :param data: Data; only ``data["positive_sample"]`` is read here.
        :type data: dict[str, typing.Union[torch.Tensor,str]]
        :param mode: Mode, forwarded to ``tri2emb``.
        :type mode: str
        :returns: Scores of the triples.
        :rtype: torch.Tensor
        """

        triples = data["positive_sample"]
        head_emb, relation_emb, tail_emb = self.tri2emb(triples, mode=mode)
        return self._calc(head_emb, relation_emb, tail_emb)

    def regularization(
        self,
        data: dict[str, typing.Union[torch.Tensor, str]]) -> torch.Tensor:

        """L2 regularization (also known as weight decay), used in the loss function.

        :param data: Data; the keys ``"positive_sample"``, ``"negative_sample"``
            and ``"mode"`` are read.
        :type data: dict[str, typing.Union[torch.Tensor, str]]
        :returns: Regularization loss of the model parameters.
        :rtype: torch.Tensor
        """

        pos_sample = data["positive_sample"]
        neg_sample = data["negative_sample"]
        mode = data["mode"]

        pos_head_emb, pos_relation_emb, pos_tail_emb = self.tri2emb(pos_sample)
        if mode == "bern":
            # In "bern" mode the negative sample is looked up on its own.
            neg_head_emb, neg_relation_emb, neg_tail_emb = self.tri2emb(neg_sample)
        else:
            # Otherwise it is looked up together with the positive sample.
            neg_head_emb, neg_relation_emb, neg_tail_emb = self.tri2emb(pos_sample, neg_sample, mode)

        # Mean squared magnitude, averaged over head / relation / tail.
        pos_regul = (torch.mean(pos_head_emb ** 2) +
                     torch.mean(pos_relation_emb ** 2) +
                     torch.mean(pos_tail_emb ** 2)) / 3

        neg_regul = (torch.mean(neg_head_emb ** 2) +
                     torch.mean(neg_relation_emb ** 2) +
                     torch.mean(neg_tail_emb ** 2)) / 3

        return (pos_regul + neg_regul) / 2
208
+
209
def get_analogy_hpo_config() -> dict[str, dict[str, typing.Any]]:

    """Return the default hyper-parameter optimization configuration of
    :py:class:`Analogy`.

    The default configuration is::

        parameters_dict = {
            'model': {
                'value': 'Analogy'
            },
            'dim': {
                'values': [50, 100, 200]
            }
        }

    :returns: The default hyper-parameter optimization configuration of
        :py:class:`Analogy`; a new dictionary is built on every call.
    :rtype: dict[str, dict[str, typing.Any]]
    """

    # Fixed choice of model.
    model_choice: dict[str, typing.Any] = {'value': 'Analogy'}
    # Candidate embedding dimensions.
    dim_grid: dict[str, typing.Any] = {'values': [50, 100, 200]}

    return {'model': model_choice, 'dim': dim_grid}