PyPI - unike - Versions diffs - 3.0.1__py3-none-any.whl - Mend

unike 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

unike/__init__.py +5 -0
unike/config/HPOTrainer.py +305 -0
unike/config/Tester.py +385 -0
unike/config/Trainer.py +519 -0
unike/config/TrainerAccelerator.py +39 -0
unike/config/__init__.py +37 -0
unike/data/BernSampler.py +168 -0
unike/data/CompGCNSampler.py +140 -0
unike/data/CompGCNTestSampler.py +84 -0
unike/data/KGEDataLoader.py +315 -0
unike/data/KGReader.py +138 -0
unike/data/RGCNSampler.py +261 -0
unike/data/RGCNTestSampler.py +208 -0
unike/data/RevSampler.py +78 -0
unike/data/TestSampler.py +189 -0
unike/data/TradSampler.py +122 -0
unike/data/TradTestSampler.py +87 -0
unike/data/UniSampler.py +145 -0
unike/data/__init__.py +47 -0
unike/module/BaseModule.py +130 -0
unike/module/__init__.py +20 -0
unike/module/loss/CompGCNLoss.py +96 -0
unike/module/loss/Loss.py +26 -0
unike/module/loss/MarginLoss.py +148 -0
unike/module/loss/RGCNLoss.py +117 -0
unike/module/loss/SigmoidLoss.py +145 -0
unike/module/loss/SoftplusLoss.py +145 -0
unike/module/loss/__init__.py +35 -0
unike/module/model/Analogy.py +237 -0
unike/module/model/CompGCN.py +562 -0
unike/module/model/ComplEx.py +235 -0
unike/module/model/DistMult.py +276 -0
unike/module/model/HolE.py +308 -0
unike/module/model/Model.py +107 -0
unike/module/model/RESCAL.py +309 -0
unike/module/model/RGCN.py +304 -0
unike/module/model/RotatE.py +303 -0
unike/module/model/SimplE.py +237 -0
unike/module/model/TransD.py +458 -0
unike/module/model/TransE.py +290 -0
unike/module/model/TransH.py +322 -0
unike/module/model/TransR.py +402 -0
unike/module/model/__init__.py +60 -0
unike/module/strategy/CompGCNSampling.py +140 -0
unike/module/strategy/NegativeSampling.py +138 -0
unike/module/strategy/RGCNSampling.py +134 -0
unike/module/strategy/Strategy.py +26 -0
unike/module/strategy/__init__.py +29 -0
unike/utils/EarlyStopping.py +94 -0
unike/utils/Timer.py +74 -0
unike/utils/WandbLogger.py +46 -0
unike/utils/__init__.py +26 -0
unike/utils/tools.py +118 -0
unike/version.py +1 -0
unike-3.0.1.dist-info/METADATA +101 -0
unike-3.0.1.dist-info/RECORD +59 -0
unike-3.0.1.dist-info/WHEEL +4 -0
unike-3.0.1.dist-info/entry_points.txt +2 -0
unike-3.0.1.dist-info/licenses/LICENSE +21 -0

unike/module/model/TransR.py ADDED Viewed

@@ -0,0 +1,402 @@
+# coding:utf-8
+#
+# unike/module/model/TransR.py
+#
+# git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Feb 25, 2024
+#
+# This module defines the TransR model.
+"""
+TransR - 是一个为实体和关系嵌入向量分别构建了独立的向量空间，将实体向量投影到特定的关系向量空间进行平移操作的模型。
+"""
+import torch
+import typing
+import numpy as np
+import torch.nn as nn
+import torch.nn.functional as F
+from .Model import Model
+from typing_extensions import override
+class TransR(Model):
+	r"""TransR knowledge-graph embedding model.
+
+	``TransR`` :cite:`TransR` was proposed in 2015. It keeps entity embeddings and
+	relation embeddings in separate vector spaces and projects entity vectors into a
+	relation-specific space, where the translation is performed.
+
+	The scoring function is:
+
+	.. math::
+
+		\Vert hM_r+r-tM_r \Vert_{L_1/L_2}
+
+	The smaller the score of a positive triple, the better. See :ref:`TransR <transr>`
+	for more details.
+
+	Example::
+
+		from unike.data import KGEDataLoader, BernSampler, TradTestSampler
+		from unike.module.model import TransE, TransR
+		from unike.module.loss import MarginLoss
+		from unike.module.strategy import NegativeSampling
+		from unike.config import Trainer, Tester
+		# dataloader for training
+		dataloader = KGEDataLoader(
+			in_path = "../../benchmarks/FB15K237/",
+			batch_size = 2048,
+			neg_ent = 25,
+			test = True,
+			test_batch_size = 10,
+			num_workers = 16,
+			train_sampler = BernSampler,
+			test_sampler = TradTestSampler
+		)
+		# define the transe
+		transe = TransE(
+			ent_tol = dataloader.get_ent_tol(),
+			rel_tol = dataloader.get_rel_tol(),
+			dim = 100,
+			p_norm = 1,
+			norm_flag = True)
+		transr = TransR(
+			ent_tol = dataloader.get_ent_tol(),
+			rel_tol = dataloader.get_rel_tol(),
+			dim_e = 100,
+			dim_r = 100,
+			p_norm = 1,
+			norm_flag = True,
+			rand_init = False)
+		model_e = NegativeSampling(
+			model = transe,
+			loss = MarginLoss(margin = 5.0)
+		)
+		model_r = NegativeSampling(
+			model = transr,
+			loss = MarginLoss(margin = 4.0)
+		)
+		# pretrain transe
+		trainer = Trainer(model = model_e, data_loader = dataloader.train_dataloader(),
+			epochs = 1, lr = 0.5, opt_method = "sgd", use_gpu = True, device = 'cuda:0')
+		trainer.run()
+		parameters = transe.get_parameters()
+		transe.save_parameters("../../checkpoint/transr_transe.json")
+		# test the transr
+		tester = Tester(model = transr, data_loader = dataloader, use_gpu = True, device = 'cuda:0')
+		# train transr
+		transr.set_parameters(parameters)
+		trainer = Trainer(model = model_r, data_loader = dataloader.train_dataloader(),
+			epochs = 1000, lr = 1.0, opt_method = "sgd", use_gpu = True, device = 'cuda:0',
+			tester = tester, test = True, valid_interval = 100,
+			log_interval = 100, save_interval = 100, save_path = '../../checkpoint/transr.pth')
+		trainer.run()
+		# test the model
+		transr.load_checkpoint('../../checkpoint/transr.pth')
+		tester.set_sampling_mode("link_test")
+		tester.run_link_prediction()
+	"""
+	def __init__(
+		self,
+		ent_tol: int,
+		rel_tol: int,
+		dim_e: int = 100,
+		dim_r: int = 100,
+		p_norm: int = 1,
+		norm_flag: bool = True,
+		rand_init: bool = False,
+		margin: float | None = None):
+		"""Create a TransR model.
+
+		:param ent_tol: number of entities
+		:type ent_tol: int
+		:param rel_tol: number of relations
+		:type rel_tol: int
+		:param dim_e: dimension of the entity embeddings
+		:type dim_e: int
+		:param dim_r: dimension of the relation embeddings
+		:type dim_r: int
+		:param p_norm: order of the norm used as distance in the scoring function;
+			following the original paper this may be 1 or 2.
+		:type p_norm: int
+		:param norm_flag: whether to apply :py:func:`torch.nn.functional.normalize`
+			(L2-norm over the last dimension) to the entity and relation vectors
+			before scoring.
+		:type norm_flag: bool
+		:param rand_init: whether the relation matrices are randomly initialised
+			(Xavier uniform) instead of starting from the identity.
+		:type rand_init: bool
+		:param margin: must be provided when the ``RotatE`` :cite:`RotatE` loss
+			:py:class:`unike.module.loss.SigmoidLoss` is used; it turns the
+			"smaller is better" score of a positive triple into "larger is better"
+			(``margin - score``). See :ref:`RotatE <rotate>` for details.
+		:type margin: float | None
+		"""
+		super(TransR, self).__init__(ent_tol, rel_tol)
+		#: dimension of the entity embeddings
+		self.dim_e: int = dim_e
+		#: dimension of the relation embeddings
+		self.dim_r: int = dim_r
+		#: order of the norm used by the scoring function (1 or 2 in the paper)
+		self.p_norm: int = p_norm
+		#: whether :py:func:`torch.nn.functional.normalize` is applied to the
+		#: last dimension of the entity and relation vectors before scoring
+		self.norm_flag: bool = norm_flag
+		#: whether the relation matrices are randomly initialised
+		self.rand_init: bool = rand_init
+		#: entity embedding table, one row per entity
+		self.ent_embeddings: torch.nn.Embedding = nn.Embedding(self.ent_tol, self.dim_e)
+		#: relation embedding table, one row per relation
+		self.rel_embeddings: torch.nn.Embedding = nn.Embedding(self.rel_tol, self.dim_r)
+		if margin is not None:
+			#: fixed (non-trainable) margin used to flip the score direction, see ``margin`` above
+			self.margin: torch.nn.parameter.Parameter = nn.Parameter(torch.Tensor([margin]))
+			self.margin.requires_grad = False
+			self.margin_flag: bool = True
+		else:
+			self.margin_flag: bool = False
+		nn.init.xavier_uniform_(self.ent_embeddings.weight.data)
+		nn.init.xavier_uniform_(self.rel_embeddings.weight.data)
+		#: relation matrices, each stored flattened as a row of length ``dim_e * dim_r``
+		self.transfer_matrix: torch.nn.Embedding = nn.Embedding(self.rel_tol, self.dim_e * self.dim_r)
+		if self.rand_init:
+			nn.init.xavier_uniform_(self.transfer_matrix.weight.data)
+		else:
+			# Every relation starts from the same (possibly rectangular) identity
+			# projection; one broadcast copy fills all rows at once.
+			identity = torch.eye(self.dim_e, self.dim_r).view(1, self.dim_e * self.dim_r)
+			self.transfer_matrix.weight.data.copy_(identity.expand(self.rel_tol, -1))
+	@override
+	def forward(
+		self,
+		triples: torch.Tensor,
+		negs: torch.Tensor | None = None,
+		mode: str = 'single') -> torch.Tensor:
+		"""Compute the scores of a batch of triples.
+
+		Subclasses of :py:class:`torch.nn.Module` must override
+		:py:meth:`torch.nn.Module.forward`.
+
+		:param triples: positive triples; column 1 is used as the relation index
+		:type triples: torch.Tensor
+		:param negs: negative samples, forwarded unchanged to ``tri2emb``
+		:type negs: torch.Tensor | None
+		:param mode: sampling mode, forwarded unchanged to ``tri2emb``
+		:type mode: str
+		:returns: the triple scores (``margin - score`` when a margin was given)
+		:rtype: torch.Tensor
+		"""
+		head_emb, relation_emb, tail_emb = self.tri2emb(triples, negs, mode)
+		# The projection matrix is always selected by the relation of the positive triple.
+		rel_transfer = self.transfer_matrix(triples[:, 1])
+		head_emb = self._transfer(head_emb, rel_transfer)
+		tail_emb = self._transfer(tail_emb, rel_transfer)
+		score = self._calc(head_emb, relation_emb, tail_emb)
+		if self.margin_flag:
+			return self.margin - score
+		return score
+	def _transfer(
+		self,
+		e: torch.Tensor,
+		r_transfer: torch.Tensor) -> torch.Tensor:
+		"""Project head or tail entity vectors into the relation-specific space.
+
+		:param e: head or tail entity vectors; the last dimension must be ``dim_e``
+			(assumes a layout like ``(batch, n, dim_e)`` - TODO confirm against ``tri2emb``)
+		:type e: torch.Tensor
+		:param r_transfer: flattened relation matrices, reshaped here to
+			``(-1, dim_e, dim_r)``
+		:type r_transfer: torch.Tensor
+		:returns: the projected entity vectors, last dimension ``dim_r``
+		:rtype: torch.Tensor
+		"""
+		# (batch, dim_e, dim_r) -> (batch, 1, dim_e, dim_r) so it broadcasts over dim 1 of ``e``.
+		projection = r_transfer.view(-1, self.dim_e, self.dim_r).unsqueeze(dim=1)
+		# Treat every entity vector as a 1 x dim_e row matrix for the matmul.
+		projected = torch.matmul(e.unsqueeze(dim=-2), projection)
+		return projected.squeeze(dim=-2)
+	def _calc(
+		self,
+		h: torch.Tensor,
+		r: torch.Tensor,
+		t: torch.Tensor) -> torch.Tensor:
+		"""Evaluate the TransR scoring function ``|| h + r - t ||_p``.
+
+		:param h: (projected) head entity vectors
+		:type h: torch.Tensor
+		:param r: relation vectors
+		:type r: torch.Tensor
+		:param t: (projected) tail entity vectors
+		:type t: torch.Tensor
+		:returns: the triple scores, i.e. the ``p_norm`` norm over the last dimension
+		:rtype: torch.Tensor
+		"""
+		# Optionally L2-normalise the last dimension of every input.
+		if self.norm_flag:
+			h = F.normalize(h, 2, -1)
+			r = F.normalize(r, 2, -1)
+			t = F.normalize(t, 2, -1)
+		score = (h + r) - t
+		# Reduce the translation residual with the configured distance function.
+		score = torch.norm(score, self.p_norm, -1)
+		return score
+	@override
+	def predict(
+		self,
+		data: dict[str, typing.Union[torch.Tensor,str]],
+		mode: str) -> torch.Tensor:
+		"""Inference method of TransR.
+
+		:param data: batch data; only ``data["positive_sample"]`` is read here
+		:type data: dict[str, typing.Union[torch.Tensor,str]]
+		:param mode: 'head_predict' or 'tail_predict'
+		:type mode: str
+		:returns: the triple scores; ``margin - score`` when a margin was given,
+			otherwise the negated distance
+		:rtype: torch.Tensor
+		"""
+		triples = data["positive_sample"]
+		head_emb, relation_emb, tail_emb = self.tri2emb(triples, mode=mode)
+		rel_transfer = self.transfer_matrix(triples[:, 1])
+		head_emb = self._transfer(head_emb, rel_transfer)
+		tail_emb = self._transfer(tail_emb, rel_transfer)
+		score = self._calc(head_emb, relation_emb, tail_emb)
+		if self.margin_flag:
+			return self.margin - score
+		# Without a margin the distance is negated, so a smaller distance gives a larger value.
+		return -score
+	def regularization(
+		self,
+		data: dict[str, typing.Union[torch.Tensor, str]]) -> torch.Tensor:
+		"""L2 regularisation term (a.k.a. weight decay) used by the loss function.
+
+		:param data: batch data; the keys ``"positive_sample"``, ``"negative_sample"``
+			and ``"mode"`` are read
+		:type data: dict[str, typing.Union[torch.Tensor, str]]
+		:returns: the regularisation loss of the model parameters
+		:rtype: torch.Tensor
+		"""
+		pos_sample = data["positive_sample"]
+		neg_sample = data["negative_sample"]
+		mode = data["mode"]
+		pos_head_emb, pos_relation_emb, pos_tail_emb = self.tri2emb(pos_sample)
+		pos_rel_transfer = self.transfer_matrix(pos_sample[:, 1])
+		if mode == "bern":
+			neg_head_emb, neg_relation_emb, neg_tail_emb = self.tri2emb(neg_sample)
+		else:
+			neg_head_emb, neg_relation_emb, neg_tail_emb = self.tri2emb(pos_sample, neg_sample, mode)
+		# NOTE(review): the original looked the negative-side matrices up with the
+		# relation column of the *positive* sample as well, so the two lookups were
+		# identical; the first result is reused instead of repeating the lookup.
+		neg_rel_transfer = pos_rel_transfer
+		pos_regul = (torch.mean(pos_head_emb ** 2) +
+					 torch.mean(pos_relation_emb ** 2) +
+					 torch.mean(pos_tail_emb ** 2) +
+					 torch.mean(pos_rel_transfer ** 2)) / 4
+		neg_regul = (torch.mean(neg_head_emb ** 2) +
+					 torch.mean(neg_relation_emb ** 2) +
+					 torch.mean(neg_tail_emb ** 2) +
+					 torch.mean(neg_rel_transfer ** 2)) / 4
+		return (pos_regul + neg_regul) / 2
+def get_transr_hpo_config() -> dict[str, dict[str, typing.Any]]:
+	"""Return the default hyper-parameter optimisation config of :py:class:`TransR`.
+
+	When ``TransR`` :cite:`TransR` is tuned, a ``TransE`` :cite:`TransE` model is
+	trained first (for 1 epoch) and the entity and relation embeddings of
+	``TransR`` are initialised from the TransE result. **margin_e**, **lr_e** and
+	**opt_method_e** are the training hyper-parameters of that ``TransE`` model.
+	See :ref:`TransR <transr>` for more details.
+
+	The default config is::
+
+		parameters_dict = {
+			'model': {
+				'value': 'TransR'
+			},
+			'dim': {
+				'values': [50, 100]
+			},
+			'p_norm': {
+				'values': [1, 2]
+			},
+			'norm_flag': {
+				'value': True
+			},
+			'rand_init': {
+				'value': False
+			},
+			'margin_e': {
+				'values': [1.0, 3.0, 6.0]
+			},
+			'lr_e': {
+				'distribution': 'uniform',
+				'min': 1e-5,
+				'max': 1.0
+			},
+			'opt_method_e': {
+				'values': ['adam', 'adagrad', 'sgd']
+			},
+		}
+
+	:returns: the default hyper-parameter optimisation config of :py:class:`TransR`
+	:rtype: dict[str, dict[str, typing.Any]]
+	"""
+	# Search space of the TransR model itself.
+	transr_space = {
+		'model': {'value': 'TransR'},
+		'dim': {'values': [50, 100]},
+		'p_norm': {'values': [1, 2]},
+		'norm_flag': {'value': True},
+		'rand_init': {'value': False},
+	}
+	# Search space of the TransE pre-training run (keys carry the ``_e`` suffix).
+	transe_pretrain_space = {
+		'margin_e': {'values': [1.0, 3.0, 6.0]},
+		'lr_e': {'distribution': 'uniform', 'min': 1e-5, 'max': 1.0},
+		'opt_method_e': {'values': ['adam', 'adagrad', 'sgd']},
+	}
+	return {**transr_space, **transe_pretrain_space}

unike/module/model/__init__.py ADDED Viewed

@@ -0,0 +1,60 @@
+# coding:utf-8
+#
+# unike/module/model/__init__.py
+#
+# git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 28, 2023
+#
+# This module defines the public interface of the model package.
+"""KGE 模型部分。"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from .Model import Model
+from .TransE import TransE, get_transe_hpo_config
+from .TransH import TransH, get_transh_hpo_config
+from .TransR import TransR, get_transr_hpo_config
+from .TransD import TransD, get_transd_hpo_config
+from .RotatE import RotatE, get_rotate_hpo_config
+from .RESCAL import RESCAL, get_rescal_hpo_config
+from .DistMult import DistMult, get_distmult_hpo_config
+from .HolE import HolE, get_hole_hpo_config
+from .ComplEx import ComplEx, get_complex_hpo_config
+from .Analogy import Analogy, get_analogy_hpo_config
+from .SimplE import SimplE, get_simple_hpo_config
+from .RGCN import RGCN, get_rgcn_hpo_config
+from .CompGCN import CompGCN, CompGCNCov, get_compgcn_hpo_config
+# Public API of the package: every model class re-exported by the imports in this
+# module, each followed by its ``get_*_hpo_config`` helper.
+__all__ = [
+    'Model',
+    'TransE',
+    'get_transe_hpo_config',
+    'TransH',
+    'get_transh_hpo_config',
+    'TransR',
+    'get_transr_hpo_config',
+    'TransD',
+    'get_transd_hpo_config',
+    'RotatE',
+    'get_rotate_hpo_config',
+    'RESCAL',
+    'get_rescal_hpo_config',
+    'DistMult',
+    'get_distmult_hpo_config',
+    'HolE',
+    'get_hole_hpo_config',
+    'ComplEx',
+    'get_complex_hpo_config',
+    'Analogy',
+    'get_analogy_hpo_config',
+    'SimplE',
+    'get_simple_hpo_config',
+    'RGCN',
+    'get_rgcn_hpo_config',
+    'CompGCN',
+    'CompGCNCov',
+    'get_compgcn_hpo_config'
+]

unike/module/strategy/CompGCNSampling.py ADDED Viewed

@@ -0,0 +1,140 @@
+# coding:utf-8
+#
+# unike/module/strategy/CompGCNSampling.py
+#
+# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 16, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 20, 2023
+#
+# 该脚本定义了 CompGCN 模型的训练策略.
+"""
+CompGCNSampling - 训练策略类，包含损失函数。
+"""
+import dgl
+import torch
+import typing
+from ..loss import Loss
+from ..model import CompGCN
+from .Strategy import Strategy
+class CompGCNSampling(Strategy):
+	"""Training strategy that bundles a model with its loss function, for ``CompGCN`` :cite:`CompGCN`.
+
+	Example::
+
+		from unike.module.model import CompGCN
+		from unike.module.loss import CompGCNLoss
+		from unike.module.strategy import CompGCNSampling
+		from unike.config import Trainer, GraphTester
+		# define the model
+		compgcn = CompGCN(
+			ent_tol = dataloader.train_sampler.ent_tol,
+			rel_tol = dataloader.train_sampler.rel_tol,
+			dim = 100
+		)
+		# define the loss function
+		model = CompGCNSampling(
+			model = compgcn,
+			loss = CompGCNLoss(model = compgcn),
+			ent_tol = dataloader.train_sampler.ent_tol
+		)
+		# test the model
+		tester = GraphTester(model = compgcn, data_loader = dataloader, use_gpu = True, device = 'cuda:0', prediction = "tail")
+		# train the model
+		trainer = Trainer(model = model, data_loader = dataloader.train_dataloader(),
+			epochs = 2000, lr = 0.0001, use_gpu = True, device = 'cuda:0',
+			tester = tester, test = True, valid_interval = 50, log_interval = 50,
+			save_interval = 50, save_path = '../../checkpoint/compgcn.pth'
+		)
+		trainer.run()
+	"""
+	def __init__(
+		self,
+		model: CompGCN = None,
+		loss: Loss = None,
+		smoothing: float = 0.1,
+		ent_tol: int = None):
+		"""Create a CompGCNSampling object.
+
+		:param model: the CompGCN model
+		:type model: :py:class:`unike.module.model.CompGCN`
+		:param loss: the loss function
+		:type loss: :py:class:`unike.module.loss.Loss`
+		:param smoothing: label-smoothing coefficient applied to the labels in :py:meth:`forward`
+		:type smoothing: float
+		:param ent_tol: number of entities
+		:type ent_tol: int
+		"""
+		super().__init__()
+		#: the wrapped CompGCN model, i.e. :py:class:`unike.module.model.CompGCN`
+		self.model: CompGCN = model
+		#: the loss function, i.e. :py:class:`unike.module.loss.Loss`
+		self.loss: Loss = loss
+		#: label-smoothing coefficient
+		self.smoothing: float = smoothing
+		#: number of entities
+		self.ent_tol: int = ent_tol
+	def forward(
+		self,
+		data: dict[str, typing.Union[dgl.DGLGraph, torch.Tensor]]) -> torch.Tensor:
+		"""Compute the final loss value for one batch.
+
+		Subclasses of :py:class:`torch.nn.Module` must override
+		:py:meth:`torch.nn.Module.forward`.
+
+		:param data: batch data; the keys ``"graph"``, ``'relation'``, ``'norm'``,
+			``"sample"`` and ``"label"`` are read
+		:type data: dict[str, typing.Union[dgl.DGLGraph, torch.Tensor]]
+		:returns: the loss value
+		:rtype: torch.Tensor
+		"""
+		# Pull every field out up front so a missing key fails before the model runs.
+		graph, relation, norm, sample, target = (
+			data[key] for key in ("graph", 'relation', 'norm', "sample", "label")
+		)
+		prediction = self.model(graph, relation, norm, sample)
+		# Scale the labels by (1 - smoothing) and add the constant 1 / ent_tol.
+		smoothed_target = (1.0 - self.smoothing) * target + (1.0 / self.ent_tol)
+		return self.loss(prediction, smoothed_target)
+def get_compgcn_sampling_hpo_config() -> dict[str, dict[str, typing.Any]]:
+	"""Return the default hyper-parameter optimisation config of :py:class:`CompGCNSampling`.
+
+	The default config is::
+
+		parameters_dict = {
+			'strategy': {
+				'value': 'CompGCNSampling'
+			},
+			'smoothing': {
+				'value': 0.1
+			}
+		}
+
+	:returns: the default hyper-parameter optimisation config of :py:class:`CompGCNSampling`
+	:rtype: dict[str, dict[str, typing.Any]]
+	"""
+	# Both entries are fixed values; a fresh dict is built on every call.
+	return {
+		'strategy': {'value': 'CompGCNSampling'},
+		'smoothing': {'value': 0.1},
+	}