PyPI - unike - Versions diffs - 3.0.1__py3-none-any.whl - Mend

unike 3.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (59) hide show

unike/__init__.py +5 -0
unike/config/HPOTrainer.py +305 -0
unike/config/Tester.py +385 -0
unike/config/Trainer.py +519 -0
unike/config/TrainerAccelerator.py +39 -0
unike/config/__init__.py +37 -0
unike/data/BernSampler.py +168 -0
unike/data/CompGCNSampler.py +140 -0
unike/data/CompGCNTestSampler.py +84 -0
unike/data/KGEDataLoader.py +315 -0
unike/data/KGReader.py +138 -0
unike/data/RGCNSampler.py +261 -0
unike/data/RGCNTestSampler.py +208 -0
unike/data/RevSampler.py +78 -0
unike/data/TestSampler.py +189 -0
unike/data/TradSampler.py +122 -0
unike/data/TradTestSampler.py +87 -0
unike/data/UniSampler.py +145 -0
unike/data/__init__.py +47 -0
unike/module/BaseModule.py +130 -0
unike/module/__init__.py +20 -0
unike/module/loss/CompGCNLoss.py +96 -0
unike/module/loss/Loss.py +26 -0
unike/module/loss/MarginLoss.py +148 -0
unike/module/loss/RGCNLoss.py +117 -0
unike/module/loss/SigmoidLoss.py +145 -0
unike/module/loss/SoftplusLoss.py +145 -0
unike/module/loss/__init__.py +35 -0
unike/module/model/Analogy.py +237 -0
unike/module/model/CompGCN.py +562 -0
unike/module/model/ComplEx.py +235 -0
unike/module/model/DistMult.py +276 -0
unike/module/model/HolE.py +308 -0
unike/module/model/Model.py +107 -0
unike/module/model/RESCAL.py +309 -0
unike/module/model/RGCN.py +304 -0
unike/module/model/RotatE.py +303 -0
unike/module/model/SimplE.py +237 -0
unike/module/model/TransD.py +458 -0
unike/module/model/TransE.py +290 -0
unike/module/model/TransH.py +322 -0
unike/module/model/TransR.py +402 -0
unike/module/model/__init__.py +60 -0
unike/module/strategy/CompGCNSampling.py +140 -0
unike/module/strategy/NegativeSampling.py +138 -0
unike/module/strategy/RGCNSampling.py +134 -0
unike/module/strategy/Strategy.py +26 -0
unike/module/strategy/__init__.py +29 -0
unike/utils/EarlyStopping.py +94 -0
unike/utils/Timer.py +74 -0
unike/utils/WandbLogger.py +46 -0
unike/utils/__init__.py +26 -0
unike/utils/tools.py +118 -0
unike/version.py +1 -0
unike-3.0.1.dist-info/METADATA +101 -0
unike-3.0.1.dist-info/RECORD +59 -0
unike-3.0.1.dist-info/WHEEL +4 -0
unike-3.0.1.dist-info/entry_points.txt +2 -0
unike-3.0.1.dist-info/licenses/LICENSE +21 -0

unike/module/strategy/NegativeSampling.py ADDED Viewed

@@ -0,0 +1,138 @@
+# coding:utf-8
+#
+# unike/module/strategy/NegativeSampling.py
+#
+# git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 9, 2024
+#
+# 该脚本定义了平移模型和语义匹配模型的训练策略.
+"""
+NegativeSampling - 训练策略类，包含损失函数。
+"""
+import torch
+import typing
+from ..loss import Loss
+from ..model import Model
+from .Strategy import Strategy
class NegativeSampling(Strategy):
    """
    Training strategy that bundles a KGE model with its loss function.

    Example::

        from unike.module.model import TransE
        from unike.module.loss import MarginLoss
        from unike.module.strategy import NegativeSampling

        # define the model
        transe = TransE(
            ent_tol = dataloader.get_ent_tol(),
            rel_tol = dataloader.get_rel_tol(),
            dim = 50,
            p_norm = 1,
            norm_flag = True
        )

        # define the loss function
        model = NegativeSampling(
            model = transe,
            loss = MarginLoss(margin = 1.0),
            regul_rate = 0.01
        )
    """

    def __init__(
        self,
        model: Model = None,
        loss: Loss = None,
        regul_rate: float = 0.0,
        l3_regul_rate: float = 0.0):
        """Create a NegativeSampling object.

        :param model: the KGE model
        :type model: :py:class:`unike.module.model.Model`
        :param loss: the loss function
        :type loss: :py:class:`unike.module.loss.Loss`
        :param regul_rate: weight-decay coefficient
        :type regul_rate: float
        :param l3_regul_rate: L3 regularization coefficient
        :type l3_regul_rate: float
        """
        super().__init__()

        #: The KGE model, i.e. :py:class:`unike.module.model.Model`
        self.model: Model = model
        #: The loss function, i.e. :py:class:`unike.module.loss.Loss`
        self.loss: Loss = loss
        #: Weight-decay coefficient
        self.regul_rate: float = regul_rate
        #: L3 regularization coefficient
        self.l3_regul_rate: float = l3_regul_rate

    def forward(self, data: dict[str, typing.Union[torch.Tensor, str]]) -> torch.Tensor:
        """Compute the final loss value for one batch.

        Reads ``positive_sample``, ``negative_sample`` and ``mode`` from
        ``data``, scores both sample sets with the wrapped model, and feeds
        the scores to the loss function.

        :param data: the batch
        :type data: dict[str, typing.Union[torch.Tensor, str]]
        :returns: the loss value
        :rtype: torch.Tensor
        """
        positive = data["positive_sample"]
        negative = data["negative_sample"]
        sampling_mode = data["mode"]

        pos_score = self.model(positive)
        if sampling_mode != "bern":
            # Any other mode: the model receives the positives, the negatives
            # and the mode string together.
            neg_score = self.model(positive, negative, sampling_mode)
        else:
            # "bern" mode: negatives are scored on their own and reshaped to
            # one row per positive score.
            neg_score = self.model(negative).view(pos_score.shape[0], -1)

        total = self.loss(pos_score, neg_score)

        # Each regularization term is only evaluated when its rate is non-zero.
        if self.regul_rate != 0:
            total += self.regul_rate * self.model.regularization(data)
        if self.l3_regul_rate != 0:
            total += self.l3_regul_rate * self.model.l3_regularization()
        return total
def get_negative_sampling_hpo_config() -> dict[str, dict[str, typing.Any]]:
    """Return the default hyper-parameter optimization config of :py:class:`NegativeSampling`.

    The default configuration is::

        parameters_dict = {
            'strategy': {
                'value': 'NegativeSampling'
            },
            'regul_rate': {
                'value': 0.0
            },
            'l3_regul_rate': {
                'value': 0.0
            }
        }

    :returns: the default hyper-parameter optimization config of :py:class:`NegativeSampling`
    :rtype: dict[str, dict[str, typing.Any]]
    """
    # Every entry is a fixed value, so wrap each one in a {'value': ...} dict.
    fixed_values = {
        'strategy': 'NegativeSampling',
        'regul_rate': 0.0,
        'l3_regul_rate': 0.0,
    }
    return {key: {'value': val} for key, val in fixed_values.items()}

unike/module/strategy/RGCNSampling.py ADDED Viewed

@@ -0,0 +1,134 @@
+# coding:utf-8
+#
+# unike/module/strategy/RGCNSampling.py
+#
+# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 16, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 18, 2023
+#
+# 该脚本定义了 R-GCN 模型的训练策略.
+"""
+RGCNSampling - training strategy class for R-GCN, bundling the model with its loss function.
+"""
+import dgl
+import torch
+import typing
+from ..loss import Loss
+from ..model import Model
+from .Strategy import Strategy
class RGCNSampling(Strategy):
    """
    Training strategy that bundles a model with its loss function, used for
    ``R-GCN`` :cite:`R-GCN`.

    Example::

        from unike.data import GraphDataLoader
        from unike.module.model import RGCN
        from unike.module.loss import RGCNLoss
        from unike.module.strategy import RGCNSampling
        from unike.config import Trainer, GraphTester

        dataloader = GraphDataLoader(
            in_path = "../../benchmarks/FB15K237/",
            batch_size = 60000,
            neg_ent = 10,
            test = True,
            test_batch_size = 100,
            num_workers = 16
        )

        # define the model
        rgcn = RGCN(
            ent_tol = dataloader.train_sampler.ent_tol,
            rel_tol = dataloader.train_sampler.rel_tol,
            dim = 500,
            num_layers = 2
        )

        # define the loss function
        model = RGCNSampling(
            model = rgcn,
            loss = RGCNLoss(model = rgcn, regularization = 1e-5)
        )

        # test the model
        tester = GraphTester(model = rgcn, data_loader = dataloader, use_gpu = True, device = 'cuda:0')

        # train the model
        trainer = Trainer(model = model, data_loader = dataloader.train_dataloader(),
            epochs = 10000, lr = 0.0001, use_gpu = True, device = 'cuda:0',
            tester = tester, test = True, valid_interval = 500, log_interval = 500,
            save_interval = 500, save_path = '../../checkpoint/rgcn.pth'
        )
        trainer.run()
    """

    def __init__(
        self,
        model: Model = None,
        loss: Loss = None):
        """Create an RGCNSampling object.

        :param model: the R-GCN model
        :type model: :py:class:`unike.module.model.RGCN`
        :param loss: the loss function
        :type loss: :py:class:`unike.module.loss.Loss`
        """
        super().__init__()

        #: The R-GCN model, i.e. :py:class:`unike.module.model.RGCN`
        self.model: Model = model
        #: The loss function, i.e. :py:class:`unike.module.loss.Loss`
        self.loss: Loss = loss

    def forward(
        self,
        data: dict[str, typing.Union[dgl.DGLGraph, torch.Tensor]]) -> torch.Tensor:
        """Compute the final loss value for one batch.

        Reads ``graph``, ``entity``, ``relation``, ``norm``, ``triples`` and
        ``label`` from ``data``; the first five go to the model and the
        resulting score is compared against ``label`` by the loss function.

        :param data: the batch
        :type data: dict[str, typing.Union[dgl.DGLGraph, torch.Tensor]]
        :returns: the loss value
        :rtype: torch.Tensor
        """
        # Positional arguments of the wrapped model, in the order it expects them.
        model_inputs = (
            data["graph"],
            data['entity'],
            data['relation'],
            data['norm'],
            data["triples"],
        )
        target = data["label"]
        return self.loss(self.model(*model_inputs), target)
def get_rgcn_sampling_hpo_config() -> dict[str, dict[str, typing.Any]]:
    """Return the default hyper-parameter optimization config of :py:class:`RGCNSampling`.

    The default configuration is::

        parameters_dict = {
            'strategy': {
                'value': 'RGCNSampling'
            }
        }

    :returns: the default hyper-parameter optimization config of :py:class:`RGCNSampling`
    :rtype: dict[str, dict[str, typing.Any]]
    """
    # The strategy name is the only (fixed) entry for R-GCN.
    return {'strategy': {'value': 'RGCNSampling'}}

unike/module/strategy/Strategy.py ADDED Viewed

@@ -0,0 +1,26 @@
+# coding:utf-8
+#
+# unike/module/strategy/Strategy.py
+#
+# git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 4, 2023
+#
+# This script defines the base class for training strategies.
+"""
+Strategy - 该脚本定义了训练策略的基类。
+"""
+from ..BaseModule import BaseModule
class Strategy(BaseModule):
    """
    Base class for training strategies.

    Inherits from :py:class:`unike.module.BaseModule` without adding any
    extra attributes.
    """

    def __init__(self):
        """Create a Strategy object."""
        super().__init__()

unike/module/strategy/__init__.py ADDED Viewed

@@ -0,0 +1,29 @@
+# coding:utf-8
+#
+# unike/module/strategy/__init__.py
+#
+# git pull from OpenKE-PyTorch by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 7, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 28, 2023
+#
+# 该头文件定义了 strategy 接口.
+"""训练策略部分。"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from .Strategy import Strategy
+from .NegativeSampling import NegativeSampling, get_negative_sampling_hpo_config
+from .RGCNSampling import RGCNSampling, get_rgcn_sampling_hpo_config
+from .CompGCNSampling import CompGCNSampling, get_compgcn_sampling_hpo_config
+__all__ = [
+    'Strategy',
+    'NegativeSampling',
+    'get_negative_sampling_hpo_config',
+    'RGCNSampling',
+    'get_rgcn_sampling_hpo_config',
+    'CompGCNSampling',
+    'get_compgcn_sampling_hpo_config'
+]

unike/utils/EarlyStopping.py ADDED Viewed

@@ -0,0 +1,94 @@
+# coding:utf-8
+#
+# unike/utils/EarlyStopping.py
+#
+# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 5, 2024
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on May 6, 2024
+#
+# 该脚本定义了 EarlyStopping 类.
+"""
+EarlyStopping - 使用早停止避免过拟合。
+"""
+import os
+import numpy as np
+from ..module.model import Model
+import logging
+logger = logging.getLogger(__name__)
+logging.basicConfig(format='%(levelname)s:%(module)s:%(asctime)s:%(message)s',
+					datefmt='%Y-%m-%d %H:%M:%S', level=logging.DEBUG)
class EarlyStopping:
    """
    Stop training early when the validation score (larger is better) has not
    improved for a given number of consecutive checks.
    """

    def __init__(
        self,
        save_path: str,
        patience: int = 2,
        verbose: bool = True,
        delta: float = 0):
        """Create an EarlyStopping object.

        :param save_path: directory in which the best model is saved
        :type save_path: str
        :param patience: how many checks to wait after the last improvement. Default: 2
        :type patience: int
        :param verbose: if True, log a message for every improvement. Default: True
        :type verbose: bool
        :param delta: minimum change of the score that counts as an improvement. Default: 0
        :type delta: float
        """
        #: Full path of the checkpoint file (``best_network.pth`` inside ``save_path``)
        self.save_path: str = os.path.join(save_path, 'best_network.pth')
        #: How many checks to wait after the last improvement
        self.patience: int = patience
        #: Whether to log a message for every improvement
        self.verbose: bool = verbose
        #: Minimum change of the score that counts as an improvement
        self.delta: float = delta
        #: Number of consecutive checks without improvement
        self.counter: int = 0
        #: Best score seen so far. ``np.inf`` is used because the ``np.Inf``
        #: alias was removed in NumPy 2.0.
        self.best_score: float = -np.inf
        #: Set to True once ``patience`` is exhausted
        self.early_stop: bool = False

    def __call__(
        self,
        score: float,
        model: "Model"):
        """Record one validation result.

        If ``score`` does not exceed ``best_score + delta`` the counter is
        incremented and, once it reaches ``patience``, ``early_stop`` is set.
        Otherwise the model is checkpointed and the counter is reset.

        :param score: validation score, larger is better
        :type score: float
        :param model: model to checkpoint on improvement
        :type model: :py:class:`unike.module.model.Model`
        """
        if score <= self.best_score + self.delta:
            self.counter += 1
            logger.info(f'EarlyStopping counter: {self.counter} / {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.save_checkpoint(score, model)
            self.counter = 0

    def save_checkpoint(
        self,
        score: float,
        model: "Model"):
        """Save the model and remember ``score`` as the new best score.

        :param score: the improved validation score
        :type score: float
        :param model: model whose ``save_checkpoint`` is called with ``save_path``
        :type model: :py:class:`unike.module.model.Model`
        """
        if self.verbose:
            logger.info(f'Validation score improved ({self.best_score:.6f} --> {score:.6f}).  Saving model ...')
        model.save_checkpoint(self.save_path)
        self.best_score = score

unike/utils/Timer.py ADDED Viewed

@@ -0,0 +1,74 @@
+# coding:utf-8
+#
+# unike/utils/Timer.py
+#
+# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on July 6, 2023
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Feb 24, 2023
+#
+# 该脚本定义了计时器类.
+"""
+:py:class:`Timer` - 计时器类。
+"""
+import time
class Timer:
    """Record the duration of several experiments.

    :py:meth:`stop` returns the time elapsed since the previous
    :py:meth:`stop` call (or since the :py:class:`Timer` was created).
    :py:meth:`avg` returns the mean of the recorded intervals and
    :py:meth:`sum` returns their total.
    """

    def __init__(self):
        """Create a Timer object and start timing immediately."""
        #: Recorded intervals, in seconds
        self.times: list[float] = []
        #: Timestamp of the most recent measurement
        self.current: float = None
        #: Timestamp the next interval is measured from
        self.last: float = None
        self.__restart()

    def __restart(self):
        """Restart the timer."""
        self.last = self.current = time.time()

    def stop(self) -> float:
        """Stop the current interval, record it, and start the next one.

        :returns: the length of the interval that just ended
        :rtype: float
        """
        self.current = time.time()
        elapsed = self.current - self.last
        self.times.append(elapsed)
        self.last = self.current
        return elapsed

    def avg(self) -> float:
        """Return the mean of the recorded intervals.

        :returns: the mean interval, or 0.0 when nothing has been recorded yet
        :rtype: float
        """
        # Guard against ZeroDivisionError when stop() was never called.
        if not self.times:
            return 0.0
        return sum(self.times) / len(self.times)

    def sum(self) -> float:
        """Return the total of the recorded intervals.

        :returns: the total time
        :rtype: float
        """
        # Inside a method body ``sum`` still resolves to the builtin.
        return sum(self.times)

unike/utils/WandbLogger.py ADDED Viewed

@@ -0,0 +1,46 @@
+# coding:utf-8
+#
+# unike/utils/WandbLogger.py
+#
+# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 1, 2024
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Feb 24, 2024
+#
+# 该脚本定义了 WandbLogger 类.
+"""
+WandbLogger - 使用 Weights and Biases 记录实验结果。
+"""
+import typing
+import wandb
class WandbLogger:
    """Record experiment results with `Weights and Biases <https://docs.wandb.ai/>`_."""

    def __init__(self,
        project: str ="pybind11-ke",
        name: str = "transe",
        config: dict[str, typing.Any] | None = None):
        """Create a WandbLogger object: log in to wandb and start a run.

        :param project: wandb project name
        :type project: str
        :param name: wandb run name
        :type name: str
        :param config: run configuration, e.g. hyper-parameters
        :type config: dict[str, typing.Any] | None
        """
        run_settings = {"project": project, "name": name, "config": config}

        wandb.login()
        wandb.init(**run_settings)

        #: The ``wandb.config`` object of the run that was just started
        self.config: dict = wandb.config

    def finish(self):
        """Finish the wandb run."""
        wandb.finish()

unike/utils/__init__.py ADDED Viewed

@@ -0,0 +1,26 @@
+# coding:utf-8
+#
+# unike/utils/__init__.py
+#
+# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on July 6, 2023
+#
+# 该头文件定义了 utils 接口.
+"""工具类。"""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+from .Timer import Timer
+from .WandbLogger import WandbLogger
+from .tools import import_class, construct_type_constrain
+from .EarlyStopping import EarlyStopping
+__all__ = [
+	'Timer',
+	'WandbLogger',
+	'import_class',
+	'construct_type_constrain',
+	'EarlyStopping',
+]

unike/utils/tools.py ADDED Viewed

@@ -0,0 +1,118 @@
+# coding:utf-8
+#
+# unike/utils/tools.py
+#
+# created by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Jan 3, 2024
+# updated by LuYF-Lemon-love <luyanfeng_nlp@qq.com> on Feb 25, 2024
+#
+# This script defines utility functions (dynamic class import and type-constraint file generation).
+import importlib
def import_class(module_and_class_name: str) -> type:
    """Import a class from a module given its dotted path.

    :param module_and_class_name: module path and class name, e.g. **unike.module.model.TransE**.
    :type module_and_class_name: str
    :returns: the attribute named by the last path component of the imported module
    :rtype: type
    :raises ValueError: if the name contains no module part (no ``.`` separator)
    """
    module_name, separator, class_name = module_and_class_name.rpartition(".")
    if not separator or not module_name:
        # Without this check the failure is an opaque unpacking or
        # "Empty module name" error that does not mention the bad input.
        raise ValueError(
            "expected a dotted path such as 'package.module.ClassName', "
            f"got {module_and_class_name!r}"
        )
    module = importlib.import_module(module_name)
    return getattr(module, class_name)
def _collect_type_constraints(path: str, rel_head: dict, rel_tail: dict) -> None:
    """Read one ``*2id.txt`` file and add its heads/tails to the per-relation maps."""
    with open(path, "r", encoding="utf-8") as handle:
        # The first line holds the number of triples that follow.
        total = int(handle.readline())
        for _ in range(total):
            # Each line is "head tail relation"; a short file fails here
            # with ValueError instead of being silently accepted.
            h, t, r = handle.readline().split()
            # Dicts are used as insertion-ordered sets so the output keeps
            # first-seen order.
            rel_head.setdefault(r, {})[h] = 1
            rel_tail.setdefault(r, {})[t] = 1


def construct_type_constrain(
    in_path: str = "./",
    train_file: str = "train2id.txt",
    valid_file: str = "valid2id.txt",
    test_file: str = "test2id.txt"
    ):
    """Build the ``type_constrain.txt`` file.

    ``type_constrain.txt`` is the type-constraint file. Its first line is the
    number of relations; the following lines give, for every relation, the
    heads and tails it occurs with in the training, validation and test sets.

    Every relation has two lines:

    first line: **rel_id** **heads_num** **head1** **head2** ...

    second line: **rel_id** **tails_num** **tail1** **tail2** ...

    For example, relation 1200 of benchmarks/FB15K has 4 head entities
    (3123, 1034, 58 and 5733) and 4 tail entities (12123, 4388, 11087 and 11088)::

        1200	4	3123	1034	58	5733
        1200	4	12123	4388	11087	11088

    File names are appended to ``in_path`` by plain string concatenation, so
    ``in_path`` must end with a path separator.

    :param in_path: dataset directory (with trailing separator)
    :type in_path: str
    :param train_file: train2id.txt
    :type train_file: str
    :param valid_file: valid2id.txt
    :type valid_file: str
    :param test_file: test2id.txt
    :type test_file: str
    """
    rel_head: dict = {}
    rel_tail: dict = {}

    # Each input is opened and closed in turn; a missing or malformed file
    # can no longer leak the handles of the others.
    for file_name in (train_file, valid_file, test_file):
        _collect_type_constraints(in_path + file_name, rel_head, rel_tail)

    lines = ["%d\n" % len(rel_head)]
    for rel in rel_head:
        heads = rel_head[rel]
        tails = rel_tail[rel]
        lines.append("\t".join([rel, str(len(heads)), *heads]) + "\n")
        lines.append("\t".join([rel, str(len(tails)), *tails]) + "\n")

    with open(in_path + "type_constrain.txt", "w", encoding="utf-8") as out:
        out.writelines(lines)

unike/version.py ADDED Viewed

	@@ -0,0 +1 @@
1	+ __version__: str = '3.0.1'