torch-rechub 0.0.1__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. torch_rechub/__init__.py +14 -0
  2. torch_rechub/basic/activation.py +3 -1
  3. torch_rechub/basic/callback.py +2 -2
  4. torch_rechub/basic/features.py +38 -8
  5. torch_rechub/basic/initializers.py +92 -0
  6. torch_rechub/basic/layers.py +800 -46
  7. torch_rechub/basic/loss_func.py +223 -0
  8. torch_rechub/basic/metaoptimizer.py +76 -0
  9. torch_rechub/basic/metric.py +251 -0
  10. torch_rechub/models/generative/__init__.py +6 -0
  11. torch_rechub/models/generative/hllm.py +249 -0
  12. torch_rechub/models/generative/hstu.py +189 -0
  13. torch_rechub/models/matching/__init__.py +13 -0
  14. torch_rechub/models/matching/comirec.py +193 -0
  15. torch_rechub/models/matching/dssm.py +72 -0
  16. torch_rechub/models/matching/dssm_facebook.py +77 -0
  17. torch_rechub/models/matching/dssm_senet.py +87 -0
  18. torch_rechub/models/matching/gru4rec.py +85 -0
  19. torch_rechub/models/matching/mind.py +103 -0
  20. torch_rechub/models/matching/narm.py +82 -0
  21. torch_rechub/models/matching/sasrec.py +143 -0
  22. torch_rechub/models/matching/sine.py +148 -0
  23. torch_rechub/models/matching/stamp.py +81 -0
  24. torch_rechub/models/matching/youtube_dnn.py +75 -0
  25. torch_rechub/models/matching/youtube_sbc.py +98 -0
  26. torch_rechub/models/multi_task/__init__.py +5 -2
  27. torch_rechub/models/multi_task/aitm.py +83 -0
  28. torch_rechub/models/multi_task/esmm.py +19 -8
  29. torch_rechub/models/multi_task/mmoe.py +18 -12
  30. torch_rechub/models/multi_task/ple.py +41 -29
  31. torch_rechub/models/multi_task/shared_bottom.py +3 -2
  32. torch_rechub/models/ranking/__init__.py +13 -2
  33. torch_rechub/models/ranking/afm.py +65 -0
  34. torch_rechub/models/ranking/autoint.py +102 -0
  35. torch_rechub/models/ranking/bst.py +61 -0
  36. torch_rechub/models/ranking/dcn.py +38 -0
  37. torch_rechub/models/ranking/dcn_v2.py +59 -0
  38. torch_rechub/models/ranking/deepffm.py +131 -0
  39. torch_rechub/models/ranking/deepfm.py +8 -7
  40. torch_rechub/models/ranking/dien.py +191 -0
  41. torch_rechub/models/ranking/din.py +31 -19
  42. torch_rechub/models/ranking/edcn.py +101 -0
  43. torch_rechub/models/ranking/fibinet.py +42 -0
  44. torch_rechub/models/ranking/widedeep.py +6 -6
  45. torch_rechub/trainers/__init__.py +4 -2
  46. torch_rechub/trainers/ctr_trainer.py +191 -0
  47. torch_rechub/trainers/match_trainer.py +239 -0
  48. torch_rechub/trainers/matching.md +3 -0
  49. torch_rechub/trainers/mtl_trainer.py +137 -23
  50. torch_rechub/trainers/seq_trainer.py +293 -0
  51. torch_rechub/utils/__init__.py +0 -0
  52. torch_rechub/utils/data.py +492 -0
  53. torch_rechub/utils/hstu_utils.py +198 -0
  54. torch_rechub/utils/match.py +457 -0
  55. torch_rechub/utils/mtl.py +136 -0
  56. torch_rechub/utils/onnx_export.py +353 -0
  57. torch_rechub-0.0.4.dist-info/METADATA +391 -0
  58. torch_rechub-0.0.4.dist-info/RECORD +62 -0
  59. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info}/WHEEL +1 -2
  60. {torch_rechub-0.0.1.dist-info → torch_rechub-0.0.4.dist-info/licenses}/LICENSE +1 -1
  61. torch_rechub/basic/utils.py +0 -168
  62. torch_rechub/trainers/trainer.py +0 -111
  63. torch_rechub-0.0.1.dist-info/METADATA +0 -105
  64. torch_rechub-0.0.1.dist-info/RECORD +0 -26
  65. torch_rechub-0.0.1.dist-info/top_level.txt +0 -1
torch_rechub/basic/loss_func.py
@@ -0,0 +1,223 @@
+ import torch
+ import torch.nn.functional as F
+ import torch.nn as nn
+
+
+ class RegularizationLoss(nn.Module):
+     """Unified L1/L2 regularization loss for embedding and dense parameters.
+
+     Example:
+         >>> reg_loss_fn = RegularizationLoss(embedding_l2=1e-5, dense_l2=1e-5)
+         >>> # In the model's forward or in the trainer
+         >>> reg_loss = reg_loss_fn(model)
+         >>> total_loss = task_loss + reg_loss
+     """
+
+     def __init__(self, embedding_l1=0.0, embedding_l2=0.0, dense_l1=0.0, dense_l2=0.0):
+         super(RegularizationLoss, self).__init__()
+         self.embedding_l1 = embedding_l1
+         self.embedding_l2 = embedding_l2
+         self.dense_l1 = dense_l1
+         self.dense_l2 = dense_l2
+
+     def forward(self, model):
+         reg_loss = 0.0
+
+         # Collect parameters of normalization layers (excluded from regularization)
+         norm_params = set()
+         for module in model.modules():
+             if isinstance(module, (nn.BatchNorm1d, nn.BatchNorm2d, nn.BatchNorm3d, nn.LayerNorm, nn.GroupNorm, nn.InstanceNorm1d, nn.InstanceNorm2d, nn.InstanceNorm3d)):
+                 for param in module.parameters():
+                     norm_params.add(id(param))
+
+         # Collect parameters of embedding layers
+         embedding_params = set()
+         for module in model.modules():
+             if isinstance(module, (nn.Embedding, nn.EmbeddingBag)):
+                 for param in module.parameters():
+                     embedding_params.add(id(param))
+
+         for param in model.parameters():
+             if param.requires_grad:
+                 # Skip normalization layer parameters
+                 if id(param) in norm_params:
+                     continue
+
+                 if id(param) in embedding_params:
+                     if self.embedding_l1 > 0:
+                         reg_loss += self.embedding_l1 * torch.sum(torch.abs(param))
+                     if self.embedding_l2 > 0:
+                         reg_loss += self.embedding_l2 * torch.sum(param**2)
+                 else:
+                     if self.dense_l1 > 0:
+                         reg_loss += self.dense_l1 * torch.sum(torch.abs(param))
+                     if self.dense_l2 > 0:
+                         reg_loss += self.dense_l2 * torch.sum(param**2)
+
+         return reg_loss
+
+
+ class HingeLoss(torch.nn.Module):
+     """Hinge loss for pairwise learning.
+
+     Reference: https://github.com/ustcml/RecStudio/blob/main/recstudio/model/loss_func.py
+     """
+
+     def __init__(self, margin=2, num_items=None):
+         super().__init__()
+         self.margin = margin
+         self.n_items = num_items
+
+     def forward(self, pos_score, neg_score):
+         loss = torch.maximum(torch.max(neg_score, dim=-1).values - pos_score + self.margin, torch.tensor([0]).type_as(pos_score))
+         if self.n_items is not None:
+             impostors = neg_score - pos_score.view(-1, 1) + self.margin > 0
+             # cast the boolean mask to float before averaging
+             rank = torch.mean(impostors.float(), -1) * self.n_items
+             return torch.mean(loss * torch.log(rank + 1))
+         else:
+             return torch.mean(loss)
+
+
+ class BPRLoss(torch.nn.Module):
+     """Bayesian Personalized Ranking (BPR) loss for pairwise learning."""
+
+     def __init__(self):
+         super().__init__()
+
+     def forward(self, pos_score, neg_score):
+         loss = torch.mean(-(pos_score - neg_score).sigmoid().log(), dim=-1)
+         return loss
+
+
+ class NCELoss(torch.nn.Module):
+     """Noise Contrastive Estimation (NCE) loss for recommendation systems.
+
+     NCE loss is more efficient than CrossEntropyLoss for large-scale recommendation
+     scenarios. It uses in-batch negatives to reduce computational complexity.
+
+     Reference:
+         - Noise-contrastive estimation: A new estimation principle for unnormalized
+           statistical models (Gutmann & Hyvärinen, 2010)
+         - HLLM: Hierarchical Large Language Model for Recommendation
+
+     Args:
+         temperature (float): Temperature parameter for scaling logits. Default: 1.0
+         ignore_index (int): Index to ignore in loss computation. Default: 0
+         reduction (str): Specifies the reduction to apply to the output.
+             Options: 'mean', 'sum', 'none'. Default: 'mean'
+
+     Example:
+         >>> nce_loss = NCELoss(temperature=0.1)
+         >>> logits = torch.randn(32, 1000)  # (batch_size, vocab_size)
+         >>> targets = torch.randint(0, 1000, (32,))
+         >>> loss = nce_loss(logits, targets)
+     """
+
+     def __init__(self, temperature=1.0, ignore_index=0, reduction='mean'):
+         super().__init__()
+         self.temperature = temperature
+         self.ignore_index = ignore_index
+         self.reduction = reduction
+
+     def forward(self, logits, targets):
+         """Compute NCE loss.
+
+         Args:
+             logits (torch.Tensor): Model output logits of shape (batch_size, vocab_size)
+             targets (torch.Tensor): Target indices of shape (batch_size,)
+
+         Returns:
+             torch.Tensor: NCE loss value
+         """
+         # Scale logits by temperature
+         logits = logits / self.temperature
+
+         # Compute log softmax
+         log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
+
+         # Get log probability of the target class
+         batch_size = targets.shape[0]
+         target_log_probs = log_probs[torch.arange(batch_size), targets]
+
+         # Create mask for ignore_index
+         mask = targets != self.ignore_index
+
+         # Compute loss
+         loss = -target_log_probs
+
+         # Apply mask
+         if mask.any():
+             loss = loss[mask]
+
+         # Apply reduction
+         if self.reduction == 'mean':
+             return loss.mean()
+         elif self.reduction == 'sum':
+             return loss.sum()
+         else:  # 'none'
+             return loss
+
+
+ class InBatchNCELoss(torch.nn.Module):
+     """In-batch NCE loss with explicit negative sampling.
+
+     This loss function uses the other samples in the batch as negative samples,
+     which is more efficient than sampling random negatives.
+
+     Args:
+         temperature (float): Temperature parameter for scaling logits. Default: 0.1
+         ignore_index (int): Index to ignore in loss computation. Default: 0
+         reduction (str): Specifies the reduction to apply to the output.
+             Options: 'mean', 'sum', 'none'. Default: 'mean'
+
+     Example:
+         >>> loss_fn = InBatchNCELoss(temperature=0.1)
+         >>> embeddings = torch.randn(32, 256)  # (batch_size, embedding_dim)
+         >>> item_embeddings = torch.randn(1000, 256)  # (vocab_size, embedding_dim)
+         >>> targets = torch.randint(0, 1000, (32,))
+         >>> loss = loss_fn(embeddings, item_embeddings, targets)
+     """
+
+     def __init__(self, temperature=0.1, ignore_index=0, reduction='mean'):
+         super().__init__()
+         self.temperature = temperature
+         self.ignore_index = ignore_index
+         self.reduction = reduction
+
+     def forward(self, embeddings, item_embeddings, targets):
+         """Compute in-batch NCE loss.
+
+         Args:
+             embeddings (torch.Tensor): User/query embeddings of shape (batch_size, embedding_dim)
+             item_embeddings (torch.Tensor): Item embeddings of shape (vocab_size, embedding_dim)
+             targets (torch.Tensor): Target item indices of shape (batch_size,)
+
+         Returns:
+             torch.Tensor: In-batch NCE loss value
+         """
+         # Compute logits: (batch_size, vocab_size)
+         logits = torch.matmul(embeddings, item_embeddings.t()) / self.temperature
+
+         # Compute log softmax
+         log_probs = torch.nn.functional.log_softmax(logits, dim=-1)
+
+         # Get log probability of the target class
+         batch_size = targets.shape[0]
+         target_log_probs = log_probs[torch.arange(batch_size), targets]
+
+         # Create mask for ignore_index
+         mask = targets != self.ignore_index
+
+         # Compute loss
+         loss = -target_log_probs
+
+         # Apply mask
+         if mask.any():
+             loss = loss[mask]
+
+         # Apply reduction
+         if self.reduction == 'mean':
+             return loss.mean()
+         elif self.reduction == 'sum':
+             return loss.sum()
+         else:  # 'none'
+             return loss
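A minimal usage sketch for the new loss module; the toy `nn.Sequential` model, the tensor shapes and the hyper-parameter values below are illustrative assumptions, not part of the package:

import torch
import torch.nn as nn
from torch_rechub.basic.loss_func import NCELoss, RegularizationLoss

# Hypothetical toy retrieval model standing in for a real torch-rechub model.
model = nn.Sequential(nn.Embedding(1000, 16), nn.Flatten(), nn.Linear(16, 1000))

reg_loss_fn = RegularizationLoss(embedding_l2=1e-5, dense_l2=1e-5)
nce_loss_fn = NCELoss(temperature=0.1)

item_ids = torch.randint(0, 1000, (32, 1))  # (batch_size, 1) input ids
targets = torch.randint(0, 1000, (32,))     # next-item labels

logits = model(item_ids)                    # (batch_size, vocab_size)
loss = nce_loss_fn(logits, targets) + reg_loss_fn(model)  # task loss + regularization
loss.backward()

`InBatchNCELoss` follows the same pattern but takes user and item embeddings instead of precomputed logits.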
torch_rechub/basic/metaoptimizer.py
@@ -0,0 +1,76 @@
+ """The metaoptimizer module. It provides the MetaBalance optimizer,
+ which scales the gradients so that the gradient of each task stays balanced.
+ Authors: Qida Dong, dongjidan@126.com
+ """
+ import torch
+ from torch.optim.optimizer import Optimizer
+
+
+ class MetaBalance(Optimizer):
+     """MetaBalance optimizer.
+
+     This method scales the auxiliary-task gradients so that their magnitudes stay
+     balanced with the gradient of the main task.
+
+     Args:
+         parameters (list): the parameters of the model
+         relax_factor (float, optional): the relax factor of gradient scaling (default: 0.7)
+         beta (float, optional): the coefficient of the moving average (default: 0.9)
+     """
+
+     def __init__(self, parameters, relax_factor=0.7, beta=0.9):
+         if relax_factor < 0. or relax_factor >= 1.:
+             raise ValueError(f'Invalid relax_factor: {relax_factor}, it should be 0. <= relax_factor < 1.')
+         if beta < 0. or beta >= 1.:
+             raise ValueError(f'Invalid beta: {beta}, it should be 0. <= beta < 1.')
+         rel_beta_dict = {'relax_factor': relax_factor, 'beta': beta}
+         super(MetaBalance, self).__init__(parameters, rel_beta_dict)
+
+     @torch.no_grad()
+     def step(self, losses):
+         """Backpropagate each task loss, rebalance the per-task gradients and accumulate them.
+
+         Args:
+             losses (list of torch.Tensor): the losses of all tasks; the first one is treated
+                 as the main task and the others as auxiliary tasks.
+
+         Raises:
+             RuntimeError: if a parameter has a sparse gradient, which is not supported.
+         """
+         for idx, loss in enumerate(losses):
+             loss.backward(retain_graph=True)
+             for group in self.param_groups:
+                 for gp in group['params']:
+                     if gp.grad is None:
+                         break
+                     if gp.grad.is_sparse:
+                         raise RuntimeError('MetaBalance does not support sparse gradients')
+                     # store the result of the moving average
+                     state = self.state[gp]
+                     if len(state) == 0:
+                         for i in range(len(losses)):
+                             if i == 0:
+                                 gp.norms = [0]
+                             else:
+                                 gp.norms.append(0)
+
+                     # calculate the moving average of the gradient norm
+                     beta = group['beta']
+                     gp.norms[idx] = gp.norms[idx] * beta + \
+                         (1 - beta) * torch.norm(gp.grad)
+                     # scale the auxiliary gradient towards the main-task gradient norm
+                     relax_factor = group['relax_factor']
+                     gp.grad = gp.grad * \
+                         gp.norms[0] / (gp.norms[idx] + 1e-5) * relax_factor + gp.grad * (1. - relax_factor)
+                     # accumulate the rebalanced gradient of each task in the state
+                     if idx == 0:
+                         state['sum_gradient'] = torch.zeros_like(gp.data)
+                         state['sum_gradient'] += gp.grad
+                     else:
+                         state['sum_gradient'] += gp.grad
+
+                     if gp.grad is not None:
+                         gp.grad.detach_()
+                         gp.grad.zero_()
+                     if idx == len(losses) - 1:
+                         gp.grad = state['sum_gradient']
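A hedged sketch of how MetaBalance might be driven alongside a standard optimizer; the two-head toy model and the SGD pairing are assumptions for illustration, not necessarily how the package's MTLTrainer wires it up:

import torch
import torch.nn as nn
from torch_rechub.basic.metaoptimizer import MetaBalance

# Hypothetical two-task model: one shared layer, two task-specific heads.
shared = nn.Linear(8, 8)
head_a, head_b = nn.Linear(8, 1), nn.Linear(8, 1)

shared_params = list(shared.parameters())
all_params = shared_params + list(head_a.parameters()) + list(head_b.parameters())
metabalance = MetaBalance(shared_params, relax_factor=0.7, beta=0.9)  # rebalances only the shared layer
sgd = torch.optim.SGD(all_params, lr=0.01)                            # applies the actual update

x = torch.randn(4, 8)
y_a, y_b = torch.randn(4, 1), torch.randn(4, 1)

h = shared(x)
losses = [nn.functional.mse_loss(head_a(h), y_a), nn.functional.mse_loss(head_b(h), y_b)]

sgd.zero_grad()
metabalance.step(losses)  # backprops each loss and leaves balanced, summed grads on the shared layer
sgd.step()                # the heads keep their raw per-task grads; the shared layer uses the balanced ones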
torch_rechub/basic/metric.py
@@ -0,0 +1,251 @@
+ """The metric module. It provides some metrics for recommenders.
+ Available functions:
+ - auc_score: compute AUC
+ - gauc_score: compute GAUC
+ - log_loss: compute LogLoss
+ - topk_metrics: compute top-k metrics, including 'ndcg', 'mrr', 'recall', 'precision' and 'hit'
+ Authors: Qida Dong, dongjidan@126.com
+ """
+ from collections import defaultdict
+
+ import numpy as np
+ from sklearn.metrics import roc_auc_score
+
+
+ def auc_score(y_true, y_pred):
+     return roc_auc_score(y_true, y_pred)
+
+
+ def get_user_pred(y_true, y_pred, users):
+     """divide the results into groups by user id
+
+     Args:
+         y_true (array): all true labels of the data
+         y_pred (array): the predicted scores
+         users (array): user ids
+
+     Return:
+         user_pred (dict): {userid: values}, key is the user id and value contains the labels and scores of that user
+     """
+     user_pred = {}
+     for i, u in enumerate(users):
+         if u not in user_pred:
+             user_pred[u] = {'y_true': [y_true[i]], 'y_pred': [y_pred[i]]}
+         else:
+             user_pred[u]['y_true'].append(y_true[i])
+             user_pred[u]['y_pred'].append(y_pred[i])
+
+     return user_pred
+
+
+ def gauc_score(y_true, y_pred, users, weights=None):
+     """compute GAUC
+
+     Args:
+         y_true (array): dim(N, ), all true labels of the data
+         y_pred (array): dim(N, ), the predicted scores
+         users (array): dim(N, ), user ids
+         weights (dict): {userid: weight_value}, the weight of each group.
+             If it is None, the weight is the number of times the user is recommended
+
+     Return:
+         score (float): GAUC
+     """
+     assert len(y_true) == len(y_pred) and len(y_true) == len(users)
+
+     user_pred = get_user_pred(y_true, y_pred, users)
+     score = 0
+     num = 0
+     for u in user_pred.keys():
+         auc = auc_score(user_pred[u]['y_true'], user_pred[u]['y_pred'])
+         if weights is None:
+             user_weight = len(user_pred[u]['y_true'])
+         else:
+             user_weight = weights[u]
+         auc *= user_weight
+         num += user_weight
+         score += auc
+     return score / num
+
+
+ def ndcg_score(y_true, y_pred, topKs=None):
+     if topKs is None:
+         topKs = [5]
+     result = topk_metrics(y_true, y_pred, topKs)
+     return result['NDCG']
+
+
+ def hit_score(y_true, y_pred, topKs=None):
+     if topKs is None:
+         topKs = [5]
+     result = topk_metrics(y_true, y_pred, topKs)
+     return result['Hit']
+
+
+ def mrr_score(y_true, y_pred, topKs=None):
+     if topKs is None:
+         topKs = [5]
+     result = topk_metrics(y_true, y_pred, topKs)
+     return result['MRR']
+
+
+ def recall_score(y_true, y_pred, topKs=None):
+     if topKs is None:
+         topKs = [5]
+     result = topk_metrics(y_true, y_pred, topKs)
+     return result['Recall']
+
+
+ def precision_score(y_true, y_pred, topKs=None):
+     if topKs is None:
+         topKs = [5]
+     result = topk_metrics(y_true, y_pred, topKs)
+     return result['Precision']
+
+
+ def topk_metrics(y_true, y_pred, topKs=None):
+     """compute the chosen top-k metrics
+     the metrics contain 'ndcg', 'mrr', 'recall', 'precision' and 'hit'
+
+     Args:
+         y_true (dict): {userid: item_ids}, the key is a user id and the value is the list of items the user interacted with
+         y_pred (dict): {userid: item_ids}, the key is a user id and the value is the list of recommended items
+         topKs (list or tuple): e.g. topKs=(5, 10) computes top-5 and top-10 metrics (default: [5])
+
+     Return:
+         results (dict): {metric_name: metric_values}, it contains five metrics, 'ndcg', 'recall', 'mrr', 'hit', 'precision'
+     """
+     if topKs is None:
+         topKs = [5]
+     assert len(y_true) == len(y_pred)
+
+     if not isinstance(topKs, (tuple, list)):
+         raise ValueError('topKs wrong, it should be tuple or list')
+
+     pred_array = []
+     true_array = []
+     for u in y_true.keys():
+         pred_array.append(y_pred[u])
+         true_array.append(y_true[u])
+     ndcg_result = []
+     mrr_result = []
+     hit_result = []
+     precision_result = []
+     recall_result = []
+     for idx in range(len(topKs)):
+         ndcgs = 0
+         mrrs = 0
+         hits = 0
+         precisions = 0
+         recalls = 0
+         gts = 0
+         for i in range(len(true_array)):
+             if len(true_array[i]) != 0:
+                 mrr_tmp = 0
+                 mrr_flag = True
+                 hit_tmp = 0
+                 dcg_tmp = 0
+                 idcg_tmp = 0
+                 for j in range(topKs[idx]):
+                     if pred_array[i][j] in true_array[i]:
+                         hit_tmp += 1.
+                         if mrr_flag:
+                             mrr_flag = False
+                             mrr_tmp = 1. / (1 + j)
+                         dcg_tmp += 1. / (np.log2(j + 2))
+                     if j < len(true_array[i]):
+                         idcg_tmp += 1. / (np.log2(j + 2))
+                 gts += len(true_array[i])
+                 hits += hit_tmp
+                 mrrs += mrr_tmp
+                 recalls += hit_tmp / len(true_array[i])
+                 precisions += hit_tmp / topKs[idx]
+                 if idcg_tmp != 0:
+                     ndcgs += dcg_tmp / idcg_tmp
+         hit_result.append(round(hits / gts, 4))
+         mrr_result.append(round(mrrs / len(pred_array), 4))
+         recall_result.append(round(recalls / len(pred_array), 4))
+         precision_result.append(round(precisions / len(pred_array), 4))
+         ndcg_result.append(round(ndcgs / len(pred_array), 4))
+
+     results = defaultdict(list)
+     for idx in range(len(topKs)):
+         output = f'NDCG@{topKs[idx]}: {ndcg_result[idx]}'
+         results['NDCG'].append(output)
+
+         output = f'MRR@{topKs[idx]}: {mrr_result[idx]}'
+         results['MRR'].append(output)
+
+         output = f'Recall@{topKs[idx]}: {recall_result[idx]}'
+         results['Recall'].append(output)
+
+         output = f'Hit@{topKs[idx]}: {hit_result[idx]}'
+         results['Hit'].append(output)
+
+         output = f'Precision@{topKs[idx]}: {precision_result[idx]}'
+         results['Precision'].append(output)
+     return results
+
+
+ def log_loss(y_true, y_pred):
+     score = y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred)
+     return -score.sum() / len(y_true)
+
+
+ def Coverage(y_pred, all_items, topKs=None):
+     """compute the coverage
+     This metric measures the diversity of the recommended items
+     and the ability to recommend long-tail items
+
+     Args:
+         y_pred (dict): {userid: item_ids}, the key is a user id and the value is the list of recommended items
+         all_items (set): all unique items
+         topKs (list or tuple): the cutoffs at which coverage is computed (default: [5])
+
+     Return:
+         result (list[str]): the list of coverage scores
+     """
+     if topKs is None:
+         topKs = [5]
+     result = []
+     for k in topKs:
+         rec_items = set([])
+         for u in y_pred.keys():
+             tmp_items = set(y_pred[u][:k])
+             rec_items = rec_items | tmp_items
+         score = len(rec_items) * 1. / len(all_items)
+         score = round(score, 4)
+         result.append(f'Coverage@{k}: {score}')
+     return result
+
+
+ # print(Coverage({'0':[0,1,2],'1':[1,3,4]}, {0,1,2,3,4,5}, [2,3]))
+
+ # pred = np.array([ 0.3, 0.2, 0.5, 0.9, 0.7, 0.31, 0.8, 0.1, 0.4, 0.6])
+ # label = np.array([ 1, 0, 0, 1, 0, 0, 1, 0, 0, 1])
+ # users_id = np.array([ 2, 1, 0, 2, 1, 0, 0, 2, 1, 1])
+
+ # print('auc: ', auc_score(label, pred))
+ # print('gauc: ', gauc_score(label, pred, users_id))
+ # print('log_loss: ', log_loss(label, pred))
+
+ # for mt in ['ndcg', 'mrr', 'recall', 'hit','s']:
+ #     tm = topk_metrics(y_true, y_pred, users_id, 3, metric_type=mt)
+ #     print(f'{mt}: {tm}')
+ # y_pred = {'0': [0, 1], '1': [0, 1], '2': [2, 3]}
+ # y_true = {'0': [1, 2], '1': [0, 1, 2], '2': [2, 3]}
+ # out = topk_metrics(y_true, y_pred, topKs=(1,2))
+ # ndcgs = ndcg_score(y_true,y_pred, topKs=(1,2))
+ # print(out)
+ # print(ndcgs)
+
+ # ground_truth, match_res = np.load("C:\\Users\\dongj\\Desktop/res.npy", allow_pickle=True)
+ # print(len(ground_truth),len(match_res))
+ # out = topk_metrics(y_true=ground_truth, y_pred=match_res, topKs=[50])
+ # print(out)
+
+ if __name__ == "__main__":
+     y_pred = {'0': [0, 1], '1': [0, 1], '2': [2, 3]}
+     y_true = {'0': [1, 2], '1': [0, 1, 2], '2': [2, 3]}
+     out = topk_metrics(y_true, y_pred, topKs=(1, 2))
+     print(out)
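A small, hedged example of how the metric helpers can be called; the arrays, user ids and recommendation lists below are made up for illustration:

import numpy as np
from torch_rechub.basic.metric import auc_score, gauc_score, log_loss, topk_metrics

y_true = np.array([1, 0, 0, 1, 0, 1])
y_pred = np.array([0.8, 0.3, 0.4, 0.9, 0.2, 0.6])
users = np.array([0, 0, 0, 1, 1, 1])

print(auc_score(y_true, y_pred))          # overall AUC
print(gauc_score(y_true, y_pred, users))  # per-user AUC, weighted by impression count
print(log_loss(y_true, y_pred))

# The ranking metrics take per-user ground-truth and recommendation lists.
rec = {'u1': [10, 11, 12], 'u2': [20, 21, 22]}
gt = {'u1': [11], 'u2': [20, 23]}
print(topk_metrics(gt, rec, topKs=[3]))   # e.g. {'NDCG': ['NDCG@3: ...'], ...}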
torch_rechub/models/generative/__init__.py
@@ -0,0 +1,6 @@
+ """Generative Recommendation Models."""
+
+ from .hllm import HLLMModel
+ from .hstu import HSTUModel
+
+ __all__ = ['HSTUModel', 'HLLMModel']
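For reference, the two generative models can then be imported from the sub-package as below; their constructor arguments are defined in hstu.py and hllm.py and are not reproduced in this diff:

from torch_rechub.models.generative import HLLMModel, HSTUModel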