torch-rechub 0.0.5__py3-none-any.whl → 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- torch_rechub/basic/layers.py +213 -150
- torch_rechub/basic/loss_func.py +62 -47
- torch_rechub/basic/tracking.py +198 -0
- torch_rechub/data/__init__.py +0 -0
- torch_rechub/data/convert.py +67 -0
- torch_rechub/data/dataset.py +107 -0
- torch_rechub/models/generative/hstu.py +48 -33
- torch_rechub/serving/__init__.py +50 -0
- torch_rechub/serving/annoy.py +133 -0
- torch_rechub/serving/base.py +107 -0
- torch_rechub/serving/faiss.py +154 -0
- torch_rechub/serving/milvus.py +215 -0
- torch_rechub/trainers/ctr_trainer.py +52 -3
- torch_rechub/trainers/match_trainer.py +52 -3
- torch_rechub/trainers/mtl_trainer.py +61 -3
- torch_rechub/trainers/seq_trainer.py +93 -17
- torch_rechub/types.py +5 -0
- torch_rechub/utils/data.py +167 -137
- torch_rechub/utils/hstu_utils.py +87 -76
- torch_rechub/utils/model_utils.py +10 -12
- torch_rechub/utils/onnx_export.py +98 -45
- torch_rechub/utils/quantization.py +128 -0
- torch_rechub/utils/visualization.py +4 -12
- {torch_rechub-0.0.5.dist-info → torch_rechub-0.1.0.dist-info}/METADATA +20 -5
- {torch_rechub-0.0.5.dist-info → torch_rechub-0.1.0.dist-info}/RECORD +27 -17
- torch_rechub/trainers/matching.md +0 -3
- {torch_rechub-0.0.5.dist-info → torch_rechub-0.1.0.dist-info}/WHEEL +0 -0
- {torch_rechub-0.0.5.dist-info → torch_rechub-0.1.0.dist-info}/licenses/LICENSE +0 -0
torch_rechub/serving/base.py (new file)
@@ -0,0 +1,107 @@
+"""Base abstraction for vector indexers used in the retrieval stage."""
+
+import abc
+import typing as ty
+
+import torch
+
+from torch_rechub.types import FilePath
+
+
+class BaseBuilder(abc.ABC):
+    """
+    Abstract base class for vector index construction.
+
+    A builder owns all build-time configuration and produces a ``BaseIndexer`` through a
+    context-managed build operation.
+
+    Examples
+    --------
+    >>> builder = BaseBuilder(...)
+    >>> embeddings = torch.randn(1000, 128)
+    >>> with builder.from_embeddings(embeddings) as indexer:
+    ...     ids, scores = indexer.query(embeddings[:2], top_k=5)
+    ...     indexer.save("index.bin")
+    >>> with builder.from_index_file("index.bin") as indexer:
+    ...     ids, scores = indexer.query(embeddings[:2], top_k=5)
+    """
+
+    @abc.abstractmethod
+    def from_embeddings(
+        self,
+        embeddings: torch.Tensor,
+    ) -> ty.ContextManager["BaseIndexer"]:
+        """
+        Build a vector index from the embeddings.
+
+        Parameters
+        ----------
+        embeddings : torch.Tensor
+            A 2D tensor (n, d) containing embedding vectors to build a new index.
+
+        Returns
+        -------
+        ContextManager[BaseIndexer]
+            A context manager that yields a fully initialized ``BaseIndexer``.
+        """
+
+    @abc.abstractmethod
+    def from_index_file(
+        self,
+        index_file: FilePath,
+    ) -> ty.ContextManager["BaseIndexer"]:
+        """
+        Build a vector index from the index file.
+
+        Parameters
+        ----------
+        index_file : FilePath
+            Path to a serialized index on disk to be loaded.
+
+        Returns
+        -------
+        ContextManager[BaseIndexer]
+            A context manager that yields a fully initialized ``BaseIndexer``.
+        """
+
+
+class BaseIndexer(abc.ABC):
+    """Abstract base class for vector indexers in the retrieval stage."""
+
+    @abc.abstractmethod
+    def query(
+        self,
+        embeddings: torch.Tensor,
+        top_k: int,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """
+        Query the vector index.
+
+        Parameters
+        ----------
+        embeddings : torch.Tensor
+            A 2D tensor (n, d) containing embedding vectors to query the index.
+        top_k : int
+            The number of nearest items to retrieve for each vector.
+
+        Returns
+        -------
+        torch.Tensor
+            A 2D tensor of shape (n, top_k), containing the retrieved nearest neighbor
+            IDs for each vector, ordered by descending relevance.
+        torch.Tensor
+            A 2D tensor of shape (n, top_k), containing the relevance distances of the
+            nearest neighbors for each vector.
+        """
+
+    @abc.abstractmethod
+    def save(self, file_path: FilePath) -> None:
+        """
+        Persist the index to local disk.
+
+        Parameters
+        ----------
+        file_path : FilePath
+            Destination path where the index will be saved.
+        """
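To make the contract concrete, here is a minimal brute-force implementation pair. It is purely illustrative (``ExactBuilder`` and ``ExactIndexer`` are hypothetical names, not part of the package) and assumes the module above is importable:

```python
import contextlib
import typing as ty

import torch

from torch_rechub.serving.base import BaseBuilder, BaseIndexer
from torch_rechub.types import FilePath


class ExactIndexer(BaseIndexer):
    """Hypothetical exact (brute-force) L2 indexer satisfying the ABC above."""

    def __init__(self, embeddings: torch.Tensor) -> None:
        self._embeddings = embeddings

    def query(self, embeddings: torch.Tensor, top_k: int) -> tuple[torch.Tensor, torch.Tensor]:
        # Full pairwise L2 distances, then the top_k smallest per query row.
        dists = torch.cdist(embeddings, self._embeddings)
        nn_dists, nn_ids = torch.topk(dists, top_k, dim=1, largest=False)
        return nn_ids, nn_dists

    def save(self, file_path: FilePath) -> None:
        torch.save(self._embeddings, file_path)


class ExactBuilder(BaseBuilder):
    """Hypothetical builder producing ``ExactIndexer`` instances."""

    @contextlib.contextmanager
    def from_embeddings(self, embeddings: torch.Tensor) -> ty.Generator[ExactIndexer, None, None]:
        yield ExactIndexer(embeddings)

    @contextlib.contextmanager
    def from_index_file(self, index_file: FilePath) -> ty.Generator[ExactIndexer, None, None]:
        yield ExactIndexer(torch.load(index_file))
```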
torch_rechub/serving/faiss.py (new file)
@@ -0,0 +1,154 @@
+"""FAISS-based vector index implementation for the retrieval stage."""
+
+import contextlib
+import typing as ty
+
+import faiss
+import torch
+
+from torch_rechub.types import FilePath
+
+from .base import BaseBuilder, BaseIndexer
+
+# Type for indexing methods.
+_FaissIndexType = ty.Literal["Flat", "HNSW", "IVF"]
+
+# Type for indexing metrics.
+_FaissMetric = ty.Literal["IP", "L2"]
+
+# Default indexing method.
+_DEFAULT_FAISS_INDEX_TYPE: _FaissIndexType = "Flat"
+
+# Default indexing metric.
+_DEFAULT_FAISS_METRIC: _FaissMetric = "L2"
+
+# Default number of clusters to build an IVF index.
+_DEFAULT_N_LISTS = 100
+
+# Default max number of neighbors to build an HNSW index.
+_DEFAULT_M = 32
+
+
+class FaissBuilder(BaseBuilder):
+    """Implement ``BaseBuilder`` for FAISS vector index construction."""
+
+    def __init__(
+        self,
+        index_type: _FaissIndexType = _DEFAULT_FAISS_INDEX_TYPE,
+        metric: _FaissMetric = _DEFAULT_FAISS_METRIC,
+        *,
+        m: int = _DEFAULT_M,
+        nlists: int = _DEFAULT_N_LISTS,
+        efSearch: ty.Optional[int] = None,
+        nprobe: ty.Optional[int] = None,
+    ) -> None:
+        """
+        Initialize a FAISS builder.
+
+        Parameters
+        ----------
+        index_type : ``"Flat"``, ``"HNSW"``, or ``"IVF"``, optional
+            The indexing method. Defaults to ``"Flat"``.
+        metric : ``"IP"`` or ``"L2"``, optional
+            The indexing metric. Defaults to ``"L2"``.
+        m : int, optional
+            Max number of neighbors to build an HNSW index.
+        nlists : int, optional
+            Number of clusters to build an IVF index.
+        efSearch : int or None, optional
+            Number of candidate nodes during an HNSW search.
+        nprobe : int or None, optional
+            Number of clusters to probe during an IVF search.
+        """
+        self._index_type_dsl = _build_index_type_dsl(index_type, m=m, nlists=nlists)
+        self._metric = _resolve_metric_type(metric)
+
+        self._efSearch = efSearch
+        self._nprobe = nprobe
+
+    @contextlib.contextmanager
+    def from_embeddings(
+        self,
+        embeddings: torch.Tensor,
+    ) -> ty.Generator["FaissIndexer", None, None]:
+        """Adhere to ``BaseBuilder.from_embeddings``."""
+        index: faiss.Index = faiss.index_factory(
+            embeddings.shape[1],
+            self._index_type_dsl,
+            self._metric,
+        )
+
+        if isinstance(index, faiss.IndexHNSW) and self._efSearch is not None:
+            index.hnsw.efSearch = self._efSearch
+
+        if isinstance(index, faiss.IndexIVF) and self._nprobe is not None:
+            index.nprobe = self._nprobe
+
+        vectors = embeddings.cpu().numpy()  # FAISS expects numpy arrays; also handles GPU tensors
+        index.train(vectors)
+        index.add(vectors)
+
+        try:
+            yield FaissIndexer(index)
+        finally:
+            pass
+
+    @contextlib.contextmanager
+    def from_index_file(
+        self,
+        index_file: FilePath,
+    ) -> ty.Generator["FaissIndexer", None, None]:
+        """Adhere to ``BaseBuilder.from_index_file``."""
+        index = faiss.read_index(str(index_file))
+
+        try:
+            yield FaissIndexer(index)
+        finally:
+            pass
+
+
+class FaissIndexer(BaseIndexer):
+    """FAISS-based implementation of ``BaseIndexer``."""
+
+    def __init__(self, index: faiss.Index) -> None:
+        """Initialize a FAISS indexer."""
+        self._index = index
+
+    def query(
+        self,
+        embeddings: torch.Tensor,
+        top_k: int,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Adhere to ``BaseIndexer.query``."""
+        dists, ids = self._index.search(embeddings.cpu().numpy(), top_k)
+        return torch.from_numpy(ids), torch.from_numpy(dists)
+
+    def save(self, file_path: FilePath) -> None:
+        """Adhere to ``BaseIndexer.save``."""
+        faiss.write_index(self._index, str(file_path))
+
+
+# helper functions
+
+
+def _build_index_type_dsl(index_type: _FaissIndexType, *, m: int, nlists: int) -> str:
+    """Build the index_type DSL passed to ``faiss.index_factory``."""
+    if index_type == "HNSW":
+        return f"{index_type}{m},Flat"
+
+    if index_type == "IVF":
+        return f"{index_type}{nlists},Flat"
+
+    return "Flat"
+
+
+def _resolve_metric_type(metric: _FaissMetric) -> int:
+    """Resolve the metric type from a string literal to an integer."""
+    if metric == "L2":
+        return ty.cast(int, faiss.METRIC_L2)
+
+    return ty.cast(int, faiss.METRIC_INNER_PRODUCT)
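A usage sketch for the classes above, assuming ``faiss`` is installed; the file name and parameter values are illustrative. Note that with the ``"IP"`` metric the returned distances are inner-product scores, so higher means more similar:

```python
import torch

from torch_rechub.serving.faiss import FaissBuilder

item_embeddings = torch.randn(10_000, 64)
user_embeddings = item_embeddings[:3]

# IVF index with inner-product scoring; probe 8 of the 256 clusters per query.
builder = FaissBuilder("IVF", "IP", nlists=256, nprobe=8)

with builder.from_embeddings(item_embeddings) as indexer:
    ids, scores = indexer.query(user_embeddings, top_k=10)  # both shaped (3, 10)
    indexer.save("items.faiss")

# Reload the persisted index later without rebuilding it.
with builder.from_index_file("items.faiss") as indexer:
    ids, scores = indexer.query(user_embeddings, top_k=10)
```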
torch_rechub/serving/milvus.py (new file)
@@ -0,0 +1,215 @@
+"""Milvus-based vector index implementation for the retrieval stage."""
+
+import contextlib
+import typing as ty
+import uuid
+
+import numpy as np
+import pymilvus as milvus
+import torch
+
+from torch_rechub.types import FilePath
+
+from .base import BaseBuilder, BaseIndexer
+
+# Type for indexing methods.
+_MilvusIndexType = ty.Literal["FLAT", "HNSW", "IVF_FLAT"]
+
+# Type for indexing metrics.
+_MilvusMetric = ty.Literal["COSINE", "IP", "L2"]
+
+# Default indexing method.
+_DEFAULT_MILVUS_INDEX_TYPE: _MilvusIndexType = "FLAT"
+
+# Default indexing metric.
+_DEFAULT_MILVUS_METRIC: _MilvusMetric = "COSINE"
+
+# Default number of clusters to build an IVF index.
+_DEFAULT_N_LIST = 128
+
+# Default max number of neighbors to build an HNSW index.
+_DEFAULT_M = 30
+
+# Default name of the Milvus database connection.
+_DEFAULT_NAME = "rechub"
+
+# Default host of the Milvus instance.
+_DEFAULT_HOST = "localhost"
+
+# Default port of the Milvus instance.
+_DEFAULT_PORT = 19530
+
+# Name of the embedding column in the Milvus database table.
+_EMBEDDING_COLUMN = "embedding"
+
+
+class MilvusBuilder(BaseBuilder):
+    """Implement ``BaseBuilder`` for Milvus vector index construction."""
+
+    def __init__(
+        self,
+        d: int,
+        index_type: _MilvusIndexType = _DEFAULT_MILVUS_INDEX_TYPE,
+        metric: _MilvusMetric = _DEFAULT_MILVUS_METRIC,
+        *,
+        m: int = _DEFAULT_M,
+        nlist: int = _DEFAULT_N_LIST,
+        ef: ty.Optional[int] = None,
+        nprobe: ty.Optional[int] = None,
+        name: str = _DEFAULT_NAME,
+        host: str = _DEFAULT_HOST,
+        port: int = _DEFAULT_PORT,
+    ) -> None:
+        """
+        Initialize a Milvus builder.
+
+        Parameters
+        ----------
+        d : int
+            The dimension of the embeddings.
+        index_type : ``"FLAT"``, ``"HNSW"``, or ``"IVF_FLAT"``, optional
+            The indexing method. Defaults to ``"FLAT"``.
+        metric : ``"COSINE"``, ``"IP"``, or ``"L2"``, optional
+            The indexing metric. Defaults to ``"COSINE"``.
+        m : int, optional
+            Max number of neighbors to build an HNSW index.
+        nlist : int, optional
+            Number of clusters to build an IVF index.
+        ef : int or None, optional
+            Number of candidate nodes during an HNSW search.
+        nprobe : int or None, optional
+            Number of clusters to probe during an IVF search.
+        name : str, optional
+            The name of the connection. Each name corresponds to one connection.
+        host : str, optional
+            The host of the Milvus instance. Defaults to ``"localhost"``.
+        port : int, optional
+            The port of the Milvus instance. Defaults to ``19530``.
+        """
+        self._d = d
+
+        # connection parameters
+        self._name = name
+        self._host = host
+        self._port = port
+
+        bparams: dict[str, ty.Any] = {}
+        qparams: dict[str, ty.Any] = {}
+
+        if index_type == "HNSW":
+            bparams.update(M=m)
+            if ef is not None:
+                qparams.update(ef=ef)
+
+        if index_type == "IVF_FLAT":
+            bparams.update(nlist=nlist)
+            if nprobe is not None:
+                qparams.update(nprobe=nprobe)
+
+        self._build_params = dict(
+            index_type=index_type,
+            metric_type=metric,
+            params=bparams,
+        )
+
+        self._query_params = dict(
+            metric_type=metric,
+            params=qparams,
+        )
+
+    @contextlib.contextmanager
+    def from_embeddings(
+        self,
+        embeddings: torch.Tensor,
+    ) -> ty.Generator["MilvusIndexer", None, None]:
+        """Adhere to ``BaseBuilder.from_embeddings``."""
+        milvus.connections.connect(self._name, host=self._host, port=self._port)
+        collection = self._build_collection(embeddings)
+
+        try:
+            yield MilvusIndexer(collection, self._query_params)
+        finally:
+            collection.drop()
+            milvus.connections.disconnect(self._name)
+
+    @contextlib.contextmanager
+    def from_index_file(
+        self,
+        index_file: FilePath,
+    ) -> ty.Generator["MilvusIndexer", None, None]:
+        """Adhere to ``BaseBuilder.from_index_file``."""
+        raise NotImplementedError("Milvus does not support index files!")
+
+    def _build_collection(self, embeddings: torch.Tensor) -> milvus.Collection:
+        """Build a Milvus collection with the current connection."""
+        fields = [
+            milvus.FieldSchema(
+                name="id",
+                dtype=milvus.DataType.INT64,
+                is_primary=True,
+            ),
+            milvus.FieldSchema(
+                name=_EMBEDDING_COLUMN,
+                dtype=milvus.DataType.FLOAT_VECTOR,
+                dim=self._d,
+            ),
+        ]
+
+        collection = milvus.Collection(
+            name=f"{self._name}_{uuid.uuid4().hex}",
+            schema=milvus.CollectionSchema(fields=fields),
+            using=self._name,
+        )
+
+        n, _ = embeddings.shape
+        collection.insert([np.arange(n, dtype=np.int64), embeddings.cpu().numpy()])
+        collection.create_index(_EMBEDDING_COLUMN, index_params=self._build_params)
+        collection.load()
+
+        return collection
+
+
+class MilvusIndexer(BaseIndexer):
+    """Milvus-based implementation of ``BaseIndexer``."""
+
+    def __init__(
+        self,
+        collection: milvus.Collection,
+        query_params: dict[str, ty.Any],
+    ) -> None:
+        """Initialize a Milvus indexer."""
+        self._collection = collection
+        self._query_params = query_params
+
+    def query(
+        self,
+        embeddings: torch.Tensor,
+        top_k: int,
+    ) -> tuple[torch.Tensor, torch.Tensor]:
+        """Adhere to ``BaseIndexer.query``."""
+        results = self._collection.search(
+            data=embeddings.cpu().numpy(),
+            anns_field=_EMBEDDING_COLUMN,
+            param=self._query_params,
+            limit=top_k,
+        )
+
+        n, _ = embeddings.shape
+        nn_ids = np.zeros((n, top_k), dtype=np.int64)
+        nn_distances = np.zeros((n, top_k), dtype=np.float32)
+
+        for i, result in enumerate(results):
+            nn_ids[i] = result.ids
+            nn_distances[i] = result.distances
+
+        return torch.from_numpy(nn_ids), torch.from_numpy(nn_distances)
+
+    def save(self, file_path: FilePath) -> None:
+        """Adhere to ``BaseIndexer.save``."""
+        raise NotImplementedError("Milvus does not support index files!")
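A usage sketch for the Milvus backend, assuming a Milvus server is reachable at ``localhost:19530``; parameter values are illustrative. Unlike the FAISS backend, the collection only lives for the duration of the ``with`` block, and ``save``/``from_index_file`` raise ``NotImplementedError``:

```python
import torch

from torch_rechub.serving.milvus import MilvusBuilder

item_embeddings = torch.randn(5_000, 64)

# HNSW index with cosine similarity; ef controls the search-time candidate pool.
builder = MilvusBuilder(d=64, index_type="HNSW", metric="COSINE", m=16, ef=64)

with builder.from_embeddings(item_embeddings) as indexer:
    ids, scores = indexer.query(item_embeddings[:2], top_k=5)  # both shaped (2, 5)
# On exit, the temporary collection is dropped and the connection is closed.
```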
torch_rechub/trainers/ctr_trainer.py
@@ -43,6 +43,7 @@ class CTRTrainer(object):
         gpus=None,
         loss_mode=True,
         model_path="./",
+        model_logger=None,
     ):
         self.model = model  # for uniform weights save method in one gpu or multi gpu
         if gpus is None:
@@ -70,10 +71,13 @@ class CTRTrainer(object):
         self.model_path = model_path
         # Initialize regularization loss
         self.reg_loss_fn = RegularizationLoss(**regularization_params)
+        self.model_logger = model_logger
 
     def train_one_epoch(self, data_loader, log_interval=10):
         self.model.train()
         total_loss = 0
+        epoch_loss = 0
+        batch_count = 0
         tk0 = tqdm.tqdm(data_loader, desc="train", smoothing=0, mininterval=1.0)
         for i, (x_dict, y) in enumerate(tk0):
             x_dict = {k: v.to(self.device) for k, v in x_dict.items()}  # tensor to GPU
@@ -93,27 +97,62 @@ class CTRTrainer(object):
             loss.backward()
             self.optimizer.step()
             total_loss += loss.item()
+            epoch_loss += loss.item()
+            batch_count += 1
             if (i + 1) % log_interval == 0:
                 tk0.set_postfix(loss=total_loss / log_interval)
                 total_loss = 0
 
+        # Return average epoch loss
+        return epoch_loss / batch_count if batch_count > 0 else 0
+
     def fit(self, train_dataloader, val_dataloader=None):
+        for logger in self._iter_loggers():
+            logger.log_hyperparams({'n_epoch': self.n_epoch, 'learning_rate': self.optimizer.param_groups[0]['lr'], 'loss_mode': self.loss_mode})
+
         for epoch_i in range(self.n_epoch):
             print('epoch:', epoch_i)
-            self.train_one_epoch(train_dataloader)
+            train_loss = self.train_one_epoch(train_dataloader)
+
+            for logger in self._iter_loggers():
+                logger.log_metrics({'train/loss': train_loss, 'learning_rate': self.optimizer.param_groups[0]['lr']}, step=epoch_i)
+
             if self.scheduler is not None:
                 if epoch_i % self.scheduler.step_size == 0:
                     print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr']))
                     self.scheduler.step()  # update lr in epoch level by scheduler
+
             if val_dataloader:
                 auc = self.evaluate(self.model, val_dataloader)
                 print('epoch:', epoch_i, 'validation: auc:', auc)
+
+                for logger in self._iter_loggers():
+                    logger.log_metrics({'val/auc': auc}, step=epoch_i)
+
                 if self.early_stopper.stop_training(auc, self.model.state_dict()):
                     print(f'validation: best auc: {self.early_stopper.best_auc}')
                     self.model.load_state_dict(self.early_stopper.best_weights)
                     break
+
         torch.save(self.model.state_dict(), os.path.join(self.model_path, "model.pth"))  # save best auc model
 
+        for logger in self._iter_loggers():
+            logger.finish()
+
+    def _iter_loggers(self):
+        """Return logger instances as a list.
+
+        Returns
+        -------
+        list
+            Active logger instances. Empty when ``model_logger`` is ``None``.
+        """
+        if self.model_logger is None:
+            return []
+        if isinstance(self.model_logger, (list, tuple)):
+            return list(self.model_logger)
+        return [self.model_logger]
+
     def evaluate(self, model, data_loader):
         model.eval()
         targets, predicts = list(), list()
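``fit`` only ever calls ``log_hyperparams``, ``log_metrics``, and ``finish`` on each logger, so ``model_logger`` is duck-typed: any object with those three methods works (the package's own trackers live in the new ``torch_rechub/basic/tracking.py``). A minimal sketch with a hypothetical stdout logger:

```python
class PrintLogger:
    """Hypothetical logger satisfying the interface fit() relies on."""

    def log_hyperparams(self, params):
        print("hparams:", params)

    def log_metrics(self, metrics, step):
        print(f"step {step}:", metrics)

    def finish(self):
        print("run finished")


# A single logger or a list/tuple of loggers is accepted (see _iter_loggers above).
trainer = CTRTrainer(model, model_logger=[PrintLogger()])  # other constructor args elided
```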
@@ -146,7 +185,7 @@ class CTRTrainer(object):
             predicts.extend(y_pred.tolist())
         return predicts
 
-    def export_onnx(self, output_path, dummy_input=None, batch_size=2, seq_length=10, opset_version=14, dynamic_batch=True, device=None, verbose=False):
+    def export_onnx(self, output_path, dummy_input=None, batch_size=2, seq_length=10, opset_version=14, dynamic_batch=True, device=None, verbose=False, onnx_export_kwargs=None):
         """Export the trained model to ONNX format.
 
         This method exports the ranking model (e.g., DeepFM, WideDeep, DCN) to ONNX format
@@ -163,6 +202,7 @@ class CTRTrainer(object):
             device (str, optional): Device for export ('cpu', 'cuda', etc.).
                 If None, defaults to 'cpu' for maximum compatibility.
             verbose (bool): Print export details (default: False).
+            onnx_export_kwargs (dict, optional): Extra kwargs forwarded to ``torch.onnx.export``.
 
         Returns:
             bool: True if export succeeded, False otherwise.
@@ -188,7 +228,16 @@ class CTRTrainer(object):
         export_device = device if device is not None else 'cpu'
 
         exporter = ONNXExporter(model, device=export_device)
-        return exporter.export(
+        return exporter.export(
+            output_path=output_path,
+            dummy_input=dummy_input,
+            batch_size=batch_size,
+            seq_length=seq_length,
+            opset_version=opset_version,
+            dynamic_batch=dynamic_batch,
+            verbose=verbose,
+            onnx_export_kwargs=onnx_export_kwargs,
+        )
 
     def visualization(self, input_data=None, batch_size=2, seq_length=10, depth=3, show_shapes=True, expand_nested=True, save_path=None, graph_name="model", device=None, dpi=300, **kwargs):
         """Visualize the model's computation graph.