torch-rechub 0.0.5__py3-none-any.whl → 0.0.6__py3-none-any.whl

This diff shows the changes between publicly released versions of the package as they appear in their public registry; it is provided for informational purposes only.
torch_rechub/basic/tracking.py (new file)
@@ -0,0 +1,198 @@
1
+ """Experiment tracking utilities for Torch-RecHub.
2
+
3
+ This module exposes lightweight adapters for common visualization and
4
+ experiment tracking tools, namely Weights & Biases (wandb), SwanLab, and
5
+ TensorBoardX.
6
+ """
7
+
8
+ from abc import ABC, abstractmethod
9
+ from typing import Any, Dict, List, Optional, Union
10
+
11
+
12
+ class BaseLogger(ABC):
13
+ """Base interface for experiment tracking backends.
14
+
15
+ Methods
16
+ -------
17
+ log_metrics(metrics, step=None)
18
+ Record scalar metrics at a given step.
19
+ log_hyperparams(params)
20
+ Store hyperparameters and run configuration.
21
+ finish()
22
+ Flush pending logs and release resources.
23
+ """
24
+
25
+ @abstractmethod
26
+ def log_metrics(self, metrics: Dict[str, Any], step: Optional[int] = None) -> None:
27
+ """Log metrics to the tracking backend.
28
+
29
+ Parameters
30
+ ----------
31
+ metrics : dict of str to Any
32
+ Metric name-value pairs to record.
33
+ step : int, optional
34
+ Explicit global step or epoch index. When ``None``, the backend
35
+ uses its own default step handling.
36
+ """
37
+ raise NotImplementedError
38
+
39
+ @abstractmethod
40
+ def log_hyperparams(self, params: Dict[str, Any]) -> None:
41
+ """Log experiment hyperparameters.
42
+
43
+ Parameters
44
+ ----------
45
+ params : dict of str to Any
46
+ Hyperparameters or configuration values to persist with the run.
47
+ """
48
+ raise NotImplementedError
49
+
50
+ @abstractmethod
51
+ def finish(self) -> None:
52
+ """Finalize logging and free any backend resources."""
53
+ raise NotImplementedError
54
+
55
+
56
+ class WandbLogger(BaseLogger):
57
+ """Weights & Biases logger implementation.
58
+
59
+ Parameters
60
+ ----------
61
+ project : str
62
+ Name of the wandb project to log to.
63
+ name : str, optional
64
+ Display name for the run.
65
+ config : dict, optional
66
+ Initial hyperparameter configuration to record.
67
+ tags : list of str, optional
68
+ Optional tags for grouping runs.
69
+ notes : str, optional
70
+ Long-form notes shown in the run overview.
71
+ dir : str, optional
72
+ Local directory for wandb artifacts and cache.
73
+ **kwargs : dict
74
+ Additional keyword arguments forwarded to ``wandb.init``.
75
+
76
+ Raises
77
+ ------
78
+ ImportError
79
+ If ``wandb`` is not installed in the current environment.
80
+ """
81
+
82
+ def __init__(self, project: str, name: Optional[str] = None, config: Optional[Dict[str, Any]] = None, tags: Optional[List[str]] = None, notes: Optional[str] = None, dir: Optional[str] = None, **kwargs):
83
+ try:
84
+ import wandb
85
+ self._wandb = wandb
86
+ except ImportError:
87
+ raise ImportError("wandb is not installed. Install it with: pip install wandb")
88
+
89
+ self.run = self._wandb.init(project=project, name=name, config=config, tags=tags, notes=notes, dir=dir, **kwargs)
90
+
91
+ def log_metrics(self, metrics: Dict[str, Any], step: Optional[int] = None) -> None:
92
+ if step is not None:
93
+ self._wandb.log(metrics, step=step)
94
+ else:
95
+ self._wandb.log(metrics)
96
+
97
+ def log_hyperparams(self, params: Dict[str, Any]) -> None:
98
+ if self.run is not None:
99
+ self.run.config.update(params)
100
+
101
+ def finish(self) -> None:
102
+ if self.run is not None:
103
+ self.run.finish()
104
+
105
+
106
+ class SwanLabLogger(BaseLogger):
107
+ """SwanLab logger implementation.
108
+
109
+ Parameters
110
+ ----------
111
+ project : str, optional
112
+ Project identifier for grouping experiments.
113
+ experiment_name : str, optional
114
+ Display name for the experiment or run.
115
+ description : str, optional
116
+ Text description shown alongside the run.
117
+ config : dict, optional
118
+ Hyperparameters or configuration to log at startup.
119
+ logdir : str, optional
120
+ Directory where logs and artifacts are stored.
121
+ **kwargs : dict
122
+ Additional keyword arguments forwarded to ``swanlab.init``.
123
+
124
+ Raises
125
+ ------
126
+ ImportError
127
+ If ``swanlab`` is not installed in the current environment.
128
+ """
129
+
130
+ def __init__(self, project: Optional[str] = None, experiment_name: Optional[str] = None, description: Optional[str] = None, config: Optional[Dict[str, Any]] = None, logdir: Optional[str] = None, **kwargs):
131
+ try:
132
+ import swanlab
133
+ self._swanlab = swanlab
134
+ except ImportError:
135
+ raise ImportError("swanlab is not installed. Install it with: pip install swanlab")
136
+
137
+ self.run = self._swanlab.init(project=project, experiment_name=experiment_name, description=description, config=config, logdir=logdir, **kwargs)
138
+
139
+ def log_metrics(self, metrics: Dict[str, Any], step: Optional[int] = None) -> None:
140
+ if step is not None:
141
+ self._swanlab.log(metrics, step=step)
142
+ else:
143
+ self._swanlab.log(metrics)
144
+
145
+ def log_hyperparams(self, params: Dict[str, Any]) -> None:
146
+ if self.run is not None:
147
+ self.run.config.update(params)
148
+
149
+ def finish(self) -> None:
150
+ self._swanlab.finish()
151
+
152
+
153
+ class TensorBoardXLogger(BaseLogger):
154
+ """TensorBoardX logger implementation.
155
+
156
+ Parameters
157
+ ----------
158
+ log_dir : str
159
+ Directory where event files will be written.
160
+ comment : str, default=""
161
+ Comment appended to the log directory name.
162
+ **kwargs : dict
163
+ Additional keyword arguments forwarded to
164
+ ``tensorboardX.SummaryWriter``.
165
+
166
+ Raises
167
+ ------
168
+ ImportError
169
+ If ``tensorboardX`` is not installed in the current environment.
170
+ """
171
+
172
+ def __init__(self, log_dir: str, comment: str = "", **kwargs):
173
+ try:
174
+ from tensorboardX import SummaryWriter
175
+ self._SummaryWriter = SummaryWriter
176
+ except ImportError:
177
+ raise ImportError("tensorboardX is not installed. Install it with: pip install tensorboardX")
178
+
179
+ self.writer = self._SummaryWriter(log_dir=log_dir, comment=comment, **kwargs)
180
+ self._step = 0
181
+
182
+ def log_metrics(self, metrics: Dict[str, Any], step: Optional[int] = None) -> None:
183
+ if step is None:
184
+ step = self._step
185
+ self._step += 1
186
+
187
+ for key, value in metrics.items():
188
+ if value is not None:
189
+ if isinstance(value, (int, float)):
190
+ self.writer.add_scalar(key, value, step)
191
+
192
+ def log_hyperparams(self, params: Dict[str, Any]) -> None:
193
+ hparam_str = "\n".join([f"{k}: {v}" for k, v in params.items()])
194
+ self.writer.add_text("hyperparameters", hparam_str, 0)
195
+
196
+ def finish(self) -> None:
197
+ if self.writer is not None:
198
+ self.writer.close()
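
For reference, a minimal sketch of the logger lifecycle shared by all three adapters, assuming the optional backend is installed (e.g. via the new tracking extra, pip install "torch-rechub[tracking]"):

# Minimal sketch of the shared BaseLogger lifecycle, using the TensorBoardX backend.
# Assumes tensorboardX is installed (e.g. pip install "torch-rechub[tracking]").
from torch_rechub.basic.tracking import TensorBoardXLogger

logger = TensorBoardXLogger(log_dir="./runs/demo")
logger.log_hyperparams({"learning_rate": 1e-3, "n_epoch": 5})
for epoch in range(5):
    logger.log_metrics({"train/loss": 1.0 / (epoch + 1)}, step=epoch)
logger.finish()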
torch_rechub/data/convert.py (new file)
@@ -0,0 +1,67 @@
1
+ """Utilities for converting array-like data structures into PyTorch tensors."""
2
+
3
+ import numpy.typing as npt
4
+ import pyarrow as pa
5
+ import pyarrow.compute as pc
6
+ import pyarrow.types as pt
7
+ import torch
8
+
9
+
10
+ def pa_array_to_tensor(arr: pa.Array) -> torch.Tensor:
11
+ """
12
+ Convert a PyArrow array to a PyTorch tensor.
13
+
14
+ Parameters
15
+ ----------
16
+ arr : pa.Array
17
+ The given PyArrow array.
18
+
19
+ Returns
20
+ -------
21
+ torch.Tensor: The resulting PyTorch tensor.
22
+
23
+ Raises
24
+ ------
25
+ TypeError
26
+ If the array type, or the value type of a nested array, is unsupported.
27
+ ValueError
28
+ If the nested array is ragged (rows of unequal length).
29
+ """
30
+ if _is_supported_scalar(arr.type):
31
+ arr = pc.cast(arr, pa.float32())
32
+ return torch.from_numpy(_to_writable_numpy(arr))
33
+
34
+ if not _is_supported_list(arr.type):
35
+ raise TypeError(f"Unsupported array type: {arr.type}")
36
+
37
+ if not _is_supported_scalar(val_type := arr.type.value_type):
38
+ raise TypeError(f"Unsupported value type in the nested array: {val_type}")
39
+
40
+ if len(pc.unique(pc.list_value_length(arr))) > 1:
41
+ raise ValueError("Cannot convert the ragged nested array.")
42
+
43
+ arr = pc.cast(arr, pa.list_(pa.float32()))
44
+ np_arr = _to_writable_numpy(arr.values) # type: ignore[attr-defined]
45
+
46
+ # For empty list-of-lists, define output shape as (0, 0); otherwise infer width.
47
+ return torch.from_numpy(np_arr.reshape(len(arr), -1 if len(arr) > 0 else 0))
48
+
49
+
50
+ # helper functions
51
+
52
+
53
+ def _is_supported_list(t: pa.DataType) -> bool:
54
+ """Check if the given PyArrow data type is a supported list."""
55
+ return pt.is_fixed_size_list(t) or pt.is_large_list(t) or pt.is_list(t)
56
+
57
+
58
+ def _is_supported_scalar(t: pa.DataType) -> bool:
59
+ """Check if the given PyArrow data type is a supported scalar type."""
60
+ return pt.is_boolean(t) or pt.is_floating(t) or pt.is_integer(t) or pt.is_null(t)
61
+
62
+
63
+ def _to_writable_numpy(arr: pa.Array) -> npt.NDArray:
64
+ """Dump a PyArrow array into a writable NumPy array."""
65
+ # Force the NumPy array to be writable. PyArrow's to_numpy() often returns a
66
+ # read-only view for zero-copy, which PyTorch's from_numpy() does not support.
67
+ return arr.to_numpy(writable=True, zero_copy_only=False)
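
A short sketch of the conversion rules implemented above (assumes pyarrow is installed; the module path torch_rechub.data.convert follows the RECORD entries later in this diff):

# Sketch of pa_array_to_tensor behaviour on scalar, nested and ragged arrays.
import pyarrow as pa
from torch_rechub.data.convert import pa_array_to_tensor

scalars = pa.array([1, 2, 3])                 # integer array -> cast to float32
print(pa_array_to_tensor(scalars).shape)      # torch.Size([3])

nested = pa.array([[1.0, 2.0], [3.0, 4.0]])   # equal-length rows -> 2-D tensor
print(pa_array_to_tensor(nested).shape)       # torch.Size([2, 2])

ragged = pa.array([[1.0], [2.0, 3.0]])        # rows of unequal length
# pa_array_to_tensor(ragged)                  # would raise ValueError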
torch_rechub/data/dataset.py (new file)
@@ -0,0 +1,120 @@
1
+ """Dataset implementations providing streaming, batch-wise data access for PyTorch."""
2
+
3
+ import os
4
+ import typing as ty
5
+
6
+ import pyarrow.dataset as pd
7
+ import torch
8
+ from torch.utils.data import IterableDataset, get_worker_info
9
+
10
+ from .convert import pa_array_to_tensor
11
+
12
+ # Type for path to a file
13
+ _FilePath = ty.Union[str, os.PathLike]
14
+
15
+ # The default batch size when reading a Parquet dataset
16
+ _DEFAULT_BATCH_SIZE = 1024
17
+
18
+
19
+ class ParquetIterableDataset(IterableDataset):
20
+ """
21
+ IterableDataset that streams data from one or more Parquet files.
22
+
23
+ Parameters
24
+ ----------
25
+ file_paths : list[_FilePath]
26
+ Paths to Parquet files.
27
+ columns : list[str], optional
28
+ Column names to select. If ``None``, all columns are read.
29
+ batch_size : int, default _DEFAULT_BATCH_SIZE
30
+ Number of rows per streamed batch.
31
+
32
+ Notes
33
+ -----
34
+ This dataset reads data lazily and never loads the entire Parquet dataset into memory.
35
+ The current worker receives a partition of ``file_paths`` and builds its own PyArrow
36
+ Dataset and Scanner. Iteration yields dictionaries mapping column names to PyTorch
37
+ tensors created via NumPy, one batch at a time.
38
+
39
+ Examples
40
+ --------
41
+ >>> ds = ParquetIterableDataset(
42
+ ... ["/data/train1.parquet", "/data/train2.parquet"],
43
+ ... columns=["x", "y", "label"],
44
+ ... batch_size=1024,
45
+ ... )
46
+ >>> loader = DataLoader(ds, batch_size=None)
47
+ >>> # Now iterate over batches.
48
+ >>> for batch in loader:
49
+ ... x, y, label = batch["x"], batch["y"], batch["label"]
50
+ ... # Do some work.
51
+ ... ...
52
+ """
53
+
54
+ def __init__(
55
+ self,
56
+ file_paths: ty.Sequence[_FilePath],
57
+ /,
58
+ columns: ty.Optional[ty.Sequence[str]] = None,
59
+ batch_size: int = _DEFAULT_BATCH_SIZE,
60
+ ) -> None:
61
+ """Initialize this instance."""
62
+ self._file_paths = tuple(map(str, file_paths))
63
+ self._columns = None if columns is None else tuple(columns)
64
+ self._batch_size = batch_size
65
+
66
+ def __iter__(self) -> ty.Iterator[dict[str, torch.Tensor]]:
67
+ """
68
+ Stream Parquet data as mapped PyTorch tensors.
69
+
70
+ Build a PyArrow Dataset from the current worker's assigned file partition, then
71
+ create a Scanner to lazily read batches of the selected columns. Each batch is
72
+ converted to a dict mapping column names to PyTorch tensors (via NumPy).
73
+
74
+ Returns
75
+ -------
76
+ Iterator[dict[str, torch.Tensor]]
77
+ An iterator that yields one converted batch at a time.
78
+ """
79
+ if not (partition := self._get_partition()):
80
+ return
81
+
82
+ # Build the dataset for the current worker.
83
+ ds = pd.dataset(partition, format="parquet")
84
+
85
+ # Create a scanner. This does not read data.
86
+ columns = None if self._columns is None else list(self._columns)
87
+ scanner = ds.scanner(columns=columns, batch_size=self._batch_size)
88
+
89
+ for batch in scanner.to_batches():
90
+ data_dict: dict[str, torch.Tensor] = {}
91
+ for name, array in zip(batch.column_names, batch.columns):
92
+ data_dict[name] = pa_array_to_tensor(array)
93
+ yield data_dict
94
+
95
+ # private interfaces
96
+
97
+ def _get_partition(self) -> tuple[str, ...]:
98
+ """
99
+ Get the partition of file paths for the current worker.
100
+
101
+ This method splits the full list of file paths into contiguous, nearly equal-sized
102
+ partitions based on the total number of workers and the current worker ID.
103
+
104
+ If running in the main process (i.e., no worker information is available), the
105
+ entire list of file paths is returned.
106
+
107
+ Returns
108
+ -------
109
+ tuple[str, ...]
110
+ The partition of file paths for the current worker.
111
+ """
112
+ if (info := get_worker_info()) is None:
113
+ return self._file_paths
114
+
115
+ n = len(self._file_paths)
116
+ per_worker = (n + info.num_workers - 1) // info.num_workers
117
+
118
+ start = info.id * per_worker
119
+ end = n if (end := start + per_worker) > n else end
120
+ return self._file_paths[start:end]
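
The _get_partition split above is a plain ceil division over the file list; a hypothetical illustration with 5 files and 2 workers:

# Hypothetical illustration of the per-worker file partitioning used above.
files = [f"part-{i}.parquet" for i in range(5)]
num_workers = 2
per_worker = (len(files) + num_workers - 1) // num_workers  # ceil(5 / 2) = 3
for worker_id in range(num_workers):
    start = worker_id * per_worker
    end = min(start + per_worker, len(files))
    print(worker_id, files[start:end])
# 0 ['part-0.parquet', 'part-1.parquet', 'part-2.parquet']
# 1 ['part-3.parquet', 'part-4.parquet']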
torch_rechub/trainers/ctr_trainer.py
@@ -43,6 +43,7 @@ class CTRTrainer(object):
43
43
  gpus=None,
44
44
  loss_mode=True,
45
45
  model_path="./",
46
+ model_logger=None,
46
47
  ):
47
48
  self.model = model # for uniform weights save method in one gpu or multi gpu
48
49
  if gpus is None:
@@ -70,10 +71,13 @@ class CTRTrainer(object):
70
71
  self.model_path = model_path
71
72
  # Initialize regularization loss
72
73
  self.reg_loss_fn = RegularizationLoss(**regularization_params)
74
+ self.model_logger = model_logger
73
75
 
74
76
  def train_one_epoch(self, data_loader, log_interval=10):
75
77
  self.model.train()
76
78
  total_loss = 0
79
+ epoch_loss = 0
80
+ batch_count = 0
77
81
  tk0 = tqdm.tqdm(data_loader, desc="train", smoothing=0, mininterval=1.0)
78
82
  for i, (x_dict, y) in enumerate(tk0):
79
83
  x_dict = {k: v.to(self.device) for k, v in x_dict.items()} # tensor to GPU
@@ -93,27 +97,62 @@ class CTRTrainer(object):
93
97
  loss.backward()
94
98
  self.optimizer.step()
95
99
  total_loss += loss.item()
100
+ epoch_loss += loss.item()
101
+ batch_count += 1
96
102
  if (i + 1) % log_interval == 0:
97
103
  tk0.set_postfix(loss=total_loss / log_interval)
98
104
  total_loss = 0
99
105
 
106
+ # Return average epoch loss
107
+ return epoch_loss / batch_count if batch_count > 0 else 0
108
+
100
109
  def fit(self, train_dataloader, val_dataloader=None):
110
+ for logger in self._iter_loggers():
111
+ logger.log_hyperparams({'n_epoch': self.n_epoch, 'learning_rate': self.optimizer.param_groups[0]['lr'], 'loss_mode': self.loss_mode})
112
+
101
113
  for epoch_i in range(self.n_epoch):
102
114
  print('epoch:', epoch_i)
103
- self.train_one_epoch(train_dataloader)
115
+ train_loss = self.train_one_epoch(train_dataloader)
116
+
117
+ for logger in self._iter_loggers():
118
+ logger.log_metrics({'train/loss': train_loss, 'learning_rate': self.optimizer.param_groups[0]['lr']}, step=epoch_i)
119
+
104
120
  if self.scheduler is not None:
105
121
  if epoch_i % self.scheduler.step_size == 0:
106
122
  print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr']))
107
123
  self.scheduler.step() # update lr in epoch level by scheduler
124
+
108
125
  if val_dataloader:
109
126
  auc = self.evaluate(self.model, val_dataloader)
110
127
  print('epoch:', epoch_i, 'validation: auc:', auc)
128
+
129
+ for logger in self._iter_loggers():
130
+ logger.log_metrics({'val/auc': auc}, step=epoch_i)
131
+
111
132
  if self.early_stopper.stop_training(auc, self.model.state_dict()):
112
133
  print(f'validation: best auc: {self.early_stopper.best_auc}')
113
134
  self.model.load_state_dict(self.early_stopper.best_weights)
114
135
  break
136
+
115
137
  torch.save(self.model.state_dict(), os.path.join(self.model_path, "model.pth")) # save best auc model
116
138
 
139
+ for logger in self._iter_loggers():
140
+ logger.finish()
141
+
142
+ def _iter_loggers(self):
143
+ """Return logger instances as a list.
144
+
145
+ Returns
146
+ -------
147
+ list
148
+ Active logger instances. Empty when ``model_logger`` is ``None``.
149
+ """
150
+ if self.model_logger is None:
151
+ return []
152
+ if isinstance(self.model_logger, (list, tuple)):
153
+ return list(self.model_logger)
154
+ return [self.model_logger]
155
+
117
156
  def evaluate(self, model, data_loader):
118
157
  model.eval()
119
158
  targets, predicts = list(), list()
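
With these hooks in place, a trainer can be wired to one logger or a list of loggers through model_logger. A hedged sketch (model, train_dl and val_dl are placeholders assumed to be defined elsewhere):

# Sketch: model_logger accepts a single logger or a list; _iter_loggers() handles both.
# `model`, `train_dl` and `val_dl` are placeholders assumed to exist.
from torch_rechub.basic.tracking import TensorBoardXLogger, WandbLogger
from torch_rechub.trainers import CTRTrainer

loggers = [WandbLogger(project="rechub-demo"), TensorBoardXLogger(log_dir="./runs/ctr")]
trainer = CTRTrainer(model, model_logger=loggers)
trainer.fit(train_dl, val_dl)  # logs train/loss, learning_rate and val/auc per epoch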
torch_rechub/trainers/match_trainer.py
@@ -39,6 +39,7 @@ class MatchTrainer(object):
39
39
  device="cpu",
40
40
  gpus=None,
41
41
  model_path="./",
42
+ model_logger=None,
42
43
  ):
43
44
  self.model = model # for uniform weights save method in one gpu or multi gpu
44
45
  if gpus is None:
@@ -73,10 +74,13 @@ class MatchTrainer(object):
73
74
  self.model_path = model_path
74
75
  # Initialize regularization loss
75
76
  self.reg_loss_fn = RegularizationLoss(**regularization_params)
77
+ self.model_logger = model_logger
76
78
 
77
79
  def train_one_epoch(self, data_loader, log_interval=10):
78
80
  self.model.train()
79
81
  total_loss = 0
82
+ epoch_loss = 0
83
+ batch_count = 0
80
84
  tk0 = tqdm.tqdm(data_loader, desc="train", smoothing=0, mininterval=1.0)
81
85
  for i, (x_dict, y) in enumerate(tk0):
82
86
  x_dict = {k: v.to(self.device) for k, v in x_dict.items()} # tensor to GPU
@@ -114,14 +118,26 @@ class MatchTrainer(object):
114
118
  loss.backward()
115
119
  self.optimizer.step()
116
120
  total_loss += loss.item()
121
+ epoch_loss += loss.item()
122
+ batch_count += 1
117
123
  if (i + 1) % log_interval == 0:
118
124
  tk0.set_postfix(loss=total_loss / log_interval)
119
125
  total_loss = 0
120
126
 
127
+ # Return average epoch loss
128
+ return epoch_loss / batch_count if batch_count > 0 else 0
129
+
121
130
  def fit(self, train_dataloader, val_dataloader=None):
131
+ for logger in self._iter_loggers():
132
+ logger.log_hyperparams({'n_epoch': self.n_epoch, 'learning_rate': self.optimizer.param_groups[0]['lr'], 'loss_mode': self.mode})
133
+
122
134
  for epoch_i in range(self.n_epoch):
123
135
  print('epoch:', epoch_i)
124
- self.train_one_epoch(train_dataloader)
136
+ train_loss = self.train_one_epoch(train_dataloader)
137
+
138
+ for logger in self._iter_loggers():
139
+ logger.log_metrics({'train/loss': train_loss, 'learning_rate': self.optimizer.param_groups[0]['lr']}, step=epoch_i)
140
+
125
141
  if self.scheduler is not None:
126
142
  if epoch_i % self.scheduler.step_size == 0:
127
143
  print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr']))
@@ -130,12 +146,34 @@ class MatchTrainer(object):
130
146
  if val_dataloader:
131
147
  auc = self.evaluate(self.model, val_dataloader)
132
148
  print('epoch:', epoch_i, 'validation: auc:', auc)
149
+
150
+ for logger in self._iter_loggers():
151
+ logger.log_metrics({'val/auc': auc}, step=epoch_i)
152
+
133
153
  if self.early_stopper.stop_training(auc, self.model.state_dict()):
134
154
  print(f'validation: best auc: {self.early_stopper.best_auc}')
135
155
  self.model.load_state_dict(self.early_stopper.best_weights)
136
156
  break
157
+
137
158
  torch.save(self.model.state_dict(), os.path.join(self.model_path, "model.pth")) # save best auc model
138
159
 
160
+ for logger in self._iter_loggers():
161
+ logger.finish()
162
+
163
+ def _iter_loggers(self):
164
+ """Return logger instances as a list.
165
+
166
+ Returns
167
+ -------
168
+ list
169
+ Active logger instances. Empty when ``model_logger`` is ``None``.
170
+ """
171
+ if self.model_logger is None:
172
+ return []
173
+ if isinstance(self.model_logger, (list, tuple)):
174
+ return list(self.model_logger)
175
+ return [self.model_logger]
176
+
139
177
  def evaluate(self, model, data_loader):
140
178
  model.eval()
141
179
  targets, predicts = list(), list()
torch_rechub/trainers/mtl_trainer.py
@@ -47,6 +47,7 @@ class MTLTrainer(object):
47
47
  device="cpu",
48
48
  gpus=None,
49
49
  model_path="./",
50
+ model_logger=None,
50
51
  ):
51
52
  self.model = model
52
53
  if gpus is None:
@@ -104,6 +105,7 @@ class MTLTrainer(object):
104
105
  self.model_path = model_path
105
106
  # Initialize regularization loss
106
107
  self.reg_loss_fn = RegularizationLoss(**regularization_params)
108
+ self.model_logger = model_logger
107
109
 
108
110
  def train_one_epoch(self, data_loader):
109
111
  self.model.train()
@@ -163,21 +165,42 @@ class MTLTrainer(object):
163
165
  def fit(self, train_dataloader, val_dataloader, mode='base', seed=0):
164
166
  total_log = []
165
167
 
168
+ # Log hyperparameters once
169
+ for logger in self._iter_loggers():
170
+ logger.log_hyperparams({'n_epoch': self.n_epoch, 'learning_rate': self._current_lr(), 'adaptive_method': self.adaptive_method})
171
+
166
172
  for epoch_i in range(self.n_epoch):
167
173
  _log_per_epoch = self.train_one_epoch(train_dataloader)
168
174
 
175
+ # Collect metrics
176
+ logs = {f'train/task_{task_id}_loss': loss_val for task_id, loss_val in enumerate(_log_per_epoch)}
177
+ lr_value = self._current_lr()
178
+ if lr_value is not None:
179
+ logs['learning_rate'] = lr_value
180
+
169
181
  if self.scheduler is not None:
170
182
  if epoch_i % self.scheduler.step_size == 0:
171
183
  print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr']))
172
184
  self.scheduler.step() # update lr in epoch level by scheduler
185
+
173
186
  scores = self.evaluate(self.model, val_dataloader)
174
187
  print('epoch:', epoch_i, 'validation scores: ', scores)
175
188
 
176
- for score in scores:
189
+ for task_id, score in enumerate(scores):
190
+ logs[f'val/task_{task_id}_score'] = score
177
191
  _log_per_epoch.append(score)
192
+ logs['auc'] = scores[self.earlystop_taskid]
193
+
194
+ if self.loss_weight:
195
+ for task_id, weight in enumerate(self.loss_weight):
196
+ logs[f'loss_weight/task_{task_id}'] = weight.item()
178
197
 
179
198
  total_log.append(_log_per_epoch)
180
199
 
200
+ # Log metrics once per epoch
201
+ for logger in self._iter_loggers():
202
+ logger.log_metrics(logs, step=epoch_i)
203
+
181
204
  if self.early_stopper.stop_training(scores[self.earlystop_taskid], self.model.state_dict()):
182
205
  print('validation best auc of main task %d: %.6f' % (self.earlystop_taskid, self.early_stopper.best_auc))
183
206
  self.model.load_state_dict(self.early_stopper.best_weights)
@@ -185,8 +208,33 @@ class MTLTrainer(object):
185
208
 
186
209
  torch.save(self.model.state_dict(), os.path.join(self.model_path, "model_{}_{}.pth".format(mode, seed))) # save best auc model
187
210
 
211
+ for logger in self._iter_loggers():
212
+ logger.finish()
213
+
188
214
  return total_log
189
215
 
216
+ def _iter_loggers(self):
217
+ """Return logger instances as a list.
218
+
219
+ Returns
220
+ -------
221
+ list
222
+ Active logger instances. Empty when ``model_logger`` is ``None``.
223
+ """
224
+ if self.model_logger is None:
225
+ return []
226
+ if isinstance(self.model_logger, (list, tuple)):
227
+ return list(self.model_logger)
228
+ return [self.model_logger]
229
+
230
+ def _current_lr(self):
231
+ """Fetch current learning rate regardless of adaptive method."""
232
+ if self.adaptive_method == "metabalance":
233
+ return self.share_optimizer.param_groups[0]['lr'] if hasattr(self, 'share_optimizer') else None
234
+ if hasattr(self, 'optimizer'):
235
+ return self.optimizer.param_groups[0]['lr']
236
+ return None
237
+
190
238
  def evaluate(self, model, data_loader):
191
239
  model.eval()
192
240
  targets, predicts = list(), list()
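
For a two-task setup, the per-epoch logs dictionary assembled in fit above would look roughly like this (values are placeholders):

# Shape of the per-epoch metrics dict built in MTLTrainer.fit (placeholder values).
logs = {
    "train/task_0_loss": 0.42,   # from train_one_epoch
    "train/task_1_loss": 0.37,
    "learning_rate": 1e-3,       # from _current_lr()
    "val/task_0_score": 0.81,    # from evaluate()
    "val/task_1_score": 0.77,
    "auc": 0.81,                 # score of the early-stopping task (earlystop_taskid)
    # "loss_weight/task_0": ..., # only present when adaptive loss weighting is active
}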
torch_rechub/trainers/seq_trainer.py
@@ -46,7 +46,22 @@ class SeqTrainer(object):
46
46
  ... )
47
47
  """
48
48
 
49
- def __init__(self, model, optimizer_fn=torch.optim.Adam, optimizer_params=None, scheduler_fn=None, scheduler_params=None, n_epoch=10, earlystop_patience=10, device='cpu', gpus=None, model_path='./', loss_type='cross_entropy', loss_params=None):
49
+ def __init__(
50
+ self,
51
+ model,
52
+ optimizer_fn=torch.optim.Adam,
53
+ optimizer_params=None,
54
+ scheduler_fn=None,
55
+ scheduler_params=None,
56
+ n_epoch=10,
57
+ earlystop_patience=10,
58
+ device='cpu',
59
+ gpus=None,
60
+ model_path='./',
61
+ loss_type='cross_entropy',
62
+ loss_params=None,
63
+ model_logger=None
64
+ ):
50
65
  self.model = model # for uniform weights save method in one gpu or multi gpu
51
66
  if gpus is None:
52
67
  gpus = []
@@ -74,9 +89,11 @@ class SeqTrainer(object):
74
89
  loss_params = {"ignore_index": 0}
75
90
  self.loss_fn = nn.CrossEntropyLoss(**loss_params)
76
91
 
92
+ self.loss_type = loss_type
77
93
  self.n_epoch = n_epoch
78
94
  self.early_stopper = EarlyStopper(patience=earlystop_patience)
79
95
  self.model_path = model_path
96
+ self.model_logger = model_logger
80
97
 
81
98
  def fit(self, train_dataloader, val_dataloader=None):
82
99
  """训练模型.
@@ -90,10 +107,18 @@ class SeqTrainer(object):
90
107
  """
91
108
  history = {'train_loss': [], 'val_loss': [], 'val_accuracy': []}
92
109
 
110
+ for logger in self._iter_loggers():
111
+ logger.log_hyperparams({'n_epoch': self.n_epoch, 'learning_rate': self.optimizer.param_groups[0]['lr'], 'loss_type': self.loss_type})
112
+
93
113
  for epoch_i in range(self.n_epoch):
94
114
  print('epoch:', epoch_i)
95
115
  # Training phase
96
- self.train_one_epoch(train_dataloader)
116
+ train_loss = self.train_one_epoch(train_dataloader)
117
+ history['train_loss'].append(train_loss)
118
+
119
+ # Collect metrics
120
+ logs = {'train/loss': train_loss, 'learning_rate': self.optimizer.param_groups[0]['lr']}
121
+
97
122
  if self.scheduler is not None:
98
123
  if epoch_i % self.scheduler.step_size == 0:
99
124
  print("Current lr : {}".format(self.optimizer.state_dict()['param_groups'][0]['lr']))
@@ -105,6 +130,10 @@ class SeqTrainer(object):
105
130
  history['val_loss'].append(val_loss)
106
131
  history['val_accuracy'].append(val_accuracy)
107
132
 
133
+ logs['val/loss'] = val_loss
134
+ logs['val/accuracy'] = val_accuracy
135
+ logs['auc'] = val_accuracy # For compatibility with EarlyStopper
136
+
108
137
  print(f"epoch: {epoch_i}, validation: loss: {val_loss:.4f}, accuracy: {val_accuracy:.4f}")
109
138
 
110
139
  # Early stopping
@@ -113,9 +142,30 @@ class SeqTrainer(object):
113
142
  self.model.load_state_dict(self.early_stopper.best_weights)
114
143
  break
115
144
 
145
+ for logger in self._iter_loggers():
146
+ logger.log_metrics(logs, step=epoch_i)
147
+
116
148
  torch.save(self.model.state_dict(), os.path.join(self.model_path, "model.pth")) # save best model
149
+
150
+ for logger in self._iter_loggers():
151
+ logger.finish()
152
+
117
153
  return history
118
154
 
155
+ def _iter_loggers(self):
156
+ """Return logger instances as a list.
157
+
158
+ Returns
159
+ -------
160
+ list
161
+ Active logger instances. Empty when ``model_logger`` is ``None``.
162
+ """
163
+ if self.model_logger is None:
164
+ return []
165
+ if isinstance(self.model_logger, (list, tuple)):
166
+ return list(self.model_logger)
167
+ return [self.model_logger]
168
+
119
169
  def train_one_epoch(self, data_loader, log_interval=10):
120
170
  """Train the model for a single epoch.
121
171
 
@@ -128,6 +178,8 @@ class SeqTrainer(object):
128
178
  """
129
179
  self.model.train()
130
180
  total_loss = 0
181
+ epoch_loss = 0
182
+ batch_count = 0
131
183
  tk0 = tqdm.tqdm(data_loader, desc="train", smoothing=0, mininterval=1.0)
132
184
  for i, (seq_tokens, seq_positions, seq_time_diffs, targets) in enumerate(tk0):
133
185
  # Move tensors to the target device
@@ -152,10 +204,15 @@ class SeqTrainer(object):
152
204
  self.optimizer.step()
153
205
 
154
206
  total_loss += loss.item()
207
+ epoch_loss += loss.item()
208
+ batch_count += 1
155
209
  if (i + 1) % log_interval == 0:
156
210
  tk0.set_postfix(loss=total_loss / log_interval)
157
211
  total_loss = 0
158
212
 
213
+ # Return average epoch loss
214
+ return epoch_loss / batch_count if batch_count > 0 else 0
215
+
159
216
  def evaluate(self, data_loader):
160
217
  """Evaluate the model on a validation/test data loader.
161
218
 
torch_rechub-0.0.6.dist-info/METADATA
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: torch-rechub
3
- Version: 0.0.5
3
+ Version: 0.0.6
4
4
  Summary: A Pytorch Toolbox for Recommendation Models, Easy-to-use and Easy-to-extend.
5
5
  Project-URL: Homepage, https://github.com/datawhalechina/torch-rechub
6
6
  Project-URL: Documentation, https://www.torch-rechub.com
@@ -28,19 +28,26 @@ Requires-Dist: scikit-learn>=0.24.0
28
28
  Requires-Dist: torch>=1.10.0
29
29
  Requires-Dist: tqdm>=4.60.0
30
30
  Requires-Dist: transformers>=4.46.3
31
+ Provides-Extra: bigdata
32
+ Requires-Dist: pyarrow~=21.0; extra == 'bigdata'
31
33
  Provides-Extra: dev
32
34
  Requires-Dist: bandit>=1.7.0; extra == 'dev'
33
35
  Requires-Dist: flake8>=3.8.0; extra == 'dev'
34
36
  Requires-Dist: isort==5.13.2; extra == 'dev'
35
37
  Requires-Dist: mypy>=0.800; extra == 'dev'
36
38
  Requires-Dist: pre-commit>=2.20.0; extra == 'dev'
39
+ Requires-Dist: pyarrow-stubs>=20.0; extra == 'dev'
37
40
  Requires-Dist: pytest-cov>=2.0; extra == 'dev'
38
41
  Requires-Dist: pytest>=6.0; extra == 'dev'
39
42
  Requires-Dist: toml>=0.10.2; extra == 'dev'
40
43
  Requires-Dist: yapf==0.43.0; extra == 'dev'
41
44
  Provides-Extra: onnx
42
- Requires-Dist: onnx>=1.12.0; extra == 'onnx'
43
- Requires-Dist: onnxruntime>=1.12.0; extra == 'onnx'
45
+ Requires-Dist: onnx>=1.14.0; extra == 'onnx'
46
+ Requires-Dist: onnxruntime>=1.14.0; extra == 'onnx'
47
+ Provides-Extra: tracking
48
+ Requires-Dist: swanlab>=0.1.0; extra == 'tracking'
49
+ Requires-Dist: tensorboardx>=2.5; extra == 'tracking'
50
+ Requires-Dist: wandb>=0.13.0; extra == 'tracking'
44
51
  Provides-Extra: visualization
45
52
  Requires-Dist: graphviz>=0.20; extra == 'visualization'
46
53
  Requires-Dist: torchview>=0.2.6; extra == 'visualization'
@@ -89,7 +96,8 @@ Description-Content-Type: text/markdown
89
96
  * **Easy configuration:** Adjust experiment settings easily via configuration files or command-line arguments.
90
97
  * **Reproducibility:** Designed to ensure reproducible experiment results.
91
98
  * **ONNX export:** Export trained models to ONNX format for easy deployment to production.
92
- * **Other features:** e.g., support for negative sampling, multi-task learning, and more.
99
+ * **Cross-engine data processing:** PySpark-based data processing and transformation is now supported, making it easy to fit into big-data pipelines.
100
+ * **Experiment visualization and tracking:** Built-in, unified integration with three visualization/tracking tools: WandB, SwanLab, and TensorBoardX.
93
101
 
94
102
  ## 📖 Table of Contents
95
103
 
@@ -399,4 +407,4 @@ ctr_trainer.visualization(save_path="model.pdf", dpi=300) # Save as a high-resolution PDF
399
407
 
400
408
  ---
401
409
 
402
- *Last updated: [2025-12-04]*
410
+ *Last updated: [2025-12-11]*
torch_rechub-0.0.6.dist-info/RECORD
@@ -8,6 +8,10 @@ torch_rechub/basic/layers.py,sha256=URWk78dlffMOAhDVDhOhugcr4nmwEa192AI1diktC-4,
8
8
  torch_rechub/basic/loss_func.py,sha256=6bjljqpiuUP6O8-wUbGd8FSvflY5Dp_DV_57OuQVMz4,7969
9
9
  torch_rechub/basic/metaoptimizer.py,sha256=y-oT4MV3vXnSQ5Zd_ZEHP1KClITEi3kbZa6RKjlkYw8,3093
10
10
  torch_rechub/basic/metric.py,sha256=9JsaJJGvT6VRvsLoM2Y171CZxESsjYTofD3qnMI-bPM,8443
11
+ torch_rechub/basic/tracking.py,sha256=7-aoyKJxyqb8GobpjRjFsgPYWsBDOV44BYOC_vMoCto,6608
12
+ torch_rechub/data/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ torch_rechub/data/convert.py,sha256=clGFEbDSDpdZBvscWatfjtuXMZUzgy1kiEAg4w_q7VM,2241
14
+ torch_rechub/data/dataset.py,sha256=fDDQ5N3x99KPfy0Ux4LRQbFlWbLg_dvKTO1WUEbEN04,4111
11
15
  torch_rechub/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
16
  torch_rechub/models/generative/__init__.py,sha256=TsCdVIhOcalQwqKZKjEuNbHKyIjyclapKGNwYfFR7TM,135
13
17
  torch_rechub/models/generative/hllm.py,sha256=6Vrp5Bh0fTFHCn7C-3EqzOyc7UunOyEY9TzAKGHrW-8,9669
@@ -45,11 +49,11 @@ torch_rechub/models/ranking/edcn.py,sha256=6f_S8I6Ir16kCIU54R4EfumWfUFOND5KDKUPH
45
49
  torch_rechub/models/ranking/fibinet.py,sha256=fmEJ9WkO8Mn0RtK_8aRHlnQFh_jMBPO0zODoHZPWmDA,2234
46
50
  torch_rechub/models/ranking/widedeep.py,sha256=eciRvWRBHLlctabLLS5NB7k3MnqrWXCBdpflOU6jMB0,1636
47
51
  torch_rechub/trainers/__init__.py,sha256=NSa2DqgfE1HGDyj40YgrbtUrfBHBxNBpw57XtaAB_jE,148
48
- torch_rechub/trainers/ctr_trainer.py,sha256=ECXaK0x2_6jZVxtEazgN3hkBpSAMPeGeNtunqI_OECo,12860
49
- torch_rechub/trainers/match_trainer.py,sha256=QHZb32Rf7yp-NvEzdeiG1HQghQ76_vuu59K1IsdK60k,15055
52
+ torch_rechub/trainers/ctr_trainer.py,sha256=e0xS-W48BOixN0ogksWOcVJNKFiO3g2oNA_hlHytRqk,14138
53
+ torch_rechub/trainers/match_trainer.py,sha256=atkO-gfDuTk6lh-WvaJOh5kgn6HPzbQQN42Rvz8kyXY,16327
50
54
  torch_rechub/trainers/matching.md,sha256=vIBQ3UMmVpUpyk38rrkelFwm_wXVXqMOuqzYZ4M8bzw,30
51
- torch_rechub/trainers/mtl_trainer.py,sha256=MjasE_QOPfGxiUW1JpYYQ2iuBSSk-lissAGp4Sw1CWk,16427
52
- torch_rechub/trainers/seq_trainer.py,sha256=uAo9XymwQupCqvm5otKW81tz1nxd3crJ2ul2r7lrEAE,17633
55
+ torch_rechub/trainers/mtl_trainer.py,sha256=n3T-ctWACSyl0awBQixOlZUQ8I5cfGyZzgKV09EF8hw,18293
56
+ torch_rechub/trainers/seq_trainer.py,sha256=pyY70kAjTWdKrnAYZynql1PPNtveYDLMB_1hbpCHa48,19217
53
57
  torch_rechub/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
58
  torch_rechub/utils/data.py,sha256=vzLAAVt6dujg_vbGhQewiJc0l6JzwzdcM_9EjoOz898,19882
55
59
  torch_rechub/utils/hstu_utils.py,sha256=qLON_pJDC-kDyQn1PoN_HaHi5xTNCwZPgJeV51Z61Lc,6207
@@ -58,7 +62,7 @@ torch_rechub/utils/model_utils.py,sha256=VLhSbTpupxrFyyY3NzMQ32PPmo5YHm1T96u9KDl
58
62
  torch_rechub/utils/mtl.py,sha256=AxU05ezizCuLdbPuCg1ZXE0WAStzuxaS5Sc3nwMCBpI,5737
59
63
  torch_rechub/utils/onnx_export.py,sha256=LRHyZaR9zZJyg6xtuqQHWmusWq-yEvw9EhlmoEwcqsg,8364
60
64
  torch_rechub/utils/visualization.py,sha256=Djv8W5SkCk3P2dol5VXf0_eanIhxDwRd7fzNOQY4uiU,9506
61
- torch_rechub-0.0.5.dist-info/METADATA,sha256=7k9N1xGB4JeWzri7iA7kJbPnAJ-KhXF7vBV-_b8Ghrg,17998
62
- torch_rechub-0.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
63
- torch_rechub-0.0.5.dist-info/licenses/LICENSE,sha256=V7ietiX9G_84HtgEbxDgxClniqXGm2t5q8WM4AHGTu0,1066
64
- torch_rechub-0.0.5.dist-info/RECORD,,
65
+ torch_rechub-0.0.6.dist-info/METADATA,sha256=OihjWb0yCI1bmTEoCYAC6pI6cCgl5KS5uSrAGZwv7yY,18470
66
+ torch_rechub-0.0.6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
67
+ torch_rechub-0.0.6.dist-info/licenses/LICENSE,sha256=V7ietiX9G_84HtgEbxDgxClniqXGm2t5q8WM4AHGTu0,1066
68
+ torch_rechub-0.0.6.dist-info/RECORD,,