textpolicy 0.0.1__py3-none-any.whl → 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. textpolicy/__init__.py +52 -0
  2. textpolicy/__main__.py +8 -0
  3. textpolicy/algorithms/__init__.py +54 -0
  4. textpolicy/algorithms/grpo.py +642 -0
  5. textpolicy/algorithms/gspo.py +582 -0
  6. textpolicy/buffer/__init__.py +23 -0
  7. textpolicy/buffer/buffer.py +244 -0
  8. textpolicy/buffer/episode.py +383 -0
  9. textpolicy/buffer/sampling.py +438 -0
  10. textpolicy/buffer/storage.py +255 -0
  11. textpolicy/cli.py +67 -0
  12. textpolicy/environment/__init__.py +79 -0
  13. textpolicy/environment/base.py +110 -0
  14. textpolicy/environment/environment.py +46 -0
  15. textpolicy/environment/factory.py +103 -0
  16. textpolicy/environment/gym.py +106 -0
  17. textpolicy/environment/task_suites.py +51 -0
  18. textpolicy/environment/text_generation.py +789 -0
  19. textpolicy/environment/vectorized.py +253 -0
  20. textpolicy/generation/__init__.py +62 -0
  21. textpolicy/generation/lora.py +411 -0
  22. textpolicy/generation/mlx_generation.py +557 -0
  23. textpolicy/generation/reload.py +253 -0
  24. textpolicy/rewards/__init__.py +137 -0
  25. textpolicy/rewards/adapters.py +387 -0
  26. textpolicy/rewards/basic.py +214 -0
  27. textpolicy/rewards/integrated_system.py +338 -0
  28. textpolicy/rewards/mlx_batch_processor.py +447 -0
  29. textpolicy/rewards/registry.py +293 -0
  30. textpolicy/rewards/rollout_rewards.py +410 -0
  31. textpolicy/rewards/verifiers.py +369 -0
  32. textpolicy/rollout/__init__.py +44 -0
  33. textpolicy/rollout/aggregator.py +145 -0
  34. textpolicy/rollout/base.py +108 -0
  35. textpolicy/rollout/rollout.py +142 -0
  36. textpolicy/rollout/runner.py +280 -0
  37. textpolicy/rollout/strategy.py +208 -0
  38. textpolicy/rollout/worker.py +194 -0
  39. textpolicy/training/__init__.py +14 -0
  40. textpolicy/training/metrics.py +242 -0
  41. textpolicy/training/rollout_manager.py +78 -0
  42. textpolicy/training/trainer.py +684 -0
  43. textpolicy/utils/__init__.py +40 -0
  44. textpolicy/utils/benchmarking.py +489 -0
  45. textpolicy/utils/data.py +60 -0
  46. textpolicy/utils/debug.py +170 -0
  47. textpolicy/utils/environment.py +349 -0
  48. textpolicy/utils/logging/__init__.py +22 -0
  49. textpolicy/utils/logging/base.py +48 -0
  50. textpolicy/utils/logging/console.py +61 -0
  51. textpolicy/utils/logging/factory.py +133 -0
  52. textpolicy/utils/logging/multi.py +83 -0
  53. textpolicy/utils/logging/tensorboard.py +65 -0
  54. textpolicy/utils/logging/wandb.py +72 -0
  55. textpolicy/utils/memory.py +118 -0
  56. textpolicy/utils/performance.py +464 -0
  57. textpolicy/utils/timing.py +171 -0
  58. textpolicy/validate.py +101 -0
  59. textpolicy/validation/__init__.py +13 -0
  60. textpolicy/validation/logprob_validation.py +315 -0
  61. textpolicy-0.1.0.dist-info/METADATA +99 -0
  62. textpolicy-0.1.0.dist-info/RECORD +66 -0
  63. textpolicy-0.1.0.dist-info/entry_points.txt +2 -0
  64. textpolicy-0.0.1.dist-info/METADATA +0 -10
  65. textpolicy-0.0.1.dist-info/RECORD +0 -6
  66. {textpolicy-0.0.1.dist-info → textpolicy-0.1.0.dist-info}/WHEEL +0 -0
  67. {textpolicy-0.0.1.dist-info → textpolicy-0.1.0.dist-info}/licenses/LICENSE +0 -0
  68. {textpolicy-0.0.1.dist-info → textpolicy-0.1.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,133 @@
1
+ # textpolicy/utils/logging/factory.py
2
+ """
3
+ Factory functions for creating logger instances.
4
+ """
5
+
6
+ from typing import Optional, List
7
+ from .base import Logger
8
+ from .wandb import WandbLogger
9
+ from .tensorboard import TensorboardLogger
10
+ from .console import ConsoleLogger
11
+ from .multi import MultiLogger
12
+
13
+
14
def create_logger(
    logger_type: str = "console",
    **kwargs
) -> Logger:
    """
    Build a single logger backend by name.

    Args:
        logger_type: Type of logger ("wandb", "tensorboard", "console")
        **kwargs: Backend-specific constructor parameters

    Returns:
        Logger instance

    Raises:
        ValueError: If logger_type is unknown, or a required backend
            parameter is missing

    Examples:
        # Console logger
        logger = create_logger("console", verbose=True)

        # Wandb logger
        logger = create_logger("wandb", project_name="my-project", run_name="test")

        # TensorBoard logger
        logger = create_logger("tensorboard", log_dir="./logs")
    """
    # Guard-clause dispatch: each branch validates its own required
    # parameter before constructing the backend.
    if logger_type == "wandb":
        if "project_name" not in kwargs:
            raise ValueError("project_name is required for wandb logger")
        # pop() runs before **kwargs unpacks, so project_name is not
        # passed twice to the constructor.
        return WandbLogger(kwargs.pop("project_name"), **kwargs)

    if logger_type == "tensorboard":
        if "log_dir" not in kwargs:
            raise ValueError("log_dir is required for tensorboard logger")
        # TensorboardLogger only accepts log_dir; extra kwargs are ignored.
        return TensorboardLogger(kwargs.pop("log_dir"))

    if logger_type == "console":
        return ConsoleLogger(**kwargs)

    supported = ["wandb", "tensorboard", "console"]
    raise ValueError(f"Unknown logger type: {logger_type}. Available: {supported}")
56
+
57
+
58
def create_multi_logger(
    configs: List[dict]
) -> MultiLogger:
    """
    Create a MultiLogger from a list of logger configurations.

    Args:
        configs: List of dictionaries, each with a "type" key naming the
            backend plus any backend-specific parameters. The dicts are
            not modified.

    Returns:
        MultiLogger instance wrapping one logger per config

    Raises:
        ValueError: If a config is missing the "type" key, or names an
            unknown logger type

    Example:
        logger = create_multi_logger([
            {"type": "console", "verbose": True},
            {"type": "wandb", "project_name": "my-project"}
        ])
    """
    loggers = []
    for config in configs:
        # Copy before popping: the previous implementation popped "type"
        # directly off the caller's dict, mutating it and making the same
        # configs list unusable for a second call.
        params = dict(config)
        if "type" not in params:
            raise ValueError(f"Logger config missing 'type' key: {config}")
        logger_type = params.pop("type")
        loggers.append(create_logger(logger_type, **params))

    return MultiLogger(loggers)
83
+
84
+
85
def create_auto_logger(
    project_name: Optional[str] = None,
    log_dir: Optional[str] = None,
    console: bool = True
) -> Logger:
    """
    Automatically create appropriate logger based on available dependencies.

    Priority order:
    1. Wandb (if project_name provided and wandb available)
    2. TensorBoard (if log_dir provided and tensorboard available)
    3. Console (always available)

    Args:
        project_name: Wandb project name (enables wandb if available)
        log_dir: TensorBoard log directory (enables tensorboard if available)
        console: Whether to include console logging

    Returns:
        Logger instance (MultiLogger if multiple backends, single Logger otherwise)
    """
    backends: List[Logger] = []

    # Each optional backend raises ImportError from its constructor when
    # the underlying package is missing; treat that as "skip".
    if project_name:
        try:
            backends.append(WandbLogger(project_name))
        except ImportError:
            print("Warning: wandb not available, skipping")

    if log_dir:
        try:
            backends.append(TensorboardLogger(log_dir))
        except ImportError:
            print("Warning: tensorboard not available, skipping")

    if console:
        backends.append(ConsoleLogger(verbose=True))

    # Collapse to the simplest logger that covers what was collected.
    if not backends:
        # console=False and no optional backend succeeded — still return
        # something usable rather than None.
        return ConsoleLogger(verbose=True)
    if len(backends) == 1:
        return backends[0]
    return MultiLogger(backends)
@@ -0,0 +1,83 @@
1
+ # textpolicy/utils/logging/multi.py
2
+ """
3
+ Multi-logger for combining multiple logging backends.
4
+ """
5
+
6
+ from typing import Dict, List
7
+ from .base import Logger
8
+
9
+
10
class MultiLogger(Logger):
    """
    Fan out logging calls to several backends behind one Logger interface.

    Features:
    - Log to multiple backends simultaneously
    - Graceful error handling (one logger failure doesn't stop others)
    - Unified interface for complex logging setups

    Example:
        # Log to both wandb and console
        logger = MultiLogger([
            WandbLogger("my-project"),
            ConsoleLogger(verbose=True)
        ])
    """

    def __init__(self, loggers: List[Logger]):
        """
        Store the list of backends to fan out to.

        Args:
            loggers: List of Logger instances to combine

        Raises:
            ValueError: If no loggers provided
        """
        if not loggers:
            raise ValueError("At least one logger must be provided")
        self.loggers = loggers

    def log_metrics(self, metrics: Dict[str, float], step: int):
        """
        Forward training metrics to every backend.

        A failing backend is reported with a warning and skipped so the
        remaining backends still receive the metrics.

        Args:
            metrics: Training metrics dictionary
            step: Training step number
        """
        for backend in self.loggers:
            try:
                backend.log_metrics(metrics, step)
            except Exception as e:
                print(f"Warning: Logger {type(backend).__name__} failed: {e}")

    def log_evaluation(self, metrics: Dict[str, float], step: int):
        """
        Forward evaluation metrics to every backend.

        A failing backend is reported with a warning and skipped so the
        remaining backends still receive the metrics.

        Args:
            metrics: Evaluation metrics dictionary
            step: Training step when evaluation was performed
        """
        for backend in self.loggers:
            try:
                backend.log_evaluation(metrics, step)
            except Exception as e:
                print(f"Warning: Logger {type(backend).__name__} failed: {e}")

    def finish(self):
        """
        Finish every backend, continuing past individual failures.
        """
        for backend in self.loggers:
            try:
                backend.finish()
            except Exception as e:
                print(f"Warning: Logger {type(backend).__name__} finish failed: {e}")
@@ -0,0 +1,65 @@
1
+ # textpolicy/utils/logging/tensorboard.py
2
+ """
3
+ TensorBoard logging integration.
4
+ """
5
+
6
+ from typing import Dict
7
+ from .base import Logger
8
+
9
+
10
class TensorboardLogger(Logger):
    """
    TensorBoard integration for local experiment visualization.

    Features:
    - Local scalar metric visualization
    - Histogram and distribution tracking
    - Image and model graph visualization
    - No external service dependency

    Requires: pip install tensorboard
    """

    def __init__(self, log_dir: str):
        """
        Initialize TensorBoard logging.

        Args:
            log_dir: Directory to store TensorBoard log files

        Raises:
            ImportError: If tensorboard is not installed
        """
        # Import lazily so the package works without the optional
        # tensorboard dependency installed.
        try:
            from torch.utils.tensorboard import SummaryWriter  # type: ignore
            self.writer = SummaryWriter(log_dir)
        except ImportError:
            raise ImportError(
                "tensorboard not installed. Install with: pip install tensorboard"
            )

    def log_metrics(self, metrics: Dict[str, float], step: int):
        """
        Log training metrics to TensorBoard with 'train/' prefix.

        Args:
            metrics: Training metrics dictionary
            step: Training step number
        """
        for name, scalar in metrics.items():
            self.writer.add_scalar("train/" + name, scalar, step)

    def log_evaluation(self, metrics: Dict[str, float], step: int):
        """
        Log evaluation metrics to TensorBoard with 'eval/' prefix.

        Args:
            metrics: Evaluation metrics dictionary
            step: Training step when evaluation was performed
        """
        for name, scalar in metrics.items():
            self.writer.add_scalar("eval/" + name, scalar, step)

    def finish(self):
        """Close TensorBoard writer and flush remaining data."""
        self.writer.close()
@@ -0,0 +1,72 @@
1
+ # textpolicy/utils/logging/wandb.py
2
+ """
3
+ Weights & Biases (wandb) logging integration.
4
+ """
5
+
6
+ from typing import Dict, Optional
7
+ from .base import Logger
8
+
9
+
10
class WandbLogger(Logger):
    """
    Weights & Biases integration for experiment tracking.

    Features:
    - Automatic experiment organization with projects
    - Real-time metric visualization
    - Hyperparameter tracking
    - Model artifact management

    Requires: pip install wandb
    """

    def __init__(self, project_name: str, run_name: Optional[str] = None, **kwargs):
        """
        Initialize wandb logging.

        Args:
            project_name: Wandb project name for organization
            run_name: Optional run name (auto-generated if None)
            **kwargs: Additional wandb.init() parameters (tags, config, etc.)

        Raises:
            ImportError: If wandb is not installed
        """
        # Import lazily so the package works without the optional
        # wandb dependency installed.
        try:
            import wandb  # type: ignore
            self.wandb = wandb
            self.run = wandb.init(project=project_name, name=run_name, **kwargs)
        except ImportError:
            raise ImportError(
                "wandb not installed. Install with: pip install wandb"
            )

    def log_metrics(self, metrics: Dict[str, float], step: int):
        """
        Log training metrics to wandb with 'train/' prefix.

        Args:
            metrics: Training metrics dictionary
            step: Training step number
        """
        self.wandb.log({f"train/{name}": value for name, value in metrics.items()}, step=step)

    def log_evaluation(self, metrics: Dict[str, float], step: int):
        """
        Log evaluation metrics to wandb with 'eval/' prefix.

        Args:
            metrics: Evaluation metrics dictionary
            step: Training step when evaluation was performed
        """
        self.wandb.log({f"eval/{name}": value for name, value in metrics.items()}, step=step)

    def finish(self):
        """Finish wandb run and upload final data."""
        self.wandb.finish()
@@ -0,0 +1,118 @@
1
+ # textpolicy/utils/memory.py
2
+ """
3
+ Memory monitoring utilities for TextPolicy.
4
+ """
5
+
6
+ import gc
7
+ from typing import Dict, Optional
8
+ try:
9
+ import mlx.core as mx # type: ignore
10
+ MLX_AVAILABLE = True
11
+ except ImportError:
12
+ MLX_AVAILABLE = False
13
+
14
+
15
def get_memory_stats() -> Dict[str, float]:
    """
    Get current memory usage statistics.

    Returns:
        Dictionary with memory statistics in MB:
        - mlx_memory_mb / mlx_peak_mb: MLX active/peak memory (0.0 on error;
          keys absent entirely when MLX is not installed)
        - python_memory_mb / python_virtual_mb: process RSS/VMS via psutil
          (0.0 when psutil is not installed)
    """
    stats: Dict[str, float] = {}

    # MLX memory usage (Apple Silicon GPU/ANE)
    if MLX_AVAILABLE:
        try:
            # NOTE(review): mx.metal.* memory accessors are deprecated in
            # newer MLX releases in favor of mx.get_active_memory() /
            # mx.get_peak_memory() — confirm against the pinned MLX version.
            stats["mlx_memory_mb"] = mx.metal.get_active_memory() / 1024 / 1024
            stats["mlx_peak_mb"] = mx.metal.get_peak_memory() / 1024 / 1024
        except Exception as e:
            print(f"Error getting MLX memory stats: {e}")
            stats["mlx_memory_mb"] = 0.0
            stats["mlx_peak_mb"] = 0.0

    # Python process memory usage (best-effort; psutil is optional)
    try:
        import psutil  # type: ignore
        # Take a single memory_info() snapshot: the original called it
        # twice, doing two syscalls and sampling rss/vms at different
        # instants.
        mem = psutil.Process().memory_info()
        stats["python_memory_mb"] = mem.rss / 1024 / 1024
        stats["python_virtual_mb"] = mem.vms / 1024 / 1024
    except ImportError:
        stats["python_memory_mb"] = 0.0
        stats["python_virtual_mb"] = 0.0

    return stats
46
+
47
+
48
def clear_memory():
    """
    Clear memory caches and run garbage collection.

    Useful for freeing memory between training runs or evaluations.
    Best-effort: MLX cache-clearing failures are reported but never raised.
    """
    # Python garbage collection
    gc.collect()

    # MLX memory cleanup (removed a dead `pass` that followed the print)
    if MLX_AVAILABLE:
        try:
            mx.metal.clear_cache()
        except Exception as e:
            print(f"Error clearing MLX memory: {e}")
64
+
65
+
66
class MemoryMonitor:
    """
    Monitor memory usage during training.

    Features:
    - Track peak memory usage
    - Automatic memory alerts
    - Integration with logging systems
    """

    def __init__(self, alert_threshold_mb: float = 8000):
        """
        Initialize memory monitor.

        Args:
            alert_threshold_mb: Memory usage threshold for alerts (default 8GB)
        """
        self.alert_threshold = alert_threshold_mb
        self.peak_stats = {}

    def check_memory(self, step: Optional[int] = None) -> Dict[str, float]:
        """
        Check current memory usage and update peaks.

        Args:
            step: Optional training step for logging

        Returns:
            Current memory statistics
        """
        snapshot = get_memory_stats()

        # Record new highs for every reported statistic.
        for name, value in snapshot.items():
            previous = self.peak_stats.get(name)
            if previous is None or value > previous:
                self.peak_stats[name] = value

        # Combined MLX + Python usage drives the alert threshold.
        combined = snapshot.get("mlx_memory_mb", 0) + snapshot.get("python_memory_mb", 0)
        if combined > self.alert_threshold:
            print(f"Memory alert: {combined:.1f}MB (threshold: {self.alert_threshold:.1f}MB)")
            if step is not None:
                print(f"  At training step: {step}")

        return snapshot

    def get_peak_stats(self) -> Dict[str, float]:
        """Get peak memory usage statistics."""
        return self.peak_stats.copy()

    def reset_peaks(self):
        """Reset peak memory tracking."""
        self.peak_stats.clear()