PyPI - textpolicy - Versions diffs - 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

textpolicy 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (68) hide show

textpolicy/__init__.py +53 -0
textpolicy/__main__.py +8 -0
textpolicy/algorithms/__init__.py +54 -0
textpolicy/algorithms/grpo.py +642 -0
textpolicy/algorithms/gspo.py +582 -0
textpolicy/buffer/__init__.py +23 -0
textpolicy/buffer/buffer.py +244 -0
textpolicy/buffer/episode.py +383 -0
textpolicy/buffer/sampling.py +438 -0
textpolicy/buffer/storage.py +255 -0
textpolicy/cli.py +67 -0
textpolicy/environment/__init__.py +79 -0
textpolicy/environment/base.py +110 -0
textpolicy/environment/environment.py +46 -0
textpolicy/environment/factory.py +103 -0
textpolicy/environment/gym.py +106 -0
textpolicy/environment/task_suites.py +51 -0
textpolicy/environment/text_generation.py +797 -0
textpolicy/environment/vectorized.py +253 -0
textpolicy/generation/__init__.py +62 -0
textpolicy/generation/lora.py +411 -0
textpolicy/generation/mlx_generation.py +557 -0
textpolicy/generation/reload.py +253 -0
textpolicy/rewards/__init__.py +137 -0
textpolicy/rewards/adapters.py +387 -0
textpolicy/rewards/basic.py +214 -0
textpolicy/rewards/integrated_system.py +338 -0
textpolicy/rewards/mlx_batch_processor.py +447 -0
textpolicy/rewards/registry.py +293 -0
textpolicy/rewards/rollout_rewards.py +410 -0
textpolicy/rewards/verifiers.py +369 -0
textpolicy/rollout/__init__.py +44 -0
textpolicy/rollout/aggregator.py +145 -0
textpolicy/rollout/base.py +108 -0
textpolicy/rollout/rollout.py +142 -0
textpolicy/rollout/runner.py +280 -0
textpolicy/rollout/strategy.py +208 -0
textpolicy/rollout/worker.py +194 -0
textpolicy/training/__init__.py +14 -0
textpolicy/training/metrics.py +242 -0
textpolicy/training/rollout_manager.py +78 -0
textpolicy/training/trainer.py +684 -0
textpolicy/utils/__init__.py +40 -0
textpolicy/utils/benchmarking.py +489 -0
textpolicy/utils/data.py +60 -0
textpolicy/utils/debug.py +170 -0
textpolicy/utils/environment.py +349 -0
textpolicy/utils/logging/__init__.py +22 -0
textpolicy/utils/logging/base.py +48 -0
textpolicy/utils/logging/console.py +61 -0
textpolicy/utils/logging/factory.py +133 -0
textpolicy/utils/logging/multi.py +83 -0
textpolicy/utils/logging/tensorboard.py +65 -0
textpolicy/utils/logging/wandb.py +72 -0
textpolicy/utils/memory.py +118 -0
textpolicy/utils/performance.py +464 -0
textpolicy/utils/timing.py +171 -0
textpolicy/validate.py +101 -0
textpolicy/validation/__init__.py +13 -0
textpolicy/validation/logprob_validation.py +315 -0
textpolicy-0.1.1.dist-info/METADATA +109 -0
textpolicy-0.1.1.dist-info/RECORD +66 -0
{textpolicy-0.0.1.dist-info → textpolicy-0.1.1.dist-info}/WHEEL +1 -1
textpolicy-0.1.1.dist-info/entry_points.txt +2 -0
textpolicy-0.0.1.dist-info/METADATA +0 -10
textpolicy-0.0.1.dist-info/RECORD +0 -6
{textpolicy-0.0.1.dist-info → textpolicy-0.1.1.dist-info}/licenses/LICENSE +0 -0
{textpolicy-0.0.1.dist-info → textpolicy-0.1.1.dist-info}/top_level.txt +0 -0

textpolicy/__init__.py CHANGED Viewed

@@ -0,0 +1,53 @@
+"""
+TextPolicy: RL library for text generation with MLX.
+This module exposes the public API entry points for algorithms,
+training, generation, environment, and rewards.
+"""
+# Submodule imports for building the public API
+from . import algorithms, generation, training
+# Export RL algorithms as defined in textpolicy.algorithms.__all__
+from .algorithms import *  # noqa: F403,F401
+# Export text generation utilities (load_model, generate_tokens, etc.)
+from .generation import *  # noqa: F403,F401
+# Export training components (Trainer, RolloutManager, TrainingMetrics)
+from .training import *  # noqa: F403,F401
+# Export environment components and factory functions
+from .environment import (
+    TextGenerationEnvironment,
+    TextGenerationEnv,
+    create_text_generation_test_env,
+    validate_learning_progress,
+)
+# Export installation validation utilities
+from .validate import validate_installation
+# Export core reward functions and the reward decorator
+from .rewards.basic import length_reward, keyword_reward, perplexity_reward, accuracy_reward
+from .rewards.registry import reward, verifier
+# Build __all__ combining submodule __all__ lists and additional symbols
+__all__ = (
+    algorithms.__all__
+    + generation.__all__
+    + training.__all__
+    + [
+        "TextGenerationEnvironment",
+        "TextGenerationEnv",
+        "create_text_generation_test_env",
+        "validate_learning_progress",
+        "validate_installation",
+        "length_reward",
+        "keyword_reward",
+        "perplexity_reward",
+        "accuracy_reward",
+        "reward",
+        "verifier",
+    ]
+)

textpolicy/__main__.py ADDED Viewed

@@ -0,0 +1,8 @@
+"""Allow `python -m textpolicy` to invoke the CLI."""
+from .cli import main
+if __name__ == "__main__":
+    raise SystemExit(main())

textpolicy/algorithms/__init__.py ADDED Viewed

@@ -0,0 +1,54 @@
+# textpolicy/algorithms/__init__.py
+"""
+Reinforcement learning algorithms for MLX and Apple Silicon.
+GRPO: group-relative advantages with PPO-style clipping.
+GSPO: sequence-level importance sampling (sequence, token, and hybrid variants).
+"""
+from .grpo import (
+    compute_advantages,
+    compute_advantages_dr_grpo,
+    policy_loss,
+    grpo_loss,
+    compute_metrics,
+    entropy_bonus,
+    select_all_data,
+    select_recent_data
+)
+from .gspo import (
+    create_gspo_policy_loss,
+    create_gspo_metrics,
+    policy_loss_sequence,
+    policy_loss_hybrid,
+    create_policy_loss_hybrid,
+    policy_loss_token,
+    compute_metrics_sequence,
+    compute_metrics_hybrid,
+    compute_metrics_token,
+    select_gspo_data
+)
+__all__ = [
+    # GRPO functions
+    "compute_advantages",
+    "compute_advantages_dr_grpo",
+    "policy_loss",
+    "grpo_loss",
+    "compute_metrics",
+    "entropy_bonus",
+    "select_all_data",
+    "select_recent_data",
+    # GSPO functions
+    "create_gspo_policy_loss",
+    "create_gspo_metrics",
+    "policy_loss_sequence",
+    "policy_loss_hybrid",
+    "create_policy_loss_hybrid",
+    "policy_loss_token",
+    "compute_metrics_sequence",
+    "compute_metrics_hybrid",
+    "compute_metrics_token",
+    "select_gspo_data"
+]

textpolicy 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl

textpolicy 0.0.1__py3-none-any.whl → 0.1.1__py3-none-any.whl