textpolicy 0.1.1__tar.gz → 0.1.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (97):
  1. {textpolicy-0.1.1/textpolicy.egg-info → textpolicy-0.1.3}/PKG-INFO +3 -3
  2. {textpolicy-0.1.1 → textpolicy-0.1.3}/pyproject.toml +4 -4
  3. textpolicy-0.1.3/tests/test_amdahl_bottlenecks.py +723 -0
  4. textpolicy-0.1.3/tests/test_countdown.py +533 -0
  5. textpolicy-0.1.3/tests/test_countdown_baseline.py +467 -0
  6. textpolicy-0.1.3/tests/test_emergence_logger.py +516 -0
  7. textpolicy-0.1.3/tests/test_grpo_stability.py +808 -0
  8. textpolicy-0.1.3/tests/test_mlx_compatibility.py +776 -0
  9. textpolicy-0.1.3/tests/test_training_pipeline.py +240 -0
  10. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/__init__.py +3 -0
  11. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/algorithms/__init__.py +29 -4
  12. textpolicy-0.1.3/textpolicy/algorithms/grpo.py +1052 -0
  13. textpolicy-0.1.3/textpolicy/algorithms/length_shaping.py +151 -0
  14. textpolicy-0.1.3/textpolicy/analysis/__init__.py +23 -0
  15. textpolicy-0.1.3/textpolicy/analysis/emergence_logger.py +248 -0
  16. textpolicy-0.1.3/textpolicy/analysis/planning_patterns.py +105 -0
  17. textpolicy-0.1.3/textpolicy/analysis/serialization.py +65 -0
  18. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/mlx_generation.py +36 -21
  19. textpolicy-0.1.3/textpolicy/tasks/__init__.py +7 -0
  20. textpolicy-0.1.3/textpolicy/tasks/countdown/__init__.py +21 -0
  21. textpolicy-0.1.3/textpolicy/tasks/countdown/dataset.py +163 -0
  22. textpolicy-0.1.3/textpolicy/tasks/countdown/evaluator.py +197 -0
  23. textpolicy-0.1.3/textpolicy/tasks/countdown/prompt.py +89 -0
  24. textpolicy-0.1.3/textpolicy/tasks/countdown/reward.py +56 -0
  25. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/trainer.py +41 -21
  26. {textpolicy-0.1.1 → textpolicy-0.1.3/textpolicy.egg-info}/PKG-INFO +3 -3
  27. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/SOURCES.txt +18 -0
  28. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/requires.txt +2 -2
  29. textpolicy-0.1.1/textpolicy/algorithms/grpo.py +0 -642
  30. {textpolicy-0.1.1 → textpolicy-0.1.3}/LICENSE +0 -0
  31. {textpolicy-0.1.1 → textpolicy-0.1.3}/README.md +0 -0
  32. {textpolicy-0.1.1 → textpolicy-0.1.3}/setup.cfg +0 -0
  33. {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_gspo_verification.py +0 -0
  34. {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_integration_e2e_training.py +0 -0
  35. {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_issue_fixes.py +0 -0
  36. {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_reward_signatures.py +0 -0
  37. {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_rollout_rewards.py +0 -0
  38. {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_runner_step_enforcement.py +0 -0
  39. {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_validate_installation.py +0 -0
  40. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/__main__.py +0 -0
  41. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/algorithms/gspo.py +0 -0
  42. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/__init__.py +0 -0
  43. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/buffer.py +0 -0
  44. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/episode.py +0 -0
  45. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/sampling.py +0 -0
  46. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/storage.py +0 -0
  47. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/cli.py +0 -0
  48. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/__init__.py +0 -0
  49. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/base.py +0 -0
  50. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/environment.py +0 -0
  51. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/factory.py +0 -0
  52. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/gym.py +0 -0
  53. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/task_suites.py +0 -0
  54. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/text_generation.py +0 -0
  55. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/vectorized.py +0 -0
  56. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/__init__.py +0 -0
  57. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/lora.py +0 -0
  58. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/reload.py +0 -0
  59. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/__init__.py +0 -0
  60. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/adapters.py +0 -0
  61. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/basic.py +0 -0
  62. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/integrated_system.py +0 -0
  63. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/mlx_batch_processor.py +0 -0
  64. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/registry.py +0 -0
  65. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/rollout_rewards.py +0 -0
  66. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/verifiers.py +0 -0
  67. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/__init__.py +0 -0
  68. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/aggregator.py +0 -0
  69. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/base.py +0 -0
  70. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/rollout.py +0 -0
  71. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/runner.py +0 -0
  72. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/strategy.py +0 -0
  73. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/worker.py +0 -0
  74. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/__init__.py +0 -0
  75. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/metrics.py +0 -0
  76. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/rollout_manager.py +0 -0
  77. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/__init__.py +0 -0
  78. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/benchmarking.py +0 -0
  79. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/data.py +0 -0
  80. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/debug.py +0 -0
  81. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/environment.py +0 -0
  82. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/__init__.py +0 -0
  83. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/base.py +0 -0
  84. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/console.py +0 -0
  85. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/factory.py +0 -0
  86. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/multi.py +0 -0
  87. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/tensorboard.py +0 -0
  88. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/wandb.py +0 -0
  89. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/memory.py +0 -0
  90. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/performance.py +0 -0
  91. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/timing.py +0 -0
  92. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/validate.py +0 -0
  93. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/validation/__init__.py +0 -0
  94. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/validation/logprob_validation.py +0 -0
  95. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/dependency_links.txt +0 -0
  96. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/entry_points.txt +0 -0
  97. {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: textpolicy
3
- Version: 0.1.1
3
+ Version: 0.1.3
4
4
  Summary: Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA
5
5
  Project-URL: Homepage, https://github.com/teilomillet/textpolicy
6
6
  Project-URL: Repository, https://github.com/teilomillet/textpolicy
@@ -16,8 +16,8 @@ Requires-Python: >=3.12
16
16
  Description-Content-Type: text/markdown
17
17
  License-File: LICENSE
18
18
  Requires-Dist: numpy>=2.3.2
19
- Requires-Dist: mlx>=0.21.0
20
- Requires-Dist: mlx-lm>=0.21.0
19
+ Requires-Dist: mlx>=0.22.0
20
+ Requires-Dist: mlx-lm>=0.22.0
21
21
  Requires-Dist: gymnasium>=0.29.0
22
22
  Requires-Dist: psutil>=7.0.0
23
23
  Requires-Dist: wandb>=0.21.1
@@ -7,15 +7,15 @@ include = ["textpolicy*"]
7
7
 
8
8
  [project]
9
9
  name = "textpolicy"
10
- version = "0.1.1"
10
+ version = "0.1.3"
11
11
  description = "Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA"
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.12"
14
14
  dependencies = [
15
15
  "numpy>=2.3.2",
16
- "mlx>=0.21.0", # Core MLX framework for Apple Silicon acceleration
17
- "mlx-lm>=0.21.0", # MLX language models for inference
18
- "gymnasium>=0.29.0",
16
+ "mlx>=0.22.0", # Core MLX framework for Apple Silicon acceleration (tested up to 0.30.5)
17
+ "mlx-lm>=0.22.0", # MLX language models for inference (tested up to 0.30.6)
18
+ "gymnasium>=0.29.0",
19
19
  "psutil>=7.0.0",
20
20
  "wandb>=0.21.1",
21
21
  "aiohttp>=3.12.15",