textpolicy 0.1.1__tar.gz → 0.1.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {textpolicy-0.1.1/textpolicy.egg-info → textpolicy-0.1.3}/PKG-INFO +3 -3
- {textpolicy-0.1.1 → textpolicy-0.1.3}/pyproject.toml +4 -4
- textpolicy-0.1.3/tests/test_amdahl_bottlenecks.py +723 -0
- textpolicy-0.1.3/tests/test_countdown.py +533 -0
- textpolicy-0.1.3/tests/test_countdown_baseline.py +467 -0
- textpolicy-0.1.3/tests/test_emergence_logger.py +516 -0
- textpolicy-0.1.3/tests/test_grpo_stability.py +808 -0
- textpolicy-0.1.3/tests/test_mlx_compatibility.py +776 -0
- textpolicy-0.1.3/tests/test_training_pipeline.py +240 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/__init__.py +3 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/algorithms/__init__.py +29 -4
- textpolicy-0.1.3/textpolicy/algorithms/grpo.py +1052 -0
- textpolicy-0.1.3/textpolicy/algorithms/length_shaping.py +151 -0
- textpolicy-0.1.3/textpolicy/analysis/__init__.py +23 -0
- textpolicy-0.1.3/textpolicy/analysis/emergence_logger.py +248 -0
- textpolicy-0.1.3/textpolicy/analysis/planning_patterns.py +105 -0
- textpolicy-0.1.3/textpolicy/analysis/serialization.py +65 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/mlx_generation.py +36 -21
- textpolicy-0.1.3/textpolicy/tasks/__init__.py +7 -0
- textpolicy-0.1.3/textpolicy/tasks/countdown/__init__.py +21 -0
- textpolicy-0.1.3/textpolicy/tasks/countdown/dataset.py +163 -0
- textpolicy-0.1.3/textpolicy/tasks/countdown/evaluator.py +197 -0
- textpolicy-0.1.3/textpolicy/tasks/countdown/prompt.py +89 -0
- textpolicy-0.1.3/textpolicy/tasks/countdown/reward.py +56 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/trainer.py +41 -21
- {textpolicy-0.1.1 → textpolicy-0.1.3/textpolicy.egg-info}/PKG-INFO +3 -3
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/SOURCES.txt +18 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/requires.txt +2 -2
- textpolicy-0.1.1/textpolicy/algorithms/grpo.py +0 -642
- {textpolicy-0.1.1 → textpolicy-0.1.3}/LICENSE +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/README.md +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/setup.cfg +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_gspo_verification.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_integration_e2e_training.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_issue_fixes.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_reward_signatures.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_rollout_rewards.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_runner_step_enforcement.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/tests/test_validate_installation.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/__main__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/algorithms/gspo.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/buffer.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/episode.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/sampling.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/buffer/storage.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/cli.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/base.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/environment.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/factory.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/gym.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/task_suites.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/text_generation.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/environment/vectorized.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/lora.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/generation/reload.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/adapters.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/basic.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/integrated_system.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/mlx_batch_processor.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/registry.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/rollout_rewards.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rewards/verifiers.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/aggregator.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/base.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/rollout.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/runner.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/strategy.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/rollout/worker.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/metrics.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/training/rollout_manager.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/benchmarking.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/data.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/debug.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/environment.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/base.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/console.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/factory.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/multi.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/tensorboard.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/logging/wandb.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/memory.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/performance.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/utils/timing.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/validate.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/validation/__init__.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy/validation/logprob_validation.py +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/dependency_links.txt +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/entry_points.txt +0 -0
- {textpolicy-0.1.1 → textpolicy-0.1.3}/textpolicy.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: textpolicy
|
|
3
|
-
Version: 0.1.1
|
|
3
|
+
Version: 0.1.3
|
|
4
4
|
Summary: Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA
|
|
5
5
|
Project-URL: Homepage, https://github.com/teilomillet/textpolicy
|
|
6
6
|
Project-URL: Repository, https://github.com/teilomillet/textpolicy
|
|
@@ -16,8 +16,8 @@ Requires-Python: >=3.12
|
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
18
|
Requires-Dist: numpy>=2.3.2
|
|
19
|
-
Requires-Dist: mlx>=0.
|
|
20
|
-
Requires-Dist: mlx-lm>=0.
|
|
19
|
+
Requires-Dist: mlx>=0.22.0
|
|
20
|
+
Requires-Dist: mlx-lm>=0.22.0
|
|
21
21
|
Requires-Dist: gymnasium>=0.29.0
|
|
22
22
|
Requires-Dist: psutil>=7.0.0
|
|
23
23
|
Requires-Dist: wandb>=0.21.1
|
|
@@ -7,15 +7,15 @@ include = ["textpolicy*"]
|
|
|
7
7
|
|
|
8
8
|
[project]
|
|
9
9
|
name = "textpolicy"
|
|
10
|
-
version = "0.1.1"
|
|
10
|
+
version = "0.1.3"
|
|
11
11
|
description = "Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA"
|
|
12
12
|
readme = "README.md"
|
|
13
13
|
requires-python = ">=3.12"
|
|
14
14
|
dependencies = [
|
|
15
15
|
"numpy>=2.3.2",
|
|
16
|
-
"mlx>=0.
|
|
17
|
-
"mlx-lm>=0.
|
|
18
|
-
"gymnasium>=0.29.0",
|
|
16
|
+
"mlx>=0.22.0", # Core MLX framework for Apple Silicon acceleration (tested up to 0.30.5)
|
|
17
|
+
"mlx-lm>=0.22.0", # MLX language models for inference (tested up to 0.30.6)
|
|
18
|
+
"gymnasium>=0.29.0",
|
|
19
19
|
"psutil>=7.0.0",
|
|
20
20
|
"wandb>=0.21.1",
|
|
21
21
|
"aiohttp>=3.12.15",
|