textpolicy 0.1.0__tar.gz → 0.1.1__tar.gz

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.
Files changed (79)
  1. {textpolicy-0.1.0/textpolicy.egg-info → textpolicy-0.1.1}/PKG-INFO +12 -2
  2. textpolicy-0.1.1/pyproject.toml +56 -0
  3. textpolicy-0.1.1/tests/test_issue_fixes.py +218 -0
  4. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/__init__.py +2 -1
  5. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/text_generation.py +12 -4
  6. {textpolicy-0.1.0 → textpolicy-0.1.1/textpolicy.egg-info}/PKG-INFO +12 -2
  7. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy.egg-info/SOURCES.txt +1 -0
  8. textpolicy-0.1.0/pyproject.toml +0 -30
  9. {textpolicy-0.1.0 → textpolicy-0.1.1}/LICENSE +0 -0
  10. {textpolicy-0.1.0 → textpolicy-0.1.1}/README.md +0 -0
  11. {textpolicy-0.1.0 → textpolicy-0.1.1}/setup.cfg +0 -0
  12. {textpolicy-0.1.0 → textpolicy-0.1.1}/tests/test_gspo_verification.py +0 -0
  13. {textpolicy-0.1.0 → textpolicy-0.1.1}/tests/test_integration_e2e_training.py +0 -0
  14. {textpolicy-0.1.0 → textpolicy-0.1.1}/tests/test_reward_signatures.py +0 -0
  15. {textpolicy-0.1.0 → textpolicy-0.1.1}/tests/test_rollout_rewards.py +0 -0
  16. {textpolicy-0.1.0 → textpolicy-0.1.1}/tests/test_runner_step_enforcement.py +0 -0
  17. {textpolicy-0.1.0 → textpolicy-0.1.1}/tests/test_validate_installation.py +0 -0
  18. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/__main__.py +0 -0
  19. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/algorithms/__init__.py +0 -0
  20. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/algorithms/grpo.py +0 -0
  21. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/algorithms/gspo.py +0 -0
  22. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/buffer/__init__.py +0 -0
  23. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/buffer/buffer.py +0 -0
  24. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/buffer/episode.py +0 -0
  25. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/buffer/sampling.py +0 -0
  26. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/buffer/storage.py +0 -0
  27. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/cli.py +0 -0
  28. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/__init__.py +0 -0
  29. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/base.py +0 -0
  30. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/environment.py +0 -0
  31. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/factory.py +0 -0
  32. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/gym.py +0 -0
  33. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/task_suites.py +0 -0
  34. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/vectorized.py +0 -0
  35. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/generation/__init__.py +0 -0
  36. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/generation/lora.py +0 -0
  37. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/generation/mlx_generation.py +0 -0
  38. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/generation/reload.py +0 -0
  39. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/__init__.py +0 -0
  40. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/adapters.py +0 -0
  41. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/basic.py +0 -0
  42. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/integrated_system.py +0 -0
  43. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/mlx_batch_processor.py +0 -0
  44. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/registry.py +0 -0
  45. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/rollout_rewards.py +0 -0
  46. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rewards/verifiers.py +0 -0
  47. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rollout/__init__.py +0 -0
  48. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rollout/aggregator.py +0 -0
  49. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rollout/base.py +0 -0
  50. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rollout/rollout.py +0 -0
  51. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rollout/runner.py +0 -0
  52. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rollout/strategy.py +0 -0
  53. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/rollout/worker.py +0 -0
  54. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/training/__init__.py +0 -0
  55. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/training/metrics.py +0 -0
  56. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/training/rollout_manager.py +0 -0
  57. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/training/trainer.py +0 -0
  58. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/__init__.py +0 -0
  59. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/benchmarking.py +0 -0
  60. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/data.py +0 -0
  61. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/debug.py +0 -0
  62. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/environment.py +0 -0
  63. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/logging/__init__.py +0 -0
  64. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/logging/base.py +0 -0
  65. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/logging/console.py +0 -0
  66. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/logging/factory.py +0 -0
  67. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/logging/multi.py +0 -0
  68. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/logging/tensorboard.py +0 -0
  69. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/logging/wandb.py +0 -0
  70. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/memory.py +0 -0
  71. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/performance.py +0 -0
  72. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/utils/timing.py +0 -0
  73. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/validate.py +0 -0
  74. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/validation/__init__.py +0 -0
  75. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/validation/logprob_validation.py +0 -0
  76. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy.egg-info/dependency_links.txt +0 -0
  77. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy.egg-info/entry_points.txt +0 -0
  78. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy.egg-info/requires.txt +0 -0
  79. {textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy.egg-info/top_level.txt +0 -0
{textpolicy-0.1.0/textpolicy.egg-info → textpolicy-0.1.1}/PKG-INFO
@@ -1,7 +1,17 @@
  Metadata-Version: 2.4
  Name: textpolicy
- Version: 0.1.0
- Summary: MLX-optimized reward and verification system for text generation RL
+ Version: 0.1.1
+ Summary: Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA
+ Project-URL: Homepage, https://github.com/teilomillet/textpolicy
+ Project-URL: Repository, https://github.com/teilomillet/textpolicy
+ Project-URL: Documentation, https://github.com/teilomillet/textpolicy#readme
+ Project-URL: Changelog, https://github.com/teilomillet/textpolicy/blob/main/CHANGELOG.md
+ Keywords: reinforcement-learning,text-generation,mlx,apple-silicon,lora,qlora,grpo,gspo,rlhf
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Operating System :: MacOS
+ Classifier: Intended Audience :: Developers
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: License :: OSI Approved :: MIT License
  Requires-Python: >=3.12
  Description-Content-Type: text/markdown
  License-File: LICENSE
textpolicy-0.1.1/pyproject.toml
@@ -0,0 +1,56 @@
+ [build-system]
+ requires = ["setuptools>=61.0", "wheel"]
+ build-backend = "setuptools.build_meta"
+
+ [tool.setuptools.packages.find]
+ include = ["textpolicy*"]
+
+ [project]
+ name = "textpolicy"
+ version = "0.1.1"
+ description = "Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA"
+ readme = "README.md"
+ requires-python = ">=3.12"
+ dependencies = [
+ "numpy>=2.3.2",
+ "mlx>=0.21.0",  # Core MLX framework for Apple Silicon acceleration
+ "mlx-lm>=0.21.0",  # MLX language models for inference
+ "gymnasium>=0.29.0",
+ "psutil>=7.0.0",
+ "wandb>=0.21.1",
+ "aiohttp>=3.12.15",
+ "pytest>=8.4.1",
+ ]
+
+ keywords = [
+ "reinforcement-learning",
+ "text-generation",
+ "mlx",
+ "apple-silicon",
+ "lora",
+ "qlora",
+ "grpo",
+ "gspo",
+ "rlhf",
+ ]
+
+ classifiers = [
+ "Programming Language :: Python :: 3",
+ "Operating System :: MacOS",
+ "Intended Audience :: Developers",
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
+ "License :: OSI Approved :: MIT License",
+ ]
+
+ [project.urls]
+ Homepage = "https://github.com/teilomillet/textpolicy"
+ Repository = "https://github.com/teilomillet/textpolicy"
+ Documentation = "https://github.com/teilomillet/textpolicy#readme"
+ Changelog = "https://github.com/teilomillet/textpolicy/blob/main/CHANGELOG.md"
+
+ [project.scripts]
+ textpolicy = "textpolicy.cli:main"
+
+ [project.optional-dependencies]
+ external = ["aiohttp>=3.8.0", "pydantic>=2.0.0"]
+ dev = ["pytest>=7.0.0", "black>=22.0.0", "ruff>=0.1.0"]
textpolicy-0.1.1/tests/test_issue_fixes.py
@@ -0,0 +1,218 @@
+ """
+ Tests for GitHub Issues #2 and #3 fixes.
+
+ Issue #2: TextGenerationEnv cannot pass example metadata to reward functions
+ Issue #3: Export @verifier decorator at top level for API consistency
+ """
+
+ import pytest
+
+
+ class TestIssue3VerifierExport:
+     """Test that @verifier decorator is exported at top level (Issue #3)."""
+
+     def test_verifier_accessible_at_top_level(self):
+         """tp.verifier should be accessible without deep import."""
+         import textpolicy as tp
+
+         assert hasattr(tp, "verifier"), "verifier should be exported at top level"
+         assert callable(tp.verifier), "verifier should be callable"
+
+     def test_verifier_same_as_deep_import(self):
+         """tp.verifier should be the same function as deep import."""
+         import textpolicy as tp
+         from textpolicy.rewards import verifier
+
+         assert tp.verifier is verifier, "tp.verifier should be same as textpolicy.rewards.verifier"
+
+     def test_verifier_decorator_works(self):
+         """@tp.verifier should work as a decorator."""
+         import textpolicy as tp
+
+         @tp.verifier
+         def my_test_verifier(prompt, completion, example, **kwargs):
+             return len(completion) > 0
+
+         # Verify it's registered and callable
+         assert callable(my_test_verifier)
+         result = my_test_verifier("test prompt", "test completion", {})
+         assert isinstance(result, bool)
+
+     def test_reward_and_verifier_both_at_top_level(self):
+         """Both @reward and @verifier should be at top level for API consistency."""
+         import textpolicy as tp
+
+         assert hasattr(tp, "reward"), "reward should be at top level"
+         assert hasattr(tp, "verifier"), "verifier should be at top level"
+
+
+ @pytest.mark.integration
+ class TestIssue2ExamplesParameter:
+     """Test that TextGenerationEnv passes example metadata to reward functions (Issue #2)."""
+
+     @pytest.fixture
+     def dummy_tokenizer(self):
+         """Provide a minimal tokenizer for tests."""
+         class DummyTokenizer:
+             def encode(self, text):
+                 return [ord(c) % 256 for c in text]
+
+             def decode(self, ids):
+                 return "".join(chr(int(i) % 256) for i in ids)
+
+         return DummyTokenizer()
+
+     def test_env_accepts_examples_parameter(self, dummy_tokenizer):
+         """TextGenerationEnv should accept an examples parameter."""
+         from textpolicy.environment.text_generation import TextGenerationEnv
+
+         prompts = ["Hello", "World"]
+         examples = [{"key": "value1"}, {"key": "value2"}]
+
+         def reward_fn(prompt, completion, example, **kwargs):
+             return 1.0
+
+         # Should not raise
+         env = TextGenerationEnv(
+             prompts=prompts,
+             reward_fn=reward_fn,
+             tokenizer=dummy_tokenizer,
+             examples=examples,
+         )
+         assert env.examples == examples
+
+     def test_env_defaults_to_empty_dicts_when_no_examples(self, dummy_tokenizer):
+         """When examples not provided, should default to empty dicts."""
+         from textpolicy.environment.text_generation import TextGenerationEnv
+
+         prompts = ["Hello", "World"]
+
+         def reward_fn(prompt, completion, example, **kwargs):
+             return 1.0
+
+         env = TextGenerationEnv(
+             prompts=prompts, reward_fn=reward_fn, tokenizer=dummy_tokenizer
+         )
+
+         assert env.examples == [{}, {}]
+
+     def test_env_validates_examples_length(self, dummy_tokenizer):
+         """Should raise ValueError if examples length != prompts length."""
+         from textpolicy.environment.text_generation import TextGenerationEnv
+
+         prompts = ["Hello", "World"]
+         examples = [{"key": "value1"}]  # Wrong length
+
+         def reward_fn(prompt, completion, example, **kwargs):
+             return 1.0
+
+         with pytest.raises(ValueError, match="examples length.*must match prompts length"):
+             TextGenerationEnv(
+                 prompts=prompts,
+                 reward_fn=reward_fn,
+                 tokenizer=dummy_tokenizer,
+                 examples=examples,
+             )
+
+     def test_example_passed_to_reward_function(self, dummy_tokenizer):
+         """Reward function should receive the correct example for each prompt."""
+         from textpolicy.environment.text_generation import TextGenerationEnv
+
+         prompts = ["Question 1", "Question 2"]
+         examples = [
+             {"db_id": "database_1", "gold_sql": "SELECT 1"},
+             {"db_id": "database_2", "gold_sql": "SELECT 2"},
+         ]
+
+         received_examples = []
+
+         def capture_reward(prompt, completion, example, **kwargs):
+             received_examples.append(example.copy())
+             return 1.0
+
+         env = TextGenerationEnv(
+             prompts=prompts,
+             reward_fn=capture_reward,
+             tokenizer=dummy_tokenizer,
+             examples=examples,
+         )
+
+         # Episode 0 should use examples[0]
+         env.reset()
+         env.step("some response")
+         assert received_examples[0] == {"db_id": "database_1", "gold_sql": "SELECT 1"}
+
+         # Episode 1 should use examples[1]
+         env.reset()
+         env.step("another response")
+         assert received_examples[1] == {"db_id": "database_2", "gold_sql": "SELECT 2"}
+
+     def test_examples_cycle_with_prompts(self, dummy_tokenizer):
+         """Examples should cycle correctly when prompts cycle."""
+         from textpolicy.environment.text_generation import TextGenerationEnv
+
+         prompts = ["P1", "P2"]
+         examples = [{"idx": 0}, {"idx": 1}]
+
+         received_indices = []
+
+         def capture_reward(prompt, completion, example, **kwargs):
+             received_indices.append(example.get("idx"))
+             return 1.0
+
+         env = TextGenerationEnv(
+             prompts=prompts,
+             reward_fn=capture_reward,
+             tokenizer=dummy_tokenizer,
+             examples=examples,
+         )
+
+         # Run 4 episodes (should cycle through prompts twice)
+         for _ in range(4):
+             env.reset()
+             env.step("response")
+
+         # Should have received [0, 1, 0, 1]
+         assert received_indices == [0, 1, 0, 1]
+
+     def test_litmus_test_from_issue(self, dummy_tokenizer):
+         """Run the exact litmus test from Issue #2."""
+         from textpolicy.environment.text_generation import TextGenerationEnv
+         import textpolicy as tp
+
+         examples = [
+             {"db_id": "concert_singer", "gold_sql": "SELECT COUNT(*) FROM singer"},
+             {"db_id": "pets_1", "gold_sql": "SELECT COUNT(*) FROM pets"},
+         ]
+         prompts = [
+             "Schema: singer(id, name)\nQuestion: How many singers?",
+             "Schema: pets(id, name)\nQuestion: How many pets?",
+         ]
+
+         captured_db_ids = []
+
+         @tp.reward
+         def check_example(prompt, completion, example, **kwargs):
+             db_id = example.get("db_id")
+             captured_db_ids.append(db_id)
+             return 1.0
+
+         env = TextGenerationEnv(prompts, check_example, examples=examples, tokenizer=dummy_tokenizer)
+
+         # First episode
+         env.reset()
+         env.step("some action")
+
+         # Should have captured 'concert_singer'
+         assert captured_db_ids[0] == "concert_singer", f"Expected 'concert_singer', got {captured_db_ids[0]}"
+
+         # Second episode
+         env.reset()
+         env.step("another action")
+
+         # Should have captured 'pets_1'
+         assert captured_db_ids[1] == "pets_1", f"Expected 'pets_1', got {captured_db_ids[1]}"
+
+
+ if __name__ == "__main__":
+     pytest.main([__file__, "-v"])
{textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/__init__.py
@@ -30,7 +30,7 @@ from .validate import validate_installation

  # Export core reward functions and the reward decorator
  from .rewards.basic import length_reward, keyword_reward, perplexity_reward, accuracy_reward
- from .rewards.registry import reward
+ from .rewards.registry import reward, verifier

  # Build __all__ combining submodule __all__ lists and additional symbols
  __all__ = (
@@ -48,5 +48,6 @@ __all__ = (
  "perplexity_reward",
  "accuracy_reward",
  "reward",
+ "verifier",
  ]
  )
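
With this export, `verifier` can be used the same way `reward` already is, straight from the package namespace. A minimal sketch of the intended call pattern; the function name and the check it performs are illustrative, not part of the package:

import textpolicy as tp

# Hypothetical verifier: returns a boolean pass/fail judgment for one completion.
@tp.verifier
def ends_with_period(prompt, completion, example, **kwargs):
    return completion.strip().endswith(".")

# Equivalent to the previous deep import, which still works:
# from textpolicy.rewards import verifier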
{textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy/environment/text_generation.py
@@ -630,24 +630,31 @@ class TextGenerationEnv(Environment):
          reward_fn: Callable[[str, str, dict], float],
          max_tokens: int = 25,
          seed: int = 42,
-         tokenizer: Any = None
+         tokenizer: Any = None,
+         examples: Optional[List[dict]] = None
      ):
          """
          Initialize simple text generation environment.
-
+
          Args:
              prompts: List of prompts to cycle through
              reward_fn: Function that computes reward from (prompt, completion, example)
              max_tokens: Maximum tokens to generate per response
              seed: Random seed for reproducible behavior
              tokenizer: Tokenizer for converting prompts to tokens (required for MLX compatibility)
+             examples: Optional list of example dicts to pass to reward function. If provided,
+                 must have same length as prompts. examples[i] is passed when prompts[i] is used.
          """
          super().__init__()

          if tokenizer is None:
              raise ValueError("tokenizer is required for TextGenerationEnv to work with MLX rollout system")
-
+
+         if examples is not None and len(examples) != len(prompts):
+             raise ValueError(f"examples length ({len(examples)}) must match prompts length ({len(prompts)})")
+
          self.prompts = prompts
+         self.examples = examples if examples is not None else [{} for _ in prompts]
          self.reward_fn = reward_fn
          self.max_tokens = max_tokens
          self.tokenizer = tokenizer
@@ -735,10 +742,11 @@

          # Compute reward using provided reward function
          # Pass tokenizer for EOS token detection and truncation detection
+         prompt_index = self.current_episode % len(self.prompts)
          reward = self.reward_fn(
              prompt=self.current_prompt,
              completion=response_text,
-             example={},
+             example=self.examples[prompt_index],
              tokenizer=self.tokenizer,  # Pass tokenizer for EOS detection
              truncated=truncated  # Pass truncation flag from environment
          )
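
Together, these two hunks let per-prompt metadata flow into the reward function: examples[i] is delivered whenever prompts[i] is the active prompt, and the two lists must have the same length. A minimal usage sketch, assuming a tokenizer object the env accepts (the placeholder name my_tokenizer and the translation task are illustrative):

import textpolicy as tp
from textpolicy.environment.text_generation import TextGenerationEnv

prompts = ["Translate to English: bonjour", "Translate to English: danke"]
examples = [{"gold": "hello"}, {"gold": "thank you"}]  # illustrative metadata, one dict per prompt

@tp.reward
def exact_match(prompt, completion, example, **kwargs):
    # example is examples[i] for the prompt that produced this completion
    return 1.0 if completion.strip().lower() == example.get("gold") else 0.0

env = TextGenerationEnv(
    prompts=prompts,
    reward_fn=exact_match,
    tokenizer=my_tokenizer,  # placeholder: any tokenizer the env accepts (required)
    examples=examples,       # must have the same length as prompts
)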
{textpolicy-0.1.0 → textpolicy-0.1.1/textpolicy.egg-info}/PKG-INFO
@@ -1,7 +1,17 @@
  Metadata-Version: 2.4
  Name: textpolicy
- Version: 0.1.0
- Summary: MLX-optimized reward and verification system for text generation RL
+ Version: 0.1.1
+ Summary: Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA
+ Project-URL: Homepage, https://github.com/teilomillet/textpolicy
+ Project-URL: Repository, https://github.com/teilomillet/textpolicy
+ Project-URL: Documentation, https://github.com/teilomillet/textpolicy#readme
+ Project-URL: Changelog, https://github.com/teilomillet/textpolicy/blob/main/CHANGELOG.md
+ Keywords: reinforcement-learning,text-generation,mlx,apple-silicon,lora,qlora,grpo,gspo,rlhf
+ Classifier: Programming Language :: Python :: 3
+ Classifier: Operating System :: MacOS
+ Classifier: Intended Audience :: Developers
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+ Classifier: License :: OSI Approved :: MIT License
  Requires-Python: >=3.12
  Description-Content-Type: text/markdown
  License-File: LICENSE
{textpolicy-0.1.0 → textpolicy-0.1.1}/textpolicy.egg-info/SOURCES.txt
@@ -3,6 +3,7 @@ README.md
  pyproject.toml
  tests/test_gspo_verification.py
  tests/test_integration_e2e_training.py
+ tests/test_issue_fixes.py
  tests/test_reward_signatures.py
  tests/test_rollout_rewards.py
  tests/test_runner_step_enforcement.py
textpolicy-0.1.0/pyproject.toml
@@ -1,30 +0,0 @@
- [build-system]
- requires = ["setuptools>=61.0", "wheel"]
- build-backend = "setuptools.build_meta"
-
- [tool.setuptools.packages.find]
- include = ["textpolicy*"]
-
- [project]
- name = "textpolicy"
- version = "0.1.0"
- description = "MLX-optimized reward and verification system for text generation RL"
- readme = "README.md"
- requires-python = ">=3.12"
- dependencies = [
- "numpy>=2.3.2",
- "mlx>=0.21.0",  # Core MLX framework for Apple Silicon acceleration
- "mlx-lm>=0.21.0",  # MLX language models for inference
- "gymnasium>=0.29.0",
- "psutil>=7.0.0",
- "wandb>=0.21.1",
- "aiohttp>=3.12.15",
- "pytest>=8.4.1",
- ]
-
- [project.scripts]
- textpolicy = "textpolicy.cli:main"
-
- [project.optional-dependencies]
- external = ["aiohttp>=3.8.0", "pydantic>=2.0.0"]
- dev = ["pytest>=7.0.0", "black>=22.0.0", "ruff>=0.1.0"]