PyPI - textpolicy - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl - Mend

textpolicy 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

textpolicy/__init__.py CHANGED Viewed

@@ -30,7 +30,7 @@ from .validate import validate_installation
 # Export core reward functions and the reward decorator
 from .rewards.basic import length_reward, keyword_reward, perplexity_reward, accuracy_reward
-from .rewards.registry import reward
+from .rewards.registry import reward, verifier
 # Build __all__ combining submodule __all__ lists and additional symbols
 __all__ = (
@@ -48,5 +48,6 @@ __all__ = (
         "perplexity_reward",
         "accuracy_reward",
         "reward",
+        "verifier",
     ]
 )

textpolicy/environment/text_generation.py CHANGED Viewed

@@ -630,24 +630,31 @@ class TextGenerationEnv(Environment):
         reward_fn: Callable[[str, str, dict], float],
         max_tokens: int = 25,
         seed: int = 42,
-        tokenizer: Any = None
+        tokenizer: Any = None,
+        examples: Optional[List[dict]] = None
     ):
         """
         Initialize simple text generation environment.
         Args:
             prompts: List of prompts to cycle through
             reward_fn: Function that computes reward from (prompt, completion, example)
             max_tokens: Maximum tokens to generate per response
             seed: Random seed for reproducible behavior
             tokenizer: Tokenizer for converting prompts to tokens (required for MLX compatibility)
+            examples: Optional list of example dicts to pass to reward function. If provided,
+                      must have same length as prompts. examples[i] is passed when prompts[i] is used.
         """
         super().__init__()
         if tokenizer is None:
             raise ValueError("tokenizer is required for TextGenerationEnv to work with MLX rollout system")
+        if examples is not None and len(examples) != len(prompts):
+            raise ValueError(f"examples length ({len(examples)}) must match prompts length ({len(prompts)})")
         self.prompts = prompts
+        self.examples = examples if examples is not None else [{} for _ in prompts]
         self.reward_fn = reward_fn
         self.max_tokens = max_tokens
         self.tokenizer = tokenizer
@@ -735,10 +742,11 @@ class TextGenerationEnv(Environment):
         # Compute reward using provided reward function
         # Pass tokenizer for EOS token detection and truncation detection
+        prompt_index = self.current_episode % len(self.prompts)
         reward = self.reward_fn(
             prompt=self.current_prompt,
             completion=response_text,
-            example={},
+            example=self.examples[prompt_index],
             tokenizer=self.tokenizer,  # Pass tokenizer for EOS detection
             truncated=truncated        # Pass truncation flag from environment
         )

{textpolicy-0.1.0.dist-info → textpolicy-0.1.2.dist-info}/METADATA RENAMED Viewed

@@ -1,13 +1,23 @@
 Metadata-Version: 2.4
 Name: textpolicy
-Version: 0.1.0
-Summary: MLX-optimized reward and verification system for text generation RL
+Version: 0.1.2
+Summary: Reinforcement learning for text generation on MLX (Apple Silicon): GRPO/GSPO, environments, rollout, rewards, LoRA/QLoRA
+Project-URL: Homepage, https://github.com/teilomillet/textpolicy
+Project-URL: Repository, https://github.com/teilomillet/textpolicy
+Project-URL: Documentation, https://github.com/teilomillet/textpolicy#readme
+Project-URL: Changelog, https://github.com/teilomillet/textpolicy/blob/main/CHANGELOG.md
+Keywords: reinforcement-learning,text-generation,mlx,apple-silicon,lora,qlora,grpo,gspo,rlhf
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: MacOS
+Classifier: Intended Audience :: Developers
+Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
+Classifier: License :: OSI Approved :: MIT License
 Requires-Python: >=3.12
 Description-Content-Type: text/markdown
 License-File: LICENSE
 Requires-Dist: numpy>=2.3.2
-Requires-Dist: mlx>=0.21.0
-Requires-Dist: mlx-lm>=0.21.0
+Requires-Dist: mlx>=0.22.0
+Requires-Dist: mlx-lm>=0.22.0
 Requires-Dist: gymnasium>=0.29.0
 Requires-Dist: psutil>=7.0.0
 Requires-Dist: wandb>=0.21.1

{textpolicy-0.1.0.dist-info → textpolicy-0.1.2.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-textpolicy/__init__.py,sha256=u4u0fIHfAvXFN2ATHCsG0Tx4xGfOcfuOITBTmKbGhrw,1576
+textpolicy/__init__.py,sha256=vDAHJ826gKuTZUjcAftzz-RTX8KuOjH50Uj1RMhjTIQ,1606
 textpolicy/__main__.py,sha256=IlGmjJaW-DJUC7yhxUhbwNOZA3GxkeQGkVbFdS3_wBI,136
 textpolicy/cli.py,sha256=3CcJzrRlin1pgd6Mh312Xp3-EihHtTSvhakyYpdfacs,2107
 textpolicy/validate.py,sha256=lxmegz83B_c-PS3cFHaaL3c9fgWrEaLsDLkpPFtSj8Y,3780
@@ -16,7 +16,7 @@ textpolicy/environment/environment.py,sha256=o8-RY6wj5xrzDBp77HoY2At3XlBwvreF3DK
 textpolicy/environment/factory.py,sha256=pebQo1_M3sMF8Pdc9yvpdXzRXfIDllKJoAQAjQbif0E,3124
 textpolicy/environment/gym.py,sha256=P8Bi8PlDtcWWa9uLuCjkhZnYRVs-mg6iSJVSBkG99f8,3186
 textpolicy/environment/task_suites.py,sha256=ssPnw2Y3eGYaskWf8dUab4rNu_Bx5L284b3VdhgvSPM,1544
-textpolicy/environment/text_generation.py,sha256=BXSJS_05Q89cPFfdXcUKxOXSZm3HBR3KMi55BnOdoLY,31258
+textpolicy/environment/text_generation.py,sha256=Jql0pEfrPp9tqNsPOAdIP-UYoAUsfV969TMR2uPkUp4,31837
 textpolicy/environment/vectorized.py,sha256=ZROtpmdbh1Oi8c0b0D_vmVzqI16Cp2WZTmkjkRbMoDg,9932
 textpolicy/generation/__init__.py,sha256=J3dc0SPAZChJTsRn47tz8FfIp3XwNgZ-8_H9VBpQYvQ,1266
 textpolicy/generation/lora.py,sha256=xSKRczJY20BrkkU1SSgBtDc30tZjdFE7FhEZPUEoiyg,13747
@@ -58,9 +58,9 @@ textpolicy/utils/logging/tensorboard.py,sha256=aY9YMReSJkWEhy6SdAAUlHSB4lzDecivB
 textpolicy/utils/logging/wandb.py,sha256=U4pxuZNOz2l8XiymK8OFbCpiRTBOLNtnZakC_udttfQ,2206
 textpolicy/validation/__init__.py,sha256=KcyppNi91w0bF51gZ0ykUIKEiF7z6TT37uuavMFScnA,328
 textpolicy/validation/logprob_validation.py,sha256=G_CCy5NRDUTmo7WZIChhNVM3NtP1VmWAjdd5z6TIvos,11749
-textpolicy-0.1.0.dist-info/licenses/LICENSE,sha256=AYDHSNRbiqZt4HHH1gaOoQ2hjYjK4bqw4Vd9UyKzx18,1065
-textpolicy-0.1.0.dist-info/METADATA,sha256=XdyIh8e2IIRymRf31vu1MuVM2aaut2qsZ5PcsjHrl9Y,3199
-textpolicy-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-textpolicy-0.1.0.dist-info/entry_points.txt,sha256=d0Cj5boT6k_l_beVPWPt9LZMllsN4kbIUmsNsn1BANE,51
-textpolicy-0.1.0.dist-info/top_level.txt,sha256=Ww6_QEF71dI-AYCaugiGeGcgMoFAixSOszSoRsyX-E0,11
-textpolicy-0.1.0.dist-info/RECORD,,
+textpolicy-0.1.2.dist-info/licenses/LICENSE,sha256=AYDHSNRbiqZt4HHH1gaOoQ2hjYjK4bqw4Vd9UyKzx18,1065
+textpolicy-0.1.2.dist-info/METADATA,sha256=HXAh6fGcTtNez86WFNlr6OnIQZNcswptUXPnBSmXQHM,3895
+textpolicy-0.1.2.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+textpolicy-0.1.2.dist-info/entry_points.txt,sha256=d0Cj5boT6k_l_beVPWPt9LZMllsN4kbIUmsNsn1BANE,51
+textpolicy-0.1.2.dist-info/top_level.txt,sha256=Ww6_QEF71dI-AYCaugiGeGcgMoFAixSOszSoRsyX-E0,11
+textpolicy-0.1.2.dist-info/RECORD,,

{textpolicy-0.1.0.dist-info → textpolicy-0.1.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.9.0)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

{textpolicy-0.1.0.dist-info → textpolicy-0.1.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{textpolicy-0.1.0.dist-info → textpolicy-0.1.2.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{textpolicy-0.1.0.dist-info → textpolicy-0.1.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

textpolicy 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl

textpolicy 0.1.0__py3-none-any.whl → 0.1.2__py3-none-any.whl