wisent-0.5.9-py3-none-any.whl → wisent-0.5.11-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

wisent/__init__.py CHANGED
@@ -1 +1 @@
-__version__ = "0.5.9"
+__version__ = "0.5.11"
wisent/core/models/wisent_model.py CHANGED
@@ -95,6 +95,7 @@ class WisentModel:
         elif self.device == "cuda":
             load_kwargs["dtype"] = torch.float16
             load_kwargs["device_map"] = "auto"
+            load_kwargs["attn_implementation"] = "flash_attention_2"  # Use flash attention for CUDA
         else:
             load_kwargs["dtype"] = torch.float32
             load_kwargs["device_map"] = None
@@ -336,7 +337,7 @@ class WisentModel:
     @torch.inference_mode()
     def generate(
         self,
-        inputs: list[list[ChatMessage]],
+        inputs: list[list[ChatMessage]] | str,
         max_new_tokens: int = 128,
         temperature: float = 0.7,
         top_p: float = 0.95,
@@ -345,6 +346,7 @@ class WisentModel:
         use_steering: bool = False,
         steering_plan: SteeringPlan | None = None,
         enable_thinking: bool = True,
+        prompt_is_formatted: bool = False,
         **gen_kwargs: Any,
     ) -> list[str]:
         """
@@ -352,7 +354,7 @@ class WisentModel:
 
         attributes:
             inputs:
-                list of chat messages (each a list of {'role','content'} dicts).
+                list of chat messages (each a list of {'role','content'} dicts) OR pre-formatted string.
             max_new_tokens:
                 max tokens to generate (beyond the prompt).
             temperature:
@@ -370,6 +372,8 @@ class WisentModel:
                 If None, uses the internal plan.
             enable_thinking:
                 If False, disable thinking/reasoning mode (prevents <think> tags for supported models like Qwen).
+            prompt_is_formatted:
+                If True, inputs is a pre-formatted string with chat template already applied.
             **gen_kwargs:
                 additional kwargs passed to 'model.generate()'.
 
@@ -448,7 +452,22 @@ class WisentModel:
         if use_steering:
             self.apply_steering(steering_plan)
 
-        batch = self._batch_encode(inputs, add_generation_prompt=True, enable_thinking=enable_thinking)
+        if prompt_is_formatted and isinstance(inputs, str):
+            # Direct tokenization of pre-formatted prompt
+            tokenizer_output = self.tokenizer(
+                inputs,
+                return_tensors="pt",
+                padding=False,  # Single prompt, no padding needed
+                truncation=True,  # Avoid errors on long inputs
+                max_length=self.tokenizer.model_max_length  # Use model's actual limit
+            )
+            batch = {
+                "input_ids": tokenizer_output["input_ids"],
+                "attention_mask": tokenizer_output["attention_mask"]
+            }
+        else:
+            # Current behavior: apply chat template
+            batch = self._batch_encode(inputs, add_generation_prompt=True, enable_thinking=enable_thinking)
 
         gen_out = self.hf_model.generate(
             **batch,
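The effect of the new prompt_is_formatted flag is easiest to see at the call site. A usage sketch, assuming model is a loaded WisentModel exposing the tokenizer attribute the diffed code itself uses:

question = [{"role": "user", "content": "What is flash attention?"}]

# Default path: WisentModel applies the chat template internally.
out = model.generate(inputs=[question], max_new_tokens=64)

# New path: apply the template yourself, then hand over the finished string.
prompt = model.tokenizer.apply_chat_template(
    question,
    tokenize=False,              # return a string rather than token ids
    add_generation_prompt=True,  # append the assistant header
)
out = model.generate(inputs=prompt, prompt_is_formatted=True, max_new_tokens=64)

Both calls return a list of decoded strings; the second simply skips _batch_encode and tokenizes the string as-is.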
@@ -612,7 +631,7 @@ class WisentModel:
     @torch.inference_mode()
     def generate_stream(
         self,
-        inputs: list[list[ChatMessage]],
+        inputs: list[list[ChatMessage]] | str,
         max_new_tokens: int = 128,
         temperature: float = 0.7,
         top_p: float = 0.95,
@@ -622,6 +641,7 @@ class WisentModel:
         skip_prompt: bool = True,
         skip_special_tokens: bool = True,
         enable_thinking: bool = True,
+        prompt_is_formatted: bool = False,
         **gen_kwargs: Any,
     ) -> Iterable[str]:
         """
@@ -630,7 +650,8 @@ class WisentModel:
 
         attributes:
             inputs:
-                list of chat messages (each a list of {'role','content'} dicts). Currently only one conversation is supported.
+                list of chat messages (each a list of {'role','content'} dicts) OR pre-formatted string.
+                Currently only one conversation is supported.
             max_new_tokens:
                 max tokens to generate (beyond the prompt).
             temperature:
@@ -650,6 +671,8 @@ class WisentModel:
                 if True, special tokens are removed from the yielded text.
             enable_thinking:
                 If False, disable thinking/reasoning mode (prevents <think> tags for supported models like Qwen).
+            prompt_is_formatted:
+                If True, inputs is a pre-formatted string with chat template already applied.
             **gen_kwargs:
                 additional kwargs passed to 'model.generate()'.
 
@@ -657,14 +680,29 @@ class WisentModel:
             generated text chunks (str), as they become available.
         """
 
-        if len(inputs) != 1:
-            raise ValueError(
-                f"generate_stream currently supports exactly one conversation at a time (got {len(inputs)})."
-            )
         if use_steering:
             self.apply_steering(steering_plan)
 
-        batch = self._batch_encode(inputs, add_generation_prompt=True, enable_thinking=enable_thinking)
+        if prompt_is_formatted and isinstance(inputs, str):
+            # Direct tokenization of pre-formatted prompt
+            tokenizer_output = self.tokenizer(
+                inputs,
+                return_tensors="pt",
+                padding=False,  # Single prompt, no padding needed
+                truncation=True,  # Avoid errors on long inputs
+                max_length=self.tokenizer.model_max_length  # Use model's actual limit
+            )
+            batch = {
+                "input_ids": tokenizer_output["input_ids"],
+                "attention_mask": tokenizer_output["attention_mask"]
+            }
+        else:
+            # Current behavior: apply chat template
+            if not isinstance(inputs, list) or len(inputs) != 1:
+                raise ValueError(
+                    f"generate_stream currently supports exactly one conversation at a time (got {type(inputs)} with {len(inputs) if isinstance(inputs, list) else 'N/A'} items)."
+                )
+            batch = self._batch_encode(inputs, add_generation_prompt=True, enable_thinking=enable_thinking)
 
         streamer = TextIteratorStreamer(
             self.tokenizer,
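generate_stream gains the same branch, with one behavioral change: the exactly-one-conversation check now guards only the chat-template path, since a pre-formatted string is tokenized directly. A streaming sketch under the same assumptions as above:

# Chat-message path: exactly one conversation is required.
for chunk in model.generate_stream(
    inputs=[[{"role": "user", "content": "Tell me a short story."}]],
    max_new_tokens=128,
):
    print(chunk, end="", flush=True)

# Pre-formatted path: a single templated string, no wrapping list.
for chunk in model.generate_stream(inputs=prompt, prompt_is_formatted=True):
    print(chunk, end="", flush=True)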
wisent/core/multi_steering.py CHANGED
@@ -166,17 +166,19 @@ class MultiSteering:
         max_new_tokens: int = 100,
         temperature: float = 0.7,
         top_p: float = 0.9,
-        enable_thinking: bool = True
+        enable_thinking: bool = True,
+        prompt_is_formatted: bool = False
     ) -> Iterable[str]:
         """Apply the combined steering vector to generate text with streaming.
 
         Args:
             model: WisentModel instance to use for generation
-            prompt: Input prompt
+            prompt: Input prompt (either raw text or pre-formatted with chat template)
             max_new_tokens: Maximum tokens to generate
             temperature: Sampling temperature
             top_p: Top-p sampling parameter
             enable_thinking: If False, disable thinking/reasoning mode (prevents <think> tags for supported models like Qwen)
+            prompt_is_formatted: If True, prompt already has chat template applied
 
         Yields:
             Generated text chunks
@@ -191,7 +193,8 @@ class MultiSteering:
             raise MultiSteeringError("No layer information available")
 
         print(f"\n🎯 Applying combined steering vector at layer {self.layer}")
-        print(f"Prompt: {prompt}")
+        print(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
+        print(f"Prompt is formatted: {prompt_is_formatted}")
         print("=" * 50)
 
         # Create SteeringPlan from the combined vector
@@ -202,13 +205,19 @@ class MultiSteering:
             normalize=False  # Already normalized in combine_vectors
         )
 
-        # Format prompt as chat messages
-        messages: list[ChatMessage] = [{"role": "user", "content": prompt}]
+        # Handle prompt formatting
+        if prompt_is_formatted:
+            # Prompt already has chat template applied - pass as string directly
+            inputs = prompt
+        else:
+            # Format prompt as chat messages (current behavior)
+            messages: list[ChatMessage] = [{"role": "user", "content": prompt}]
+            inputs = [messages]
 
         try:
             # Use WisentModel's generate_stream with steering
             yield from model.generate_stream(
-                inputs=[messages],
+                inputs=inputs,
                 max_new_tokens=max_new_tokens,
                 temperature=temperature,
                 top_p=top_p,
@@ -216,7 +225,8 @@ class MultiSteering:
                 steering_plan=steering_plan,
                 skip_prompt=True,
                 skip_special_tokens=True,
-                enable_thinking=enable_thinking
+                enable_thinking=enable_thinking,
+                prompt_is_formatted=prompt_is_formatted
             )
 
         except Exception as e:
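Both MultiSteering methods now normalize prompt the same way before delegating to WisentModel. Factored out, the logic amounts to the following (a hypothetical helper for illustration; the released code inlines it in each method):

def normalize_inputs(prompt: str, prompt_is_formatted: bool):
    """Shape the prompt the way WisentModel.generate/generate_stream expects."""
    if prompt_is_formatted:
        return prompt  # templated string passes straight through
    # Wrap raw text as a single one-message conversation.
    return [[{"role": "user", "content": prompt}]]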
@@ -231,17 +241,19 @@ class MultiSteering:
         max_new_tokens: int = 100,
         temperature: float = 0.7,
         top_p: float = 0.9,
-        enable_thinking: bool = True
+        enable_thinking: bool = True,
+        prompt_is_formatted: bool = False
     ) -> str:
         """Apply the combined steering vector to generate text (non-streaming).
 
         Args:
             model: WisentModel instance to use for generation
-            prompt: Input prompt
+            prompt: Input prompt (either raw text or pre-formatted with chat template)
             max_new_tokens: Maximum tokens to generate
             temperature: Sampling temperature
             top_p: Top-p sampling parameter
             enable_thinking: If False, disable thinking/reasoning mode (prevents <think> tags for supported models like Qwen)
+            prompt_is_formatted: If True, prompt already has chat template applied
 
         Returns:
             Generated text
@@ -256,7 +268,8 @@ class MultiSteering:
             raise MultiSteeringError("No layer information available")
 
         print(f"\n🎯 Applying combined steering vector at layer {self.layer}")
-        print(f"Prompt: {prompt}")
+        print(f"Prompt: {prompt[:100]}..." if len(prompt) > 100 else f"Prompt: {prompt}")
+        print(f"Prompt is formatted: {prompt_is_formatted}")
         print("=" * 50)
 
         # Create SteeringPlan from the combined vector
@@ -267,19 +280,26 @@ class MultiSteering:
             normalize=False  # Already normalized in combine_vectors
         )
 
-        # Format prompt as chat messages
-        messages: list[ChatMessage] = [{"role": "user", "content": prompt}]
+        # Handle prompt formatting
+        if prompt_is_formatted:
+            # Prompt already has chat template applied - pass as string directly
+            inputs = prompt
+        else:
+            # Format prompt as chat messages (current behavior)
+            messages: list[ChatMessage] = [{"role": "user", "content": prompt}]
+            inputs = [messages]
 
         try:
             # Use WisentModel's generate with steering
             outputs = model.generate(
-                inputs=[messages],
+                inputs=inputs,
                 max_new_tokens=max_new_tokens,
                 temperature=temperature,
                 top_p=top_p,
                 use_steering=True,
                 steering_plan=steering_plan,
-                enable_thinking=enable_thinking
+                enable_thinking=enable_thinking,
+                prompt_is_formatted=prompt_is_formatted
             )
 
             return outputs[0] if outputs else ""
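The flag exists to avoid double templating: without it, an already templated string would be re-wrapped as a user message and run through the chat template a second time. A minimal illustration, assuming any Hugging Face tokenizer that ships a chat template (the model name is a placeholder):

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")

messages = [{"role": "user", "content": "Hello"}]
formatted = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)

# Feeding the formatted string back through the template nests the role markers.
double = tokenizer.apply_chat_template(
    [{"role": "user", "content": formatted}],
    tokenize=False,
    add_generation_prompt=True,
)
assert formatted != double  # the second pass adds another layer of special tokens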
wisent-0.5.9.dist-info/METADATA → wisent-0.5.11.dist-info/METADATA
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: wisent
-Version: 0.5.9
+Version: 0.5.11
 Summary: Monitor and guard against harmful content in language models
 Home-page: https://github.com/yourusername/wisent-activation-guardrails
 Author: Wisent Team
wisent-0.5.9.dist-info/RECORD → wisent-0.5.11.dist-info/RECORD
@@ -1,4 +1,4 @@
-wisent/__init__.py,sha256=JXLyhF5WmLgRZBfWGz9zWe2g5ISKSLpn2jp8yLaC-s4,22
+wisent/__init__.py,sha256=xFez9dUQrcuZqZRWuEIsCbMskoR-Ke1_uUZ51Kyt1tw,23
 wisent/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wisent/benchmarks/coding/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wisent/benchmarks/coding/metrics/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -64,7 +64,7 @@ wisent/core/managed_cached_benchmarks.py,sha256=JbvpZ1fgSuQQhyQVKEvqrQZRHGqfnjo9
 wisent/core/mixed_benchmark_sampler.py,sha256=tKQCHUXVuYeCyx4VZt8O1hGyB-TOY_SQ_SYi8cyApII,13585
 wisent/core/model_config_manager.py,sha256=rQAdSmk3GFlZXyHp3fSV1bORxiZWhmzIz1uo3H4JtkA,12009
 wisent/core/model_persistence.py,sha256=6_vc1Ndujd4v0O68giINSTvYhmb7-AiacWwAbqLOrls,10636
-wisent/core/multi_steering.py,sha256=EMaKn4dZPlAsFupEUQZlxTZGJ0-ofpLcTCKQk8HaZL8,12295
+wisent/core/multi_steering.py,sha256=YhVKmf08KacVEYZWLk6t2uNWSv-Pi_zBeLdDopo3QXk,13491
 wisent/core/parser.py,sha256=_YDeSuQMx0zNknz9rX3Ls1YPT1x5eohoY8rfjeoqxV8,69091
 wisent/core/representation.py,sha256=hBl_N9qbr5Gsa7GCQ0nMWRm82RqYEfhd9cyf0PPH5LY,195
 wisent/core/sample_size_optimizer.py,sha256=6wegGXZpdGpiR4R0YJ1D2JqLr6yinMndEx2gB5FL80s,23666
@@ -134,7 +134,7 @@ wisent/core/evaluators/oracles/interactive.py,sha256=f3v2_N17fKzGyeOxONRJbrbn8i5
 wisent/core/evaluators/oracles/nlp_evaluator.py,sha256=KxbnF-I2IFbBQpoYyjQKGbYh4NErsEuhTCRYX_Tob8o,18220
 wisent/core/evaluators/oracles/user_specified.py,sha256=V1dKrNj3Oq7UC_I7DT0WGnktP7R_DSW6UAwDdrA8SnE,2360
 wisent/core/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-wisent/core/models/wisent_model.py,sha256=_NDi4oHZnwtUbusqPw8vw1_YYifbsRnD_g25M2uCf08,29772
+wisent/core/models/wisent_model.py,sha256=_XQpakCPJGdzeeSd0gPxp0yd057HisA69uAaW-katDo,31788
 wisent/core/models/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wisent/core/models/core/atoms.py,sha256=_Bpz0Sfiq6_VswThIltUwNGj_ukl5MhAg8RrgMKwEBM,15756
 wisent/core/optuna/__init__.py,sha256=sTfwRnrRyKrCNVsF_qCjBDFEZC0ZmUZ7m6IE0iHfTVs,1914
@@ -213,8 +213,8 @@ wisent/synthetic/generators/diversities/core/__init__.py,sha256=47DEQpj8HBSa-_TI
 wisent/synthetic/generators/diversities/core/core.py,sha256=TjSj5T7NE5kRH-ABcFqb1Hz_j3Z6F_TcV-95uHD5Xw8,2201
 wisent/synthetic/generators/diversities/methods/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 wisent/synthetic/generators/diversities/methods/fast_diversity.py,sha256=Z2UzTbzyJFM_ToxCoXM_LQQQ1Jc6BZknrbpikTG1MRw,8522
-wisent-0.5.9.dist-info/licenses/LICENSE,sha256=wy0iaw8b2tyqZAfKHib3lP3PJ9o88FDCg92oUHh3sDQ,1073
-wisent-0.5.9.dist-info/METADATA,sha256=lAomuCOIdAio3ai9_IunQG9hytR1WWJ3UjtiScFw9kc,2424
-wisent-0.5.9.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-wisent-0.5.9.dist-info/top_level.txt,sha256=2Ts9Iyldnb3auIN2HBBaHPknRy7nSRDm2f6RGzYgr8A,7
-wisent-0.5.9.dist-info/RECORD,,
+wisent-0.5.11.dist-info/licenses/LICENSE,sha256=wy0iaw8b2tyqZAfKHib3lP3PJ9o88FDCg92oUHh3sDQ,1073
+wisent-0.5.11.dist-info/METADATA,sha256=na_FE2Pdt5j8nixhi8wFTOJYLzWweXPgylqL_3Pklx4,2425
+wisent-0.5.11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+wisent-0.5.11.dist-info/top_level.txt,sha256=2Ts9Iyldnb3auIN2HBBaHPknRy7nSRDm2f6RGzYgr8A,7
+wisent-0.5.11.dist-info/RECORD,,