validmind 2.8.20__py3-none-any.whl → 2.8.26__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. validmind/__init__.py +3 -0
  2. validmind/__version__.py +1 -1
  3. validmind/ai/utils.py +89 -0
  4. validmind/api_client.py +4 -0
  5. validmind/client.py +3 -0
  6. validmind/experimental/__init__.py +0 -0
  7. validmind/experimental/agents.py +65 -0
  8. validmind/template.py +3 -2
  9. validmind/tests/data_validation/MutualInformation.py +14 -2
  10. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -1
  11. validmind/tests/model_validation/ragas/AspectCritic.py +5 -1
  12. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -1
  13. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -1
  14. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -1
  15. validmind/tests/model_validation/ragas/ContextRecall.py +5 -1
  16. validmind/tests/model_validation/ragas/Faithfulness.py +5 -1
  17. validmind/tests/model_validation/ragas/NoiseSensitivity.py +3 -1
  18. validmind/tests/model_validation/ragas/ResponseRelevancy.py +6 -4
  19. validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -1
  20. validmind/tests/model_validation/ragas/utils.py +4 -24
  21. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -1
  22. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +13 -0
  23. validmind/tests/prompt_validation/Bias.py +2 -1
  24. validmind/tests/prompt_validation/Clarity.py +2 -1
  25. validmind/tests/prompt_validation/Conciseness.py +2 -1
  26. validmind/tests/prompt_validation/Delimitation.py +2 -1
  27. validmind/tests/prompt_validation/NegativeInstruction.py +2 -1
  28. validmind/tests/prompt_validation/Robustness.py +3 -2
  29. validmind/tests/prompt_validation/Specificity.py +2 -1
  30. validmind/tests/prompt_validation/ai_powered_test.py +18 -17
  31. validmind/vm_models/dataset/dataset.py +64 -27
  32. validmind/vm_models/result/__init__.py +16 -2
  33. validmind/vm_models/result/result.py +127 -14
  34. {validmind-2.8.20.dist-info → validmind-2.8.26.dist-info}/METADATA +4 -3
  35. {validmind-2.8.20.dist-info → validmind-2.8.26.dist-info}/RECORD +38 -36
  36. {validmind-2.8.20.dist-info → validmind-2.8.26.dist-info}/WHEEL +1 -1
  37. {validmind-2.8.20.dist-info → validmind-2.8.26.dist-info}/LICENSE +0 -0
  38. {validmind-2.8.20.dist-info → validmind-2.8.26.dist-info}/entry_points.txt +0 -0

validmind/tests/prompt_validation/NegativeInstruction.py
@@ -52,7 +52,7 @@ Prompt:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def NegativeInstruction(model, min_threshold=7):
+def NegativeInstruction(model, min_threshold=7, judge_llm=None):
     """
     Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
 
@@ -101,6 +101,7 @@ def NegativeInstruction(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
    )
    score = get_score(response)
    explanation = get_explanation(response)

validmind/tests/prompt_validation/Robustness.py
@@ -25,7 +25,7 @@ Contradictions, edge cases, typos, bad phrasing, distracting, complex or out-of-
 Be creative and think step-by-step how you would break the prompt.
 Then generate {num_tests} inputs for the user-submitted prompt template that would break the prompt.
 Each input should be different from the others.
-Each input should be retured as a new line in your response.
+Each input should be returned as a new line in your response.
 Respond only with the values to be inserted into the prompt template and do not include quotes, explanations or any extra text.
 
 Example:
@@ -56,7 +56,7 @@ Input:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Robustness(model, dataset, num_tests=10):
+def Robustness(model, dataset, num_tests=10, judge_llm=None):
     """
     Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts. This test
     specifically measures the model's ability to generate correct classifications with the given prompt even when the
@@ -112,6 +112,7 @@ def Robustness(model, dataset, num_tests=10):
     generated_inputs = call_model(
         system_prompt=SYSTEM.format(num_tests=num_tests),
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
    ).split("\n")
 
    responses = model.predict(

validmind/tests/prompt_validation/Specificity.py
@@ -52,7 +52,7 @@ Prompt:
 
 @tags("llm", "zero_shot", "few_shot")
 @tasks("text_classification", "text_summarization")
-def Specificity(model, min_threshold=7):
+def Specificity(model, min_threshold=7, judge_llm=None):
     """
     Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity, detail,
     and relevance.
@@ -97,6 +97,7 @@ def Specificity(model, min_threshold=7):
     response = call_model(
         system_prompt=SYSTEM,
         user_prompt=USER.format(prompt_to_test=model.prompt.template),
+        judge_llm=judge_llm,
    )
    score = get_score(response)
    explanation = get_explanation(response)

validmind/tests/prompt_validation/ai_powered_test.py
@@ -4,7 +4,7 @@
 
 import re
 
-from validmind.ai.utils import get_client_and_model, is_configured
+from validmind.ai.utils import get_judge_config, is_configured
 
 missing_prompt_message = """
 Cannot run prompt validation tests on a model with no prompt.
@@ -21,7 +21,12 @@ my_vm_model = vm.init_model(
 
 
 def call_model(
-    system_prompt: str, user_prompt: str, temperature: float = 0.0, seed: int = 42
+    system_prompt: str,
+    user_prompt: str,
+    temperature: float = 0.0,
+    seed: int = 42,
+    judge_llm=None,
+    judge_embeddings=None,
 ):
     """Call LLM with the given prompts and return the response"""
     if not is_configured():
@@ -31,21 +36,17 @@ def call_model(
             "enabled for your account."
         )
 
-    client, model = get_client_and_model()
-
-    return (
-        client.chat.completions.create(
-            model=model,
-            messages=[
-                {"role": "system", "content": system_prompt.strip("\n").strip()},
-                {"role": "user", "content": user_prompt.strip("\n").strip()},
-            ],
-            temperature=temperature,
-            seed=seed,
-        )
-        .choices[0]
-        .message.content
-    )
+    judge_llm, judge_embeddings = get_judge_config(judge_llm, judge_embeddings)
+    messages = [
+        ("system", system_prompt.strip("\n").strip()),
+        ("user", user_prompt.strip("\n").strip()),
+    ]
+
+    return judge_llm.invoke(
+        messages,
+        temperature=temperature,
+        seed=seed,
+    ).content
 
 
 def get_score(response: str):
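
The hunks above thread an optional judge_llm through the prompt-validation tests and swap the raw OpenAI client for a LangChain-style chat model (call_model now returns judge_llm.invoke(messages, ...).content). A rough usage sketch, assuming test keyword arguments are supplied through run_test's params and using langchain-openai (a declared dependency of this package); the model name and my_prompt_model are illustrative placeholders, not taken from this diff:

# Sketch only: supply a custom judge model to a prompt-validation test.
import validmind as vm
from langchain_openai import ChatOpenAI  # ships with the "llm" extra

judge = ChatOpenAI(model="gpt-4o", temperature=0.0)  # illustrative model name

result = vm.tests.run_test(
    "validmind.prompt_validation.Specificity",
    inputs={"model": my_prompt_model},  # placeholder: a vm.init_model(...) that carries a prompt
    params={"min_threshold": 7, "judge_llm": judge},
)
result.log()

If judge_llm is left as None, get_judge_config presumably falls back to the account-level LLM configuration checked by is_configured().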

validmind/vm_models/dataset/dataset.py
@@ -47,6 +47,7 @@ class VMDataset(VMInput):
         target_class_labels (Dict): The class labels for the target columns.
         df (pd.DataFrame): The dataset as a pandas DataFrame.
         extra_columns (Dict): Extra columns to include in the dataset.
+        copy_data (bool): Whether to copy the data. Defaults to True.
    """
 
    def __repr__(self):
@@ -66,6 +67,7 @@ class VMDataset(VMInput):
         text_column: str = None,
         extra_columns: dict = None,
         target_class_labels: dict = None,
+        copy_data: bool = True,
    ):
        """
        Initializes a VMDataset instance.
@@ -82,6 +84,7 @@ class VMDataset(VMInput):
             feature_columns (str, optional): The feature column names of the dataset. Defaults to None.
             text_column (str, optional): The text column name of the dataset for nlp tasks. Defaults to None.
             target_class_labels (Dict, optional): The class labels for the target columns. Defaults to None.
+            copy_data (bool, optional): Whether to copy the data. Defaults to True.
        """
        # initialize input_id
        self.input_id = input_id
@@ -112,6 +115,7 @@ class VMDataset(VMInput):
         self.target_class_labels = target_class_labels
         self.extra_columns = ExtraColumns.from_dict(extra_columns)
         self._set_feature_columns(feature_columns)
+        self._copy_data = copy_data
 
        if model:
            self.assign_predictions(model)
@@ -129,16 +133,19 @@ class VMDataset(VMInput):
         excluded = [self.target_column, *self.extra_columns.flatten()]
         self.feature_columns = [col for col in self.columns if col not in excluded]
 
-        self.feature_columns_numeric = (
-            self._df[self.feature_columns]
-            .select_dtypes(include=[np.number])
-            .columns.tolist()
-        )
-        self.feature_columns_categorical = (
-            self._df[self.feature_columns]
-            .select_dtypes(include=[object, pd.Categorical])
-            .columns.tolist()
-        )
+        # Get dtypes without loading data into memory
+        feature_dtypes = self._df[self.feature_columns].dtypes
+
+        self.feature_columns_numeric = feature_dtypes[
+            feature_dtypes.apply(lambda x: pd.api.types.is_numeric_dtype(x))
+        ].index.tolist()
+
+        self.feature_columns_categorical = feature_dtypes[
+            feature_dtypes.apply(
+                lambda x: pd.api.types.is_categorical_dtype(x)
+                or pd.api.types.is_object_dtype(x)
+            )
+        ].index.tolist()
 
    def _add_column(self, column_name, column_values):
        column_values = np.array(column_values)
@@ -397,8 +404,18 @@ class VMDataset(VMInput):
             assert self.target_column not in columns
             columns.append(self.target_column)
 
-        # return a copy to prevent accidental modification
-        return as_df(self._df[columns]).copy()
+        # Check if all columns in self._df are requested
+        all_columns = set(columns) == set(self._df.columns)
+
+        # For copy_data=False and all columns: return exact same DataFrame object
+        if not self._copy_data and all_columns:
+            return self._df
+        # For copy_data=False and subset of columns: return view with shared data
+        elif not self._copy_data:
+            return as_df(self._df[columns])
+        # For copy_data=True: return independent copy with duplicated data
+        else:
+            return as_df(self._df[columns]).copy()
 
    @property
    def x(self) -> np.ndarray:
@@ -522,9 +539,10 @@ class DataFrameDataset(VMDataset):
         text_column: str = None,
         target_class_labels: dict = None,
         date_time_index: bool = False,
+        copy_data: bool = True,
    ):
        """
-        Initializes a DataFrameDataset instance.
+        Initializes a DataFrameDataset instance, preserving original pandas dtypes.
 
        Args:
            raw_dataset (pd.DataFrame): The raw dataset as a pandas DataFrame.
@@ -536,25 +554,44 @@ class DataFrameDataset(VMDataset):
             text_column (str, optional): The text column name of the dataset for NLP tasks. Defaults to None.
             target_class_labels (dict, optional): The class labels for the target columns. Defaults to None.
             date_time_index (bool, optional): Whether to use date-time index. Defaults to False.
+            copy_data (bool, optional): Whether to create a copy of the input data. Defaults to True.
        """
+
+        VMInput.__init__(self)
+
+        self.input_id = input_id
+
        index = None
        if isinstance(raw_dataset.index, pd.Index):
            index = raw_dataset.index.values
+        self.index = index
 
-        super().__init__(
-            raw_dataset=raw_dataset.values,
-            input_id=input_id,
-            model=model,
-            index_name=raw_dataset.index.name,
-            index=index,
-            columns=raw_dataset.columns.to_list(),
-            target_column=target_column,
-            extra_columns=extra_columns,
-            feature_columns=feature_columns,
-            text_column=text_column,
-            target_class_labels=target_class_labels,
-            date_time_index=date_time_index,
-        )
+        # Store the DataFrame directly
+        self._df = raw_dataset
+
+        if date_time_index:
+            self._df = convert_index_to_datetime(self._df)
+
+        self.columns = raw_dataset.columns.tolist()
+        self.column_aliases = {}
+        self.target_column = target_column
+        self.text_column = text_column
+        self.target_class_labels = target_class_labels
+        self.extra_columns = ExtraColumns.from_dict(extra_columns)
+        self._copy_data = copy_data
+
+        # Add warning when copy_data is False
+        if not copy_data:
+            logger.warning(
+                "Dataset initialized with copy_data=False. Changes to the original DataFrame "
+                "may affect this dataset. Use this option only when memory efficiency is critical "
+                "and you won't modify the source data."
+            )
+
+        self._set_feature_columns(feature_columns)
+
+        if model:
+            self.assign_predictions(model)
 
 
 class PolarsDataset(VMDataset):
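
The dataset changes add a copy_data flag: with the default True the dataset keeps returning independent copies, while False returns the original frame (when all columns are requested) or a view (for a subset), trading mutation safety for memory. A minimal sketch of building a dataset that shares memory with its source; it constructs DataFrameDataset directly because this diff does not show whether vm.init_dataset forwards the new flag, and the column names are illustrative:

# Sketch only: share memory with the source DataFrame instead of copying it.
import pandas as pd
from validmind.vm_models.dataset.dataset import DataFrameDataset

raw = pd.DataFrame({"age": [34, 58], "income": [52_000, 91_000], "default": [0, 1]})

ds = DataFrameDataset(
    raw_dataset=raw,
    input_id="training_ds",
    target_column="default",
    copy_data=False,  # logs a warning; `raw` must not be mutated afterwards
)

print(ds.feature_columns_numeric)  # dtypes are read from the frame without materializing a copy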

validmind/vm_models/result/__init__.py
@@ -2,6 +2,20 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 
-from .result import ErrorResult, RawData, Result, ResultTable, TestResult
+from .result import (
+    ErrorResult,
+    RawData,
+    Result,
+    ResultTable,
+    TestResult,
+    TextGenerationResult,
+)
 
-__all__ = ["ErrorResult", "RawData", "Result", "ResultTable", "TestResult"]
+__all__ = [
+    "ErrorResult",
+    "RawData",
+    "Result",
+    "ResultTable",
+    "TestResult",
+    "TextGenerationResult",
+]

validmind/vm_models/result/result.py
@@ -129,6 +129,7 @@ class Result:
 
     result_id: str = None
     name: str = None
+    result_type: str = None
 
    def __str__(self) -> str:
        """May be overridden by subclasses."""
@@ -445,6 +446,7 @@ class TestResult(Result):
     async def log_async(
         self,
         section_id: str = None,
+        content_id: str = None,
         position: int = None,
         config: Dict[str, bool] = None,
    ):
@@ -464,7 +466,7 @@ class TestResult(Result):
                 )
             )
 
-        if self.tables or self.figures or self.description:
+        if self.tables:
            tasks.append(
                api_client.alog_test_result(
                    result=self.serialize(),
@@ -473,30 +475,32 @@ class TestResult(Result):
                     config=config,
                 )
             )
-
+        if self.figures:
            tasks.extend(
                [api_client.alog_figure(figure) for figure in (self.figures or [])]
            )
+        if self.description:
+            revision_name = (
+                AI_REVISION_NAME
+                if self._was_description_generated
+                else DEFAULT_REVISION_NAME
+            )
 
-        if self.description:
-            revision_name = (
-                AI_REVISION_NAME
-                if self._was_description_generated
-                else DEFAULT_REVISION_NAME
-            )
-
-        tasks.append(
-            update_metadata(
-                content_id=f"test_description:{self.result_id}::{revision_name}",
-                text=self.description,
-            )
+            tasks.append(
+                update_metadata(
+                    content_id=f"{content_id}::{revision_name}"
+                    if content_id
+                    else f"test_description:{self.result_id}::{revision_name}",
+                    text=self.description,
                )
+            )
 
        return await asyncio.gather(*tasks)
 
    def log(
        self,
        section_id: str = None,
+        content_id: str = None,
        position: int = None,
        unsafe: bool = False,
        config: Dict[str, bool] = None,
@@ -506,6 +510,7 @@ class TestResult(Result):
         Args:
             section_id (str): The section ID within the model document to insert the
                 test result.
+            content_id (str): The content ID to log the result to.
            position (int): The position (index) within the section to insert the test
                result.
            unsafe (bool): If True, log the result even if it contains sensitive data
@@ -533,6 +538,7 @@ class TestResult(Result):
         run_async(
             self.log_async,
             section_id=section_id,
+            content_id=content_id,
            position=position,
            config=config,
        )
@@ -568,3 +574,110 @@ class TestResult(Result):
         raise InvalidParameterError(
             f"Values for config keys must be boolean. Non-boolean values found for keys: {', '.join(non_bool_keys)}"
         )
+
+
+@dataclass
+class TextGenerationResult(Result):
+    """Test result."""
+
+    name: str = "Text Generation Result"
+    ref_id: str = None
+    title: Optional[str] = None
+    doc: Optional[str] = None
+    description: Optional[Union[str, DescriptionFuture]] = None
+    params: Optional[Dict[str, Any]] = None
+    metadata: Optional[Dict[str, Any]] = None
+    _was_description_generated: bool = False
+
+    def __post_init__(self):
+        if self.ref_id is None:
+            self.ref_id = str(uuid4())
+
+    def __repr__(self) -> str:
+        attrs = [
+            attr
+            for attr in [
+                "doc",
+                "description",
+                "params",
+            ]
+            if getattr(self, attr) is not None
+            and (
+                len(getattr(self, attr)) > 0
+                if isinstance(getattr(self, attr), list)
+                else True
+            )
+        ]
+
+        return f'TextGenerationResult("{self.result_id}", {", ".join(attrs)})'
+
+    def __getattribute__(self, name):
+        # lazy load description if its a DescriptionFuture (generated in background)
+        if name == "description":
+            description = super().__getattribute__("description")
+
+            if isinstance(description, DescriptionFuture):
+                self._was_description_generated = True
+                self.description = description.get_description()
+
+        return super().__getattribute__(name)
+
+    @property
+    def test_name(self) -> str:
+        """Get the test name, using custom title if available."""
+        return self.title or test_id_to_name(self.result_id)
+
+    def to_widget(self):
+        template_data = {
+            "test_name": self.test_name,
+            "description": self.description.replace("h3", "strong"),
+            "params": (
+                json.dumps(self.params, cls=NumpyEncoder, indent=2)
+                if self.params
+                else None
+            ),
+        }
+        rendered = get_result_template().render(**template_data)
+
+        widgets = [HTML(rendered)]
+
+        return VBox(widgets)
+
+    def serialize(self):
+        """Serialize the result for the API."""
+        return {
+            "test_name": self.result_id,
+            "title": self.title,
+            "ref_id": self.ref_id,
+            "params": self.params,
+            "metadata": self.metadata,
+        }
+
+    async def log_async(
+        self,
+        content_id: str = None,
+    ):
+        return await asyncio.gather(
+            update_metadata(
+                content_id=f"{content_id}",
+                text=self.description,
+            )
+        )
+
+    def log(
+        self,
+        content_id: str = None,
+    ):
+        """Log the result to ValidMind.
+
+        Args:
+            section_id (str): The section ID within the model document to insert the
+                test result.
+            content_id (str): The content ID to log the result to.
+            position (int): The position (index) within the section to insert the test
+                result.
+        """
+        run_async(
+            self.log_async,
+            content_id=content_id,
+        )
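
Both TestResult.log and the new TextGenerationResult.log accept a content_id, which routes the (possibly AI-generated) description to a named content block instead of the default test_description:<result_id> metadata key. A hedged sketch of what that call might look like; the test ID comes from the file list above, while the content_id value and the ds input are placeholders:

# Sketch only: log a result's description under a specific content block ID.
import validmind as vm

result = vm.tests.run_test(
    "validmind.data_validation.MutualInformation",
    inputs={"dataset": ds},  # ds: a previously initialized VM dataset
)

# Stored as "<content_id>::<revision>" rather than "test_description:<result_id>::<revision>".
result.log(content_id="data_quality_summary")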

{validmind-2.8.20.dist-info → validmind-2.8.26.dist-info}/METADATA
@@ -1,11 +1,11 @@
 Metadata-Version: 2.3
 Name: validmind
-Version: 2.8.20
+Version: 2.8.26
 Summary: ValidMind Library
 License: Commercial License
 Author: Andres Rodriguez
 Author-email: andres@validmind.ai
-Requires-Python: >=3.8.1,<3.12
+Requires-Python: >=3.9.0,<3.12
 Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
@@ -22,6 +22,7 @@ Requires-Dist: bert-score (>=0.3.13)
 Requires-Dist: catboost
 Requires-Dist: datasets (>=2.10.0,<3.0.0)
 Requires-Dist: evaluate
+Requires-Dist: h11 (>=0.16.0)
 Requires-Dist: ipywidgets
 Requires-Dist: kaleido (>=0.2.1,!=0.2.1.post1)
 Requires-Dist: langchain-openai (>=0.1.8) ; extra == "all" or extra == "llm"
@@ -53,7 +54,7 @@ Requires-Dist: statsmodels
 Requires-Dist: tabulate (>=0.8.9,<0.9.0)
 Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
 Requires-Dist: tiktoken
-Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
+Requires-Dist: torch (==2.7.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
 Requires-Dist: tqdm
 Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
 Requires-Dist: xgboost (>=1.5.2,<3)