PyPI - validmind - Versions diffs - 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl - Mend

validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (233) hide show

validmind/__version__.py CHANGED Viewed

	@@ -1 +1 @@
1	- __version__ = "2.8.28"
1	+ __version__ = "2.9.1"

validmind/ai/utils.py CHANGED Viewed

@@ -3,9 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 import os
-from urllib.parse import urljoin
-from openai import AzureOpenAI, Client, OpenAI
+from openai import AzureOpenAI, OpenAI
 from ..logging import get_logger
 from ..utils import md_to_html
@@ -83,28 +82,9 @@ def get_client_and_model():
         logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")
     else:
-        try:
-            # TODO: fix circular import
-            from ..api_client import get_ai_key, get_api_host
-            response = get_ai_key()
-            __client = Client(
-                base_url=(
-                    # TODO: improve this to be a bit more dynamic
-                    "http://localhost:4000/genai"
-                    if "localhost" in get_api_host()
-                    else urljoin(get_api_host(), "/genai")
-                ),
-                api_key=response["key"],
-            )
-            __model = "gpt-4o"  # TODO: backend should tell us which model to use
-            logger.debug(f"Using ValidMind {__model} for generating descriptions")
-        except Exception as e:
-            logger.debug(f"Failed to get API key: {e}")
-            raise ValueError(
-                "OPENAI_API_KEY, AZURE_OPENAI_KEY must be set, or your account "
-                "must be setup to use ValidMind's LLM in order to use LLM features"
-            )
+        raise ValueError(
+            "OPENAI_API_KEY, AZURE_OPENAI_KEY must be setup to use LLM features"
+        )
     return __client, __model

validmind/api_client.py CHANGED Viewed

@@ -22,7 +22,7 @@ from ipywidgets import HTML, Accordion
 from .client_config import client_config
 from .errors import MissingAPICredentialsError, MissingModelIdError, raise_api_error
-from .logging import get_logger, init_sentry, send_single_error
+from .logging import get_logger, init_sentry, log_api_operation, send_single_error
 from .utils import NumpyEncoder, is_html, md_to_html, run_async
 from .vm_models import Figure
@@ -85,7 +85,7 @@ def _get_session() -> aiohttp.ClientSession:
     if not __api_session or __api_session.closed:
         __api_session = aiohttp.ClientSession(
             headers=_get_api_headers(),
-            timeout=aiohttp.ClientTimeout(total=30),
+            timeout=aiohttp.ClientTimeout(total=int(os.getenv("VM_API_TIMEOUT", 30))),
         )
     return __api_session
@@ -304,6 +304,10 @@ async def alog_metadata(
         raise e
+@log_api_operation(
+    operation_name="Sending figure to ValidMind API",
+    extract_key=lambda figure: figure.key,
+)
 async def alog_figure(figure: Figure) -> Dict[str, Any]:
     """Logs a figure.
@@ -525,21 +529,6 @@ def log_metric(
     )
-def get_ai_key() -> Dict[str, Any]:
-    """Calls the API to get an API key for our LLM proxy."""
-    r = requests.get(
-        url=_get_url("ai/key"),
-        headers=_get_api_headers(),
-    )
-    if r.status_code != 200:
-        # TODO: improve error handling when there's no Open AI API or AI key available
-        # logger.error("Could not get AI key from ValidMind API")
-        raise_api_error(r.text)
-    return r.json()
 def generate_test_result_description(test_result_data: Dict[str, Any]) -> str:
     r = requests.post(
         url=_get_url("ai/generate/test_result_description"),

validmind/logging.py CHANGED Viewed

@@ -170,6 +170,54 @@ async def log_performance_async(
     return wrap
+def log_api_operation(
+    operation_name: Optional[str] = None,
+    logger: Optional[logging.Logger] = None,
+    extract_key: Optional[Callable] = None,
+    force: bool = False,
+) -> Callable[[F], F]:
+    """Decorator to log API operations like figure uploads.
+    Args:
+        operation_name (str, optional): The name of the operation. Defaults to function name.
+        logger (logging.Logger, optional): The logger to use. Defaults to None.
+        extract_key (Callable, optional): Function to extract a key from args for logging.
+        force (bool, optional): Whether to force logging even if env var is off.
+    Returns:
+        Callable: The decorated function.
+    """
+    def decorator(func: F) -> F:
+        # check if log level is set to debug
+        if _get_log_level() != logging.DEBUG and not force:
+            return func
+        nonlocal logger
+        if logger is None:
+            logger = get_logger()
+        nonlocal operation_name
+        if operation_name is None:
+            operation_name = func.__name__
+        async def wrapped(*args: Any, **kwargs: Any) -> Any:
+            # Try to extract a meaningful identifier from the arguments
+            identifier = ""
+            if extract_key and args:
+                try:
+                    identifier = f": {extract_key(args[0])}"
+                except (AttributeError, IndexError):
+                    pass
+            logger.debug(f"{operation_name}{identifier}")
+            return await func(*args, **kwargs)
+        return wrapped
+    return decorator
 def send_single_error(error: Exception) -> None:
     """Send a single error to Sentry.

validmind/models/function.py CHANGED Viewed

@@ -35,7 +35,8 @@ class FunctionModel(VMModel):
     Attributes:
         predict_fn (callable): The predict function that should take a dictionary of
-            input features and return a prediction.
+            input features and return a prediction. Can return simple values or
+            dictionary objects.
         input_id (str, optional): The input ID for the model. Defaults to None.
         name (str, optional): The name of the model. Defaults to the name of the predict_fn.
         prompt (Prompt, optional): If using a prompt, the prompt object that defines the template
@@ -55,6 +56,13 @@ class FunctionModel(VMModel):
             X (pandas.DataFrame): The input features to predict on
         Returns:
-            List[Any]: The predictions
+            List[Any]: The predictions. Can contain simple values or dictionary objects
+                       depending on what the predict_fn returns.
         """
-        return [self.predict_fn(x) for x in X.to_dict(orient="records")]
+        predictions = []
+        for x in X.to_dict(orient="records"):
+            result = self.predict_fn(x)
+            # Handle both simple values and complex dictionary returns
+            predictions.append(result)
+        return predictions

validmind/tests/__init__.py CHANGED Viewed

@@ -43,6 +43,8 @@ __all__ = [
     "data_validation",
     "model_validation",
     "prompt_validation",
+    "plots",
+    "stats",
     "list_tests",
     "load_test",
     "describe_test",

validmind/tests/__types__.py CHANGED Viewed

@@ -187,6 +187,10 @@ TestID = Union[
         "validmind.ongoing_monitoring.ScoreBandsDrift",
         "validmind.ongoing_monitoring.ScorecardHistogramDrift",
         "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
+        "validmind.plots.BoxPlot",
+        "validmind.plots.CorrelationHeatmap",
+        "validmind.plots.HistogramPlot",
+        "validmind.plots.ViolinPlot",
         "validmind.prompt_validation.Bias",
         "validmind.prompt_validation.Clarity",
         "validmind.prompt_validation.Conciseness",
@@ -194,11 +198,25 @@ TestID = Union[
         "validmind.prompt_validation.NegativeInstruction",
         "validmind.prompt_validation.Robustness",
         "validmind.prompt_validation.Specificity",
+        "validmind.stats.CorrelationAnalysis",
+        "validmind.stats.DescriptiveStats",
+        "validmind.stats.NormalityTests",
+        "validmind.stats.OutlierDetection",
         "validmind.unit_metrics.classification.Accuracy",
         "validmind.unit_metrics.classification.F1",
         "validmind.unit_metrics.classification.Precision",
         "validmind.unit_metrics.classification.ROC_AUC",
         "validmind.unit_metrics.classification.Recall",
+        "validmind.unit_metrics.classification.individual.AbsoluteError",
+        "validmind.unit_metrics.classification.individual.BrierScore",
+        "validmind.unit_metrics.classification.individual.CalibrationError",
+        "validmind.unit_metrics.classification.individual.ClassBalance",
+        "validmind.unit_metrics.classification.individual.Confidence",
+        "validmind.unit_metrics.classification.individual.Correctness",
+        "validmind.unit_metrics.classification.individual.LogLoss",
+        "validmind.unit_metrics.classification.individual.OutlierScore",
+        "validmind.unit_metrics.classification.individual.ProbabilityError",
+        "validmind.unit_metrics.classification.individual.Uncertainty",
         "validmind.unit_metrics.regression.AdjustedRSquaredScore",
         "validmind.unit_metrics.regression.GiniCoefficient",
         "validmind.unit_metrics.regression.HuberLoss",

validmind/tests/data_validation/ACFandPACFPlot.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Tuple
 import pandas as pd
 import plotly.graph_objects as go
 from statsmodels.tsa.stattools import acf, pacf
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
 @tags("time_series_data", "forecasting", "statistical_test", "visualization")
 @tasks("regression")
-def ACFandPACFPlot(dataset: VMDataset):
+def ACFandPACFPlot(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
     """
     Analyzes time series data using Autocorrelation Function (ACF) and Partial Autocorrelation Function (PACF) plots to
     reveal trends and correlations.

validmind/tests/data_validation/ADF.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Dict
 import pandas as pd
 from statsmodels.tsa.stattools import adfuller
@@ -16,7 +18,7 @@ logger = get_logger(__name__)
     "time_series_data", "statsmodels", "forecasting", "statistical_test", "stationarity"
 )
 @tasks("regression")
-def ADF(dataset: VMDataset):
+def ADF(dataset: VMDataset) -> Dict[str, pd.DataFrame]:
     """
     Assesses the stationarity of a time series dataset using the Augmented Dickey-Fuller (ADF) test.

validmind/tests/data_validation/AutoAR.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Dict
 import pandas as pd
 from statsmodels.tsa.ar_model import AutoReg
 from statsmodels.tsa.stattools import adfuller
@@ -15,7 +17,7 @@ logger = get_logger(__name__)
 @tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
 @tasks("regression")
-def AutoAR(dataset: VMDataset, max_ar_order: int = 3):
+def AutoAR(dataset: VMDataset, max_ar_order: int = 3) -> Dict[str, pd.DataFrame]:
     """
     Automatically identifies the optimal Autoregressive (AR) order for a time series using BIC and AIC criteria.

validmind/tests/data_validation/AutoMA.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Dict, Tuple
 import pandas as pd
 from statsmodels.tsa.arima.model import ARIMA
 from statsmodels.tsa.stattools import adfuller
@@ -15,7 +17,9 @@ logger = get_logger(__name__)
 @tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
 @tasks("regression")
-def AutoMA(dataset: VMDataset, max_ma_order: int = 3):
+def AutoMA(
+    dataset: VMDataset, max_ma_order: int = 3
+) -> Tuple[Dict[str, pd.DataFrame], RawData]:
     """
     Automatically selects the optimal Moving Average (MA) order for each variable in a time series dataset based on
     minimal BIC and AIC values.

validmind/tests/data_validation/AutoStationarity.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Dict
 import numpy as np
 import pandas as pd
 from statsmodels.tsa.stattools import adfuller
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset
 @tags("time_series_data", "statsmodels", "forecasting", "statistical_test")
 @tasks("regression")
-def AutoStationarity(dataset: VMDataset, max_order: int = 5, threshold: float = 0.05):
+def AutoStationarity(
+    dataset: VMDataset, max_order: int = 5, threshold: float = 0.05
+) -> Dict[str, pd.DataFrame]:
     """
     Automates Augmented Dickey-Fuller test to assess stationarity across multiple time series in a DataFrame.

validmind/tests/data_validation/BivariateScatterPlots.py CHANGED Viewed

@@ -3,15 +3,17 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 import itertools
+from typing import Tuple
 import plotly.express as px
+import plotly.graph_objects as go
 from validmind import RawData, tags, tasks
 @tags("tabular_data", "numerical_data", "visualization")
 @tasks("classification")
-def BivariateScatterPlots(dataset):
+def BivariateScatterPlots(dataset) -> Tuple[go.Figure, RawData]:
     """
     Generates bivariate scatterplots to visually inspect relationships between pairs of numerical predictor variables
     in machine learning classification tasks.

validmind/tests/data_validation/BoxPierce.py CHANGED Viewed

@@ -2,6 +2,9 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Tuple
 import pandas as pd
 from statsmodels.stats.diagnostic import acorr_ljungbox
@@ -10,7 +13,7 @@ from validmind import RawData, tags, tasks
 @tasks("regression")
 @tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
-def BoxPierce(dataset):
+def BoxPierce(dataset) -> Tuple[pd.DataFrame, RawData]:
     """
     Detects autocorrelation in time-series data through the Box-Pierce test to validate model performance.

validmind/tests/data_validation/ChiSquaredFeaturesTable.py CHANGED Viewed

@@ -12,7 +12,7 @@ from validmind.errors import SkipTestError
 @tags("tabular_data", "categorical_data", "statistical_test")
 @tasks("classification")
-def ChiSquaredFeaturesTable(dataset, p_threshold=0.05):
+def ChiSquaredFeaturesTable(dataset, p_threshold=0.05) -> pd.DataFrame:
     """
     Assesses the statistical association between categorical features and a target variable using the Chi-Squared test.

validmind/tests/data_validation/ClassImbalance.py CHANGED Viewed

@@ -20,7 +20,7 @@ from validmind.vm_models import VMDataset
 @tasks("classification")
 def ClassImbalance(
     dataset: VMDataset, min_percent_threshold: int = 10
-) -> Tuple[Dict[str, Any], go.Figure, bool]:
+) -> Tuple[Dict[str, Any], go.Figure, bool, RawData]:
     """
     Evaluates and quantifies class distribution imbalance in a dataset used by a machine learning model.

validmind/tests/data_validation/DatasetDescription.py CHANGED Viewed

@@ -4,6 +4,7 @@
 import re
 from collections import Counter
+from typing import Any, Dict, List, Tuple
 import numpy as np
@@ -142,7 +143,9 @@ def describe_column(df, column):
 @tags("tabular_data", "time_series_data", "text_data")
 @tasks("classification", "regression", "text_classification", "text_summarization")
-def DatasetDescription(dataset: VMDataset):
+def DatasetDescription(
+    dataset: VMDataset,
+) -> Tuple[Dict[str, List[Dict[str, Any]]], RawData]:
     """
     Provides comprehensive analysis and statistical summaries of each column in a machine learning model's dataset.

validmind/tests/data_validation/DatasetSplit.py CHANGED Viewed

@@ -2,7 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
-from typing import List
+from typing import Any, Dict, List, Tuple
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset
@@ -17,7 +18,7 @@ DATASET_LABELS = {
 @tags("tabular_data", "time_series_data", "text_data")
 @tasks("classification", "regression", "text_classification", "text_summarization")
-def DatasetSplit(datasets: List[VMDataset]):
+def DatasetSplit(datasets: List[VMDataset]) -> Tuple[List[Dict[str, Any]], RawData]:
     """
     Evaluates and visualizes the distribution proportions among training, testing, and validation datasets of an ML
     model.

validmind/tests/data_validation/DescriptiveStatistics.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Any, Dict
 import pandas as pd
 from validmind import tags, tasks
@@ -46,7 +48,7 @@ def get_summary_statistics_categorical(df, categorical_fields):
 @tags("tabular_data", "time_series_data", "data_quality")
 @tasks("classification", "regression")
-def DescriptiveStatistics(dataset: VMDataset):
+def DescriptiveStatistics(dataset: VMDataset) -> Dict[str, Any]:
     """
     Performs a detailed descriptive statistical analysis of both numerical and categorical data within a model's
     dataset.

validmind/tests/data_validation/DickeyFullerGLS.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Any, Dict, Tuple
 import pandas as pd
 from arch.unitroot import DFGLS
 from numpy.linalg import LinAlgError
@@ -16,7 +18,7 @@ logger = get_logger(__name__)
 @tags("time_series_data", "forecasting", "unit_root_test")
 @tasks("regression")
-def DickeyFullerGLS(dataset: VMDataset):
+def DickeyFullerGLS(dataset: VMDataset) -> Tuple[Dict[str, Any], RawData]:
     """
     Assesses stationarity in time series data using the Dickey-Fuller GLS test to determine the order of integration.

validmind/tests/data_validation/Duplicates.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Any, Dict, Tuple
 import pandas as pd
 from validmind import tags, tasks
@@ -9,7 +11,7 @@ from validmind import tags, tasks
 @tags("tabular_data", "data_quality", "text_data")
 @tasks("classification", "regression")
-def Duplicates(dataset, min_threshold=1):
+def Duplicates(dataset, min_threshold=1) -> Tuple[Dict[str, Any], bool]:
     """
     Tests dataset for duplicate entries, ensuring model reliability via data quality verification.

validmind/tests/data_validation/EngleGrangerCoint.py CHANGED Viewed

@@ -2,6 +2,9 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Dict
 import pandas as pd
 from statsmodels.tsa.stattools import coint
@@ -12,7 +15,9 @@ from validmind.vm_models import VMDataset
 @tags("time_series_data", "statistical_test", "forecasting")
 @tasks("regression")
-def EngleGrangerCoint(dataset: VMDataset, threshold: float = 0.05):
+def EngleGrangerCoint(
+    dataset: VMDataset, threshold: float = 0.05
+) -> Dict[str, pd.DataFrame]:
     """
     Assesses the degree of co-movement between pairs of time series data using the Engle-Granger cointegration test.

validmind/tests/data_validation/FeatureTargetCorrelationPlot.py CHANGED Viewed

@@ -3,6 +3,8 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Tuple
 import numpy as np
 import plotly.graph_objects as go
@@ -11,7 +13,7 @@ from validmind import RawData, tags, tasks
 @tags("tabular_data", "visualization", "correlation")
 @tasks("classification", "regression")
-def FeatureTargetCorrelationPlot(dataset, fig_height=600):
+def FeatureTargetCorrelationPlot(dataset, fig_height=600) -> Tuple[go.Figure, RawData]:
     """
     Visualizes the correlation between input features and the model's target output in a color-coded horizontal bar
     plot.

validmind/tests/data_validation/HighCardinality.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Any, Dict, List, Tuple
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset
@@ -13,7 +15,7 @@ def HighCardinality(
     num_threshold: int = 100,
     percent_threshold: float = 0.1,
     threshold_type: str = "percent",
-):
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Assesses the number of unique values in categorical columns to detect high cardinality and potential overfitting.

validmind/tests/data_validation/HighPearsonCorrelation.py CHANGED Viewed

@@ -2,6 +2,9 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Any, Dict, List, Tuple
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset
@@ -13,7 +16,7 @@ def HighPearsonCorrelation(
     max_threshold: float = 0.3,
     top_n_correlations: int = 10,
     feature_columns: list = None,
-):
+) -> Tuple[List[Dict[str, Any]], bool, RawData]:
     """
     Identifies highly correlated feature pairs in a dataset suggesting feature redundancy or multicollinearity.

validmind/tests/data_validation/IQROutliersBarPlot.py CHANGED Viewed

@@ -2,6 +2,9 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Tuple
 import plotly.graph_objects as go
 from validmind import RawData, tags, tasks
@@ -22,7 +25,7 @@ def compute_outliers(series, threshold):
 @tasks("classification", "regression")
 def IQROutliersBarPlot(
     dataset: VMDataset, threshold: float = 1.5, fig_width: int = 800
-):
+) -> Tuple[go.Figure, RawData]:
     """
     Visualizes outlier distribution across percentiles in numerical data using the Interquartile Range (IQR) method.

validmind/tests/data_validation/IQROutliersTable.py CHANGED Viewed

@@ -2,6 +2,9 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Any, Dict, Tuple
 from validmind import RawData, tags, tasks
 from validmind.vm_models import VMDataset
@@ -18,7 +21,9 @@ def compute_outliers(series, threshold=1.5):
 @tags("tabular_data", "numerical_data")
 @tasks("classification", "regression")
-def IQROutliersTable(dataset: VMDataset, threshold: float = 1.5):
+def IQROutliersTable(
+    dataset: VMDataset, threshold: float = 1.5
+) -> Tuple[Dict[str, Any], RawData]:
     """
     Determines and summarizes outliers in numerical features using the Interquartile Range method.

validmind/tests/data_validation/IsolationForestOutliers.py CHANGED Viewed

@@ -3,7 +3,9 @@
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
 import itertools
+from typing import Tuple
+import matplotlib.figure
 import matplotlib.pyplot as plt
 import seaborn as sns
 from sklearn.ensemble import IsolationForest
@@ -19,7 +21,7 @@ def IsolationForestOutliers(
     random_state: int = 0,
     contamination: float = 0.1,
     feature_columns: list = None,
-):
+) -> Tuple[matplotlib.figure.Figure, RawData]:
     """
     Detects outliers in a dataset using the Isolation Forest algorithm and visualizes results through scatter plots.

validmind/tests/data_validation/JarqueBera.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Tuple
 import pandas as pd
 from statsmodels.stats.stattools import jarque_bera
@@ -10,7 +12,7 @@ from validmind import RawData, tags, tasks
 @tasks("classification", "regression")
 @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
-def JarqueBera(dataset):
+def JarqueBera(dataset) -> Tuple[pd.DataFrame, RawData]:
     """
     Assesses normality of dataset features in an ML model using the Jarque-Bera test.

validmind/tests/data_validation/KPSS.py CHANGED Viewed

@@ -2,6 +2,8 @@
 # See the LICENSE file in the root of this repository for details.
 # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+from typing import Any, Dict
 import pandas as pd
 from statsmodels.tsa.stattools import kpss
@@ -15,7 +17,7 @@ logger = get_logger(__name__)
 @tags("time_series_data", "stationarity", "unit_root_test", "statsmodels")
 @tasks("data_validation")
-def KPSS(dataset: VMDataset):
+def KPSS(dataset: VMDataset) -> Dict[str, Any]:
     """
     Assesses the stationarity of time-series data in a machine learning model using the KPSS unit root test.

validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl

validmind 2.8.28__py3-none-any.whl → 2.9.1__py3-none-any.whl