validmind-2.3.3-py3-none-any.whl → validmind-2.4.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +8 -1
- validmind/ai/utils.py +2 -1
- validmind/client.py +1 -0
- validmind/datasets/regression/fred_timeseries.py +272 -0
- validmind/tests/__init__.py +14 -468
- validmind/tests/__types__.py +10 -0
- validmind/tests/_store.py +102 -0
- validmind/tests/data_validation/ACFandPACFPlot.py +7 -9
- validmind/tests/data_validation/ADF.py +8 -10
- validmind/tests/data_validation/ANOVAOneWayTable.py +8 -10
- validmind/tests/data_validation/AutoAR.py +2 -4
- validmind/tests/data_validation/AutoMA.py +2 -4
- validmind/tests/data_validation/AutoSeasonality.py +8 -10
- validmind/tests/data_validation/AutoStationarity.py +8 -10
- validmind/tests/data_validation/BivariateFeaturesBarPlots.py +8 -10
- validmind/tests/data_validation/BivariateHistograms.py +8 -10
- validmind/tests/data_validation/BivariateScatterPlots.py +8 -10
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +8 -10
- validmind/tests/data_validation/ClassImbalance.py +2 -4
- validmind/tests/data_validation/DFGLSArch.py +2 -4
- validmind/tests/data_validation/DatasetDescription.py +7 -9
- validmind/tests/data_validation/DatasetSplit.py +8 -9
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +2 -4
- validmind/tests/data_validation/EngleGrangerCoint.py +2 -4
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +2 -4
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +2 -4
- validmind/tests/data_validation/HighCardinality.py +2 -4
- validmind/tests/data_validation/HighPearsonCorrelation.py +2 -4
- validmind/tests/data_validation/IQROutliersBarPlot.py +2 -4
- validmind/tests/data_validation/IQROutliersTable.py +2 -4
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -4
- validmind/tests/data_validation/KPSS.py +8 -10
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -4
- validmind/tests/data_validation/MissingValues.py +2 -4
- validmind/tests/data_validation/MissingValuesBarPlot.py +2 -4
- validmind/tests/data_validation/MissingValuesRisk.py +2 -4
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -4
- validmind/tests/data_validation/PhillipsPerronArch.py +7 -9
- validmind/tests/data_validation/RollingStatsPlot.py +2 -4
- validmind/tests/data_validation/ScatterPlot.py +2 -4
- validmind/tests/data_validation/SeasonalDecompose.py +70 -44
- validmind/tests/data_validation/Skewness.py +2 -4
- validmind/tests/data_validation/SpreadPlot.py +2 -4
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +2 -4
- validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -4
- validmind/tests/data_validation/TabularDescriptionTables.py +2 -4
- validmind/tests/data_validation/TabularNumericalHistograms.py +2 -4
- validmind/tests/data_validation/TargetRateBarPlots.py +2 -4
- validmind/tests/data_validation/TimeSeriesDescription.py +74 -0
- validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +76 -0
- validmind/tests/data_validation/TimeSeriesFrequency.py +2 -4
- validmind/tests/data_validation/TimeSeriesHistogram.py +29 -45
- validmind/tests/data_validation/TimeSeriesLinePlot.py +2 -4
- validmind/tests/data_validation/TimeSeriesMissingValues.py +2 -4
- validmind/tests/data_validation/TimeSeriesOutliers.py +32 -45
- validmind/tests/data_validation/TooManyZeroValues.py +2 -4
- validmind/tests/data_validation/UniqueRows.py +2 -4
- validmind/tests/data_validation/WOEBinPlots.py +2 -4
- validmind/tests/data_validation/WOEBinTable.py +2 -4
- validmind/tests/data_validation/ZivotAndrewsArch.py +2 -4
- validmind/tests/data_validation/nlp/CommonWords.py +2 -4
- validmind/tests/data_validation/nlp/Hashtags.py +2 -4
- validmind/tests/data_validation/nlp/Mentions.py +2 -4
- validmind/tests/data_validation/nlp/Punctuations.py +2 -4
- validmind/tests/data_validation/nlp/StopWords.py +2 -4
- validmind/tests/data_validation/nlp/TextDescription.py +2 -4
- validmind/tests/decorator.py +10 -8
- validmind/tests/load.py +264 -0
- validmind/tests/metadata.py +59 -0
- validmind/tests/model_validation/ClusterSizeDistribution.py +5 -7
- validmind/tests/model_validation/FeaturesAUC.py +6 -8
- validmind/tests/model_validation/ModelMetadata.py +8 -9
- validmind/tests/model_validation/ModelMetadataComparison.py +59 -0
- validmind/tests/model_validation/ModelPredictionResiduals.py +103 -0
- validmind/tests/model_validation/RegressionResidualsPlot.py +2 -6
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +131 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +76 -0
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +103 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +2 -4
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -4
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -4
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +2 -4
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -7
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -7
- validmind/tests/model_validation/sklearn/ClassifierPerformance.py +7 -9
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +5 -7
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +2 -7
- validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -7
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +19 -10
- validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +83 -0
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -7
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -7
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -7
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +4 -7
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +7 -9
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +7 -9
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +8 -10
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +7 -9
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +9 -11
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +7 -9
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +8 -10
- validmind/tests/model_validation/sklearn/ROCCurve.py +10 -11
- validmind/tests/model_validation/sklearn/RegressionErrors.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +76 -0
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +5 -7
- validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +63 -0
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +10 -14
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +8 -10
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -7
- validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +8 -10
- validmind/tests/model_validation/sklearn/VMeasure.py +5 -7
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +8 -10
- validmind/tests/model_validation/statsmodels/AutoARIMA.py +2 -4
- validmind/tests/model_validation/statsmodels/BoxPierce.py +2 -4
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +3 -4
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +2 -4
- validmind/tests/model_validation/statsmodels/GINITable.py +2 -4
- validmind/tests/model_validation/statsmodels/JarqueBera.py +7 -9
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +7 -9
- validmind/tests/model_validation/statsmodels/LJungBox.py +2 -4
- validmind/tests/model_validation/statsmodels/Lilliefors.py +7 -9
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -9
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -4
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +6 -8
- validmind/tests/model_validation/statsmodels/RunsTest.py +2 -4
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +3 -4
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +2 -4
- validmind/tests/prompt_validation/Bias.py +2 -4
- validmind/tests/prompt_validation/Clarity.py +2 -4
- validmind/tests/prompt_validation/Conciseness.py +2 -4
- validmind/tests/prompt_validation/Delimitation.py +2 -4
- validmind/tests/prompt_validation/NegativeInstruction.py +2 -4
- validmind/tests/prompt_validation/Robustness.py +2 -4
- validmind/tests/prompt_validation/Specificity.py +2 -4
- validmind/tests/run.py +394 -0
- validmind/tests/test_providers.py +12 -0
- validmind/tests/utils.py +16 -0
- validmind/unit_metrics/__init__.py +12 -4
- validmind/unit_metrics/composite.py +3 -0
- validmind/vm_models/test/metric.py +8 -5
- validmind/vm_models/test/result_wrapper.py +2 -1
- validmind/vm_models/test/test.py +14 -11
- validmind/vm_models/test/threshold_test.py +1 -0
- validmind/vm_models/test_suite/runner.py +1 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/METADATA +70 -36
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/RECORD +162 -146
- /validmind/datasets/regression/datasets/{lending_club_loan_rates.csv → leanding_club_loan_rates.csv} +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/LICENSE +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/WHEEL +0 -0
- {validmind-2.3.3.dist-info → validmind-2.4.0.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/ClusterSizeDistribution.py:

```diff
@@ -51,13 +51,11 @@ class ClusterSizeDistribution(Metric):
 
     name = "cluster_size_distribution"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def run(self):
         y_true_train = self.inputs.dataset.y
```
validmind/tests/model_validation/FeaturesAUC.py:

```diff
@@ -45,14 +45,12 @@ class FeaturesAUC(Metric):
         "fontsize": 12,
         "figure_height": 500,
     }
-    metadata = {
-        "task_types": ["classification"],
-        "tags": [
-            "feature_importance",
-            "AUC",
-            "visualization",
-        ],
-    }
+    tasks = ["classification"]
+    tags = [
+        "feature_importance",
+        "AUC",
+        "visualization",
+    ]
 
     def run(self):
         dataset = self.inputs.dataset
```
validmind/tests/model_validation/ModelMetadata.py:

```diff
@@ -53,15 +53,14 @@ class ModelMetadata(Metric):
 
     name = "model_metadata"
     required_inputs = ["model"]
-    metadata = {
-        "task_types": [
-            "classification",
-            "regression",
-            "text_classification",
-            "text_summarization",
-        ],
-        "tags": ["model_metadata"],
-    }
+    tasks = [
+        "classification",
+        "regression",
+        "text_classification",
+        "text_summarization",
+    ]
+
+    tags = ["model_metadata"]
 
     column_labels = {
         "architecture": "Modeling Technique",
```
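The three hunks above show the pattern that recurs across most of the modified test files in this release: the `metadata` class dictionary (with its `task_types` and `tags` keys) is flattened into plain `tasks` and `tags` class attributes. A minimal before/after sketch for a hypothetical custom `Metric` subclass; the class names and attribute values are illustrative, not taken from the diff, and the `Metric` import path is an assumption:

```python
from validmind.vm_models import Metric  # assumed import path


# validmind 2.3.3 style: task types and tags lived in a nested metadata dict
class MyCustomMetricOld(Metric):
    name = "my_custom_metric"
    required_inputs = ["model", "dataset"]
    metadata = {
        "task_types": ["classification"],
        "tags": ["custom", "example"],
    }

    def run(self):
        ...


# validmind 2.4.0 style: flat class attributes, matching the hunks above
class MyCustomMetricNew(Metric):
    name = "my_custom_metric"
    required_inputs = ["model", "dataset"]
    tasks = ["classification"]
    tags = ["custom", "example"]

    def run(self):
        ...
```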
validmind/tests/model_validation/ModelMetadataComparison.py (new file, +59 lines):

```python
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import pandas as pd

from validmind import tags, tasks
from validmind.utils import get_model_info


@tags("model_training", "metadata")
@tasks("regression", "time_series_forecasting")
def ModelMetadataComparison(models):
    """
    Compare metadata of different models and generate a summary table with the results.

    **Purpose**: The purpose of this function is to compare the metadata of different models, including information about their architecture, framework, framework version, and programming language.

    **Test Mechanism**: The function retrieves the metadata for each model using `get_model_info`, renames columns according to a predefined set of labels, and compiles this information into a summary table.

    **Signs of High Risk**:
    - Inconsistent or missing metadata across models can indicate potential issues in model documentation or management.
    - Significant differences in framework versions or programming languages might pose challenges in model integration and deployment.

    **Strengths**:
    - Provides a clear comparison of essential model metadata.
    - Standardizes metadata labels for easier interpretation and comparison.
    - Helps identify potential compatibility or consistency issues across models.

    **Limitations**:
    - Assumes that the `get_model_info` function returns all necessary metadata fields.
    - Relies on the correctness and completeness of the metadata provided by each model.
    - Does not include detailed parameter information, focusing instead on high-level metadata.
    """
    column_labels = {
        "architecture": "Modeling Technique",
        "framework": "Modeling Framework",
        "framework_version": "Framework Version",
        "language": "Programming Language",
    }

    description = []

    for model in models:
        model_info = get_model_info(model)

        # Rename columns based on provided labels
        model_info_renamed = {
            column_labels.get(k, k): v for k, v in model_info.items() if k != "params"
        }

        # Add model name or identifier if available
        model_info_renamed = {"Model Name": model.input_id, **model_info_renamed}

        description.append(model_info_renamed)

    description_df = pd.DataFrame(description)

    return description_df
```
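`ModelMetadataComparison` is written in the new functional style: a plain decorated function rather than a `Metric` subclass. These tests are picked up by the rewritten loader (`validmind/tests/load.py`) and executed through the new `validmind/tests/run.py`. A usage sketch, assuming `run_test` accepts a test ID plus an `inputs` mapping and that the models were previously registered with `vm.init_model` (the variable names are illustrative):

```python
import validmind as vm

# vm_model_a and vm_model_b are assumed to come from earlier vm.init_model(...)
# calls; their input_id values populate the "Model Name" column of the table.
result = vm.tests.run_test(
    "validmind.model_validation.ModelMetadataComparison",
    inputs={"models": [vm_model_a, vm_model_b]},
)
result.log()  # assumed: push the summary table to the ValidMind platform
```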
validmind/tests/model_validation/ModelPredictionResiduals.py (new file, +103 lines):

```python
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import pandas as pd
import plotly.graph_objects as go
from scipy.stats import kstest

from validmind import tags, tasks


@tags("regression")
@tasks("residual_analysis", "visualization")
def ModelPredictionResiduals(
    datasets, models, nbins=100, p_value_threshold=0.05, start_date=None, end_date=None
):
    """
    Plot the residuals and histograms for each model, and generate a summary table
    with the Kolmogorov-Smirnov normality test results.

    **Purpose**: The purpose of this function is to visualize the residuals of model predictions and
    assess the normality of residuals using the Kolmogorov-Smirnov test.

    **Test Mechanism**: The function iterates through each dataset-model pair, calculates residuals, and generates
    two figures for each model: one for the time series of residuals and one for the histogram of residuals.
    It also calculates the KS test for normality and summarizes the results in a table.

    **Signs of High Risk**:
    - If the residuals are not normally distributed, it could indicate issues with model assumptions.
    - High skewness or kurtosis in the residuals may indicate model misspecification.

    **Strengths**:
    - Provides a clear visualization of residuals over time and their distribution.
    - Includes statistical tests to assess the normality of residuals.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with a .df attribute to access
    the pandas DataFrame.
    - Only generates plots for datasets with a datetime index, and will raise an error for other types of indices.
    """

    figures = []
    summary = []

    for dataset, model in zip(datasets, models):
        df = dataset.df.copy()

        # Filter DataFrame by date range if specified
        if start_date:
            df = df[df.index >= pd.to_datetime(start_date)]
        if end_date:
            df = df[df.index <= pd.to_datetime(end_date)]

        y_true = dataset.y
        y_pred = dataset.y_pred(model)
        residuals = y_true - y_pred

        # Plot residuals
        residuals_fig = go.Figure()
        residuals_fig.add_trace(
            go.Scatter(x=df.index, y=residuals, mode="lines", name="Residuals")
        )
        residuals_fig.update_layout(
            title=f"Residuals for {model.input_id}",
            xaxis_title="Date",
            yaxis_title="Residuals",
            font=dict(size=16),
            showlegend=False,
        )
        figures.append(residuals_fig)

        # Plot histogram of residuals
        hist_fig = go.Figure()
        hist_fig.add_trace(go.Histogram(x=residuals, nbinsx=nbins, name="Residuals"))
        hist_fig.update_layout(
            title=f"Histogram of Residuals for {model.input_id}",
            xaxis_title="Residuals",
            yaxis_title="Frequency",
            font=dict(size=16),
            showlegend=False,
        )
        figures.append(hist_fig)

        # Perform KS normality test
        ks_stat, p_value = kstest(
            residuals, "norm", args=(residuals.mean(), residuals.std())
        )
        ks_normality = "Normal" if p_value > p_value_threshold else "Not Normal"

        summary.append(
            {
                "Model": model.input_id,
                "KS Statistic": ks_stat,
                "p-value": p_value,
                "KS Normality": ks_normality,
                "p-value Threshold": p_value_threshold,
            }
        )

    # Create a summary DataFrame for the KS normality test results
    summary_df = pd.DataFrame(summary)

    return (summary_df, *figures)
```
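The normality verdict in the summary table comes down to a single `scipy.stats.kstest` call against a normal distribution parameterized by the residuals' own mean and standard deviation. A self-contained sketch of that decision rule on synthetic residuals (the data is fabricated for illustration):

```python
import numpy as np
from scipy.stats import kstest

rng = np.random.default_rng(0)
residuals = rng.normal(loc=0.0, scale=2.0, size=500)  # stand-in residuals

# Same call the new test makes: KS test against N(mean, std) of the residuals
ks_stat, p_value = kstest(residuals, "norm", args=(residuals.mean(), residuals.std()))

p_value_threshold = 0.05
verdict = "Normal" if p_value > p_value_threshold else "Not Normal"
print(verdict, round(ks_stat, 4), round(p_value, 4))
```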
validmind/tests/model_validation/RegressionResidualsPlot.py:

```diff
@@ -52,12 +52,8 @@ class RegressionResidualsPlot(Metric):
 
     name = "regression_residuals_plot"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["regression"],
-        "tags": [
-            "model_performance",
-        ],
-    }
+    tasks = ["regression"]
+    tags = ["model_performance"]
     default_params = {"bin_size": 0.1}
 
     def run(self):
```
validmind/tests/model_validation/TimeSeriesPredictionWithCI.py (new file, +131 lines):

```python
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import numpy as np
import pandas as pd
import plotly.graph_objects as go
from scipy.stats import norm

from validmind import tags, tasks


@tags("model_predictions", "visualization")
@tasks("regression", "time_series_forecasting")
def TimeSeriesPredictionWithCI(dataset, model, confidence=0.95):
    """
    Plot actual vs predicted values for a time series with confidence intervals and compute breaches.

    **Purpose**: The purpose of this function is to visualize the actual versus predicted values for time series data, including confidence intervals, and to compute and report the number of breaches beyond these intervals.

    **Test Mechanism**: The function calculates the standard deviation of prediction errors, determines the confidence intervals, and counts the number of actual values that fall outside these intervals (breaches). It then generates a plot with the actual values, predicted values, and confidence intervals, and returns a DataFrame summarizing the breach information.

    **Signs of High Risk**:
    - A high number of breaches indicates that the model's predictions are not reliable within the specified confidence level.
    - Significant deviations between actual and predicted values may highlight model inadequacies or issues with data quality.

    **Strengths**:
    - Provides a visual representation of prediction accuracy and the uncertainty around predictions.
    - Includes a statistical measure of prediction reliability through confidence intervals.
    - Computes and reports breaches, offering a quantitative assessment of prediction performance.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with a datetime index.
    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
    - The calculation of confidence intervals assumes normally distributed errors, which may not hold for all datasets.
    """
    dataset_name = dataset.input_id
    model_name = model.input_id
    time_index = dataset.df.index  # Assuming the index of the dataset is datetime

    # Get actual and predicted values
    y_true = dataset.y
    y_pred = dataset.y_pred(model)

    # Compute the standard deviation of the errors
    errors = y_true - y_pred
    std_error = np.std(errors)

    # Compute z-score for the given confidence level
    z_score = norm.ppf(1 - (1 - confidence) / 2)

    # Compute confidence intervals
    lower_conf = y_pred - z_score * std_error
    upper_conf = y_pred + z_score * std_error

    # Calculate breaches
    upper_breaches = (y_true > upper_conf).sum()
    lower_breaches = (y_true < lower_conf).sum()
    total_breaches = upper_breaches + lower_breaches

    # Create DataFrame
    breaches_df = pd.DataFrame(
        {
            "Confidence Level": [confidence],
            "Total Breaches": [total_breaches],
            "Upper Breaches": [upper_breaches],
            "Lower Breaches": [lower_breaches],
        }
    )

    # Plotting
    fig = go.Figure()

    # Plot actual values
    fig.add_trace(
        go.Scatter(
            x=time_index,
            y=y_true,
            mode="lines",
            name="Actual Values",
            line=dict(color="blue"),
        )
    )

    # Plot predicted values
    fig.add_trace(
        go.Scatter(
            x=time_index,
            y=y_pred,
            mode="lines",
            name=f"Predicted by {model_name}",
            line=dict(color="red"),
        )
    )

    # Add confidence interval lower bound as an invisible line
    fig.add_trace(
        go.Scatter(
            x=time_index,
            y=lower_conf,
            mode="lines",
            line=dict(width=0),
            showlegend=False,
            name="CI Lower",
        )
    )

    # Add confidence interval upper bound and fill area
    fig.add_trace(
        go.Scatter(
            x=time_index,
            y=upper_conf,
            mode="lines",
            fill="tonexty",
            fillcolor="rgba(200, 200, 200, 0.5)",
            line=dict(width=0),
            showlegend=True,
            name="Confidence Interval",
        )
    )

    # Update layout
    fig.update_layout(
        title=f"Time Series Actual vs Predicted Values for {dataset_name} and {model_name}",
        xaxis_title="Time",
        yaxis_title="Values",
        legend_title="Legend",
        template="plotly_white",
    )

    return fig, breaches_df
```
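The interval half-width uses the standard two-sided normal quantile, z = Φ⁻¹(1 − (1 − confidence) / 2), scaled by the error standard deviation. A quick standalone check of the z-scores this function produces at common confidence levels:

```python
from scipy.stats import norm

for confidence in (0.90, 0.95, 0.99):
    z = norm.ppf(1 - (1 - confidence) / 2)
    print(f"confidence={confidence}: z={z:.3f}")

# confidence=0.9: z=1.645
# confidence=0.95: z=1.960
# confidence=0.99: z=2.576
```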
validmind/tests/model_validation/TimeSeriesPredictionsPlot.py (new file, +76 lines):

```python
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

import plotly.express as px
import plotly.graph_objects as go

from validmind import tags, tasks


@tags("model_predictions", "visualization")
@tasks("regression", "time_series_forecasting")
def TimeSeriesPredictionsPlot(datasets, models):
    """
    Plot actual vs predicted values for time series data and generate a visual comparison for each model.

    **Purpose**: The purpose of this function is to visualize the actual versus predicted values for time series data across different models.

    **Test Mechanism**: The function iterates through each dataset-model pair, plots the actual values from the dataset, and overlays the predicted values from each model using Plotly for interactive visualization.

    **Signs of High Risk**:
    - Large discrepancies between actual and predicted values indicate poor model performance.
    - Systematic deviations in predicted values can highlight model bias or issues with data patterns.

    **Strengths**:
    - Provides a clear visual comparison of model predictions against actual values.
    - Uses Plotly for interactive and visually appealing plots.
    - Can handle multiple models and datasets, displaying them with distinct colors.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with a datetime index.
    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
    - Visualization might become cluttered with a large number of models or datasets.
    """
    fig = go.Figure()

    # Use Plotly's color sequence for different model predictions
    colors = px.colors.qualitative.Plotly

    # Plot actual values from the first dataset
    dataset = datasets[0]
    time_index = dataset.df.index  # Assuming the index of the dataset is datetime
    fig.add_trace(
        go.Scatter(
            x=time_index,
            y=dataset.y,
            mode="lines",
            name="Actual Values",
            line=dict(color="blue"),
        )
    )

    # Plot predicted values for each dataset-model pair
    for idx, (dataset, model) in enumerate(zip(datasets, models)):
        model_name = model.input_id
        y_pred = dataset.y_pred(model)
        fig.add_trace(
            go.Scatter(
                x=time_index,
                y=y_pred,
                mode="lines",
                name=f"Predicted by {model_name}",
                line=dict(color=colors[idx % len(colors)]),
            )
        )

    # Update layout
    fig.update_layout(
        title="Time Series Actual vs Predicted Values",
        xaxis_title="Time",
        yaxis_title="Values",
        legend_title="Legend",
        template="plotly_white",
    )

    return fig
```
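The per-model coloring in `TimeSeriesPredictionsPlot` cycles Plotly's default qualitative palette with a modulo index, so any number of models maps onto the ten available colors. A minimal standalone illustration of that cycling with fabricated trace data:

```python
import plotly.express as px
import plotly.graph_objects as go

colors = px.colors.qualitative.Plotly  # list of 10 hex color strings
fig = go.Figure()
for idx in range(12):  # more series than colors, so the palette wraps around
    fig.add_trace(
        go.Scatter(
            y=[idx, idx + 1, idx + 2],
            mode="lines",
            name=f"series {idx}",
            line=dict(color=colors[idx % len(colors)]),
        )
    )
fig.show()
```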
validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py (new file, +103 lines):

```python
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
# See the LICENSE file in the root of this repository for details.
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial


import pandas as pd
import plotly.express as px
from sklearn import metrics

from validmind import tags, tasks


@tags("model_performance", "sklearn")
@tasks("regression", "time_series_forecasting")
def TimeSeriesR2SquareBySegments(datasets, models, segments=None):
    """
    Plot R-Squared values for each model over specified time segments and generate a bar chart
    with the results.

    **Purpose**: The purpose of this function is to plot the R-Squared values for different models applied to various segments of the time series data.

    **Parameters**:
    - datasets: List of datasets to evaluate.
    - models: List of models to evaluate.
    - segments: Dictionary with 'start_date' and 'end_date' keys containing lists of start and end dates for each segment. If None, the time series will be segmented into two halves.

    **Test Mechanism**: The function iterates through each dataset-model pair, calculates the R-Squared values for specified time segments, and generates a bar chart with these results.

    **Signs of High Risk**:
    - If the R-Squared values are significantly low for certain segments, it could indicate that the model is not explaining much of the variability in the dataset for those segments.

    **Strengths**:
    - Provides a visual representation of model performance across different time segments.
    - Allows for identification of segments where models perform poorly.

    **Limitations**:
    - Assumes that the dataset is provided as a DataFrameDataset object with `y`, `y_pred`, and `feature_columns` attributes.
    - Requires that `dataset.y_pred(model)` returns the predicted values for the model.
    - Assumes that `y_true` and `y_pred` are pandas Series with datetime indices.
    """
    results_list = []

    for dataset, model in zip(datasets, models):
        dataset_name = dataset.input_id
        model_name = model.input_id

        y_true = dataset.y
        y_pred = dataset.y_pred(model)

        # Ensure y_true and y_pred are pandas Series with the same index
        if not isinstance(y_true, pd.Series):
            y_true = pd.Series(y_true, index=dataset.df.index)
        if not isinstance(y_pred, pd.Series):
            y_pred = pd.Series(y_pred, index=dataset.df.index)

        index = dataset.df.index

        if segments is None:
            mid_point = len(index) // 2
            segments = {
                "start_date": [index.min(), index[mid_point]],
                "end_date": [index[mid_point - 1], index.max()],
            }

        for segment_index, (start_date, end_date) in enumerate(
            zip(segments["start_date"], segments["end_date"])
        ):
            mask = (index >= start_date) & (index <= end_date)
            y_true_segment = y_true.loc[mask]
            y_pred_segment = y_pred.loc[mask]

            if len(y_true_segment) > 0 and len(y_pred_segment) > 0:
                r2s = metrics.r2_score(y_true_segment, y_pred_segment)
                results_list.append(
                    {
                        "Model": model_name,
                        "Dataset": dataset_name,
                        "Segments": f"Segment {segment_index + 1}",
                        "Start Date": start_date,
                        "End Date": end_date,
                        "R-Squared": r2s,
                    }
                )

    # Convert results list to a DataFrame
    results_df = pd.DataFrame(results_list)

    # Plotting
    fig = px.bar(
        results_df,
        x="Segments",
        y="R-Squared",
        color="Model",
        barmode="group",
        title="R-Squared Comparison by Segment and Model",
        labels={
            "R-Squared": "R-Squared Value",
            "Segment": "Time Segment",
            "Model": "Model",
        },
    )

    return fig, results_df
```
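Since `segments` defaults to splitting the series into two halves, callers who want specific evaluation windows pass parallel `start_date`/`end_date` lists through `params`. A usage sketch, assuming the same `run_test` entry point as above; the dataset/model variables and dates are illustrative:

```python
import validmind as vm

# vm_test_ds, vm_model_a, and vm_model_b are assumed to come from earlier
# vm.init_dataset(...) / vm.init_model(...) calls; datasets and models zip pairwise.
result = vm.tests.run_test(
    "validmind.model_validation.TimeSeriesR2SquareBySegments",
    inputs={"datasets": [vm_test_ds, vm_test_ds], "models": [vm_model_a, vm_model_b]},
    params={
        "segments": {
            "start_date": ["2015-01-01", "2020-01-01"],  # illustrative windows
            "end_date": ["2019-12-31", "2023-12-31"],
        }
    },
)
```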
validmind/tests/model_validation/embeddings/ClusterDistribution.py:

```diff
@@ -51,10 +51,8 @@ class ClusterDistribution(Metric):
     default_params = {
         "num_clusters": 5,
     }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # run kmeans clustering on embeddings
```
validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py:

```diff
@@ -50,10 +50,8 @@ class CosineSimilarityDistribution(Metric):
 
     name = "Text Embeddings Cosine Similarity Distribution"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # Compute cosine similarity
```
validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py:

```diff
@@ -53,10 +53,8 @@ class DescriptiveAnalytics(Metric):
 
     name = "Descriptive Analytics for Text Embeddings Models"
     required_inputs = ["model", "dataset"]
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         # Assuming y_pred returns a 2D array of embeddings [samples, features]
```
validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py:

```diff
@@ -53,10 +53,8 @@ class EmbeddingsVisualization2D(Metric):
         "cluster_column": None,
         "perplexity": 30,
     }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     def run(self):
         cluster_column = self.params.get("cluster_column")
```
validmind/tests/model_validation/embeddings/StabilityAnalysis.py:

```diff
@@ -29,10 +29,8 @@ class StabilityAnalysis(ThresholdTest):
     default_params = {
         "mean_similarity_threshold": 0.7,
     }
-    metadata = {
-        "task_types": ["feature_extraction"],
-        "tags": ["llm", "text_data", "text_embeddings", "visualization"],
-    }
+    tasks = ["feature_extraction"]
+    tags = ["llm", "text_data", "text_embeddings", "visualization"]
 
     @abstractmethod
     def perturb_data(self, data: str) -> str:
```
validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py:

```diff
@@ -48,13 +48,11 @@ class AdjustedMutualInformation(ClusterPerformance):
 
     name = "adjusted_mutual_information"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Adjusted Mutual Information": metrics.adjusted_mutual_info_score}
```
validmind/tests/model_validation/sklearn/AdjustedRandIndex.py:

```diff
@@ -47,13 +47,11 @@ class AdjustedRandIndex(ClusterPerformance):
 
     name = "adjusted_rand_index"
     required_inputs = ["model", "datasets"]
-    metadata = {
-        "task_types": ["clustering"],
-        "tags": [
-            "sklearn",
-            "model_performance",
-        ],
-    }
+    tasks = ["clustering"]
+    tags = [
+        "sklearn",
+        "model_performance",
+    ]
 
     def metric_info(self):
         return {"Adjusted Rand Index": metrics.adjusted_rand_score}
```
|