validmind 2.2.5__py3-none-any.whl → 2.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/{ai.py → ai/test_descriptions.py} +127 -69
- validmind/ai/utils.py +104 -0
- validmind/api_client.py +70 -31
- validmind/client.py +5 -5
- validmind/logging.py +38 -32
- validmind/models/foundation.py +10 -6
- validmind/models/function.py +3 -1
- validmind/models/metadata.py +1 -1
- validmind/test_suites/__init__.py +1 -7
- validmind/test_suites/regression.py +0 -16
- validmind/test_suites/statsmodels_timeseries.py +1 -1
- validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
- validmind/tests/{model_validation/statsmodels → data_validation}/ADF.py +42 -13
- validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
- validmind/tests/{model_validation/statsmodels → data_validation}/DFGLSArch.py +67 -11
- validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
- validmind/tests/{model_validation/statsmodels → data_validation}/KPSS.py +64 -11
- validmind/tests/{model_validation/statsmodels → data_validation}/PhillipsPerronArch.py +65 -11
- validmind/tests/data_validation/ScatterPlot.py +1 -1
- validmind/tests/data_validation/SeasonalDecompose.py +12 -7
- validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/{model_validation/statsmodels → data_validation}/ZivotAndrewsArch.py +65 -11
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +1 -1
- validmind/tests/data_validation/nlp/TextDescription.py +5 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/decorator.py +1 -1
- validmind/tests/model_validation/FeaturesAUC.py +5 -3
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
- validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
- validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
- validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
- validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
- validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
- validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
- validmind/tests/model_validation/ragas/utils.py +35 -9
- validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
- validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
- validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
- validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
- validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
- validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
- validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
- validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
- validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
- validmind/tests/prompt_validation/Bias.py +14 -11
- validmind/tests/prompt_validation/Clarity.py +14 -11
- validmind/tests/prompt_validation/Conciseness.py +14 -11
- validmind/tests/prompt_validation/Delimitation.py +14 -11
- validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
- validmind/tests/prompt_validation/Robustness.py +11 -11
- validmind/tests/prompt_validation/Specificity.py +14 -11
- validmind/tests/prompt_validation/ai_powered_test.py +53 -75
- validmind/unit_metrics/composite.py +2 -1
- validmind/utils.py +4 -49
- validmind/vm_models/dataset/dataset.py +17 -3
- validmind/vm_models/dataset/utils.py +2 -2
- validmind/vm_models/model.py +1 -1
- validmind/vm_models/test/metric.py +1 -8
- validmind/vm_models/test/result_wrapper.py +27 -34
- validmind/vm_models/test/test.py +3 -0
- validmind/vm_models/test/threshold_test.py +1 -1
- validmind/vm_models/test_suite/runner.py +12 -6
- validmind/vm_models/test_suite/summary.py +18 -7
- validmind/vm_models/test_suite/test.py +13 -20
- {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/METADATA +1 -1
- {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/RECORD +95 -104
- validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
- validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
- validmind/tests/data_validation/PiTPDHistogram.py +0 -152
- validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
- validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
- validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
- validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
- {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/LICENSE +0 -0
- {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/WHEEL +0 -0
- {validmind-2.2.5.dist-info → validmind-2.3.1.dist-info}/entry_points.txt +0 -0
@@ -4,9 +4,13 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
+
import pandas as pd
|
7
8
|
from statsmodels.tsa.stattools import kpss
|
8
9
|
|
9
|
-
from validmind.
|
10
|
+
from validmind.logging import get_logger
|
11
|
+
from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
|
12
|
+
|
13
|
+
logger = get_logger(__name__)
|
10
14
|
|
11
15
|
|
12
16
|
@dataclass
|
@@ -64,14 +68,63 @@ class KPSS(Metric):
|
|
64
68
|
"""
|
65
69
|
dataset = self.inputs.dataset.df
|
66
70
|
|
67
|
-
|
71
|
+
# Check if the dataset is a time series
|
72
|
+
if not isinstance(dataset.index, (pd.DatetimeIndex, pd.PeriodIndex)):
|
73
|
+
raise ValueError(
|
74
|
+
"Dataset index must be a datetime or period index for time series analysis."
|
75
|
+
)
|
76
|
+
|
77
|
+
# Preprocessing: Drop rows with any NaN values
|
78
|
+
if dataset.isnull().values.any():
|
79
|
+
logger.warning(
|
80
|
+
"Dataset contains missing values. Rows with NaNs will be dropped."
|
81
|
+
)
|
82
|
+
dataset = dataset.dropna()
|
83
|
+
|
84
|
+
# Convert to numeric and handle non-numeric data
|
85
|
+
dataset = dataset.apply(pd.to_numeric, errors="coerce")
|
86
|
+
|
87
|
+
# Initialize a list to store KPSS results
|
88
|
+
kpss_values = []
|
89
|
+
|
68
90
|
for col in dataset.columns:
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
91
|
+
try:
|
92
|
+
kpss_stat, pvalue, usedlag, critical_values = kpss(dataset[col].values)
|
93
|
+
kpss_values.append(
|
94
|
+
{
|
95
|
+
"Variable": col,
|
96
|
+
"stat": kpss_stat,
|
97
|
+
"pvalue": pvalue,
|
98
|
+
"usedlag": usedlag,
|
99
|
+
"critical_values": critical_values,
|
100
|
+
}
|
101
|
+
)
|
102
|
+
except Exception as e:
|
103
|
+
logger.error(f"Error processing column '{col}': {e}")
|
104
|
+
kpss_values.append(
|
105
|
+
{
|
106
|
+
"Variable": col,
|
107
|
+
"stat": None,
|
108
|
+
"pvalue": None,
|
109
|
+
"usedlag": None,
|
110
|
+
"critical_values": None,
|
111
|
+
"error": str(e),
|
112
|
+
}
|
113
|
+
)
|
114
|
+
|
115
|
+
return self.cache_results({"kpss_results": kpss_values})
|
116
|
+
|
117
|
+
def summary(self, metric_value):
|
118
|
+
"""
|
119
|
+
Build a table for summarizing the KPSS results
|
120
|
+
"""
|
121
|
+
kpss_results = metric_value["kpss_results"]
|
122
|
+
|
123
|
+
return ResultSummary(
|
124
|
+
results=[
|
125
|
+
ResultTable(
|
126
|
+
data=kpss_results,
|
127
|
+
metadata=ResultTableMetadata(title="KPSS Test Results"),
|
128
|
+
)
|
129
|
+
]
|
130
|
+
)
|
@@ -4,9 +4,14 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
+
import pandas as pd
|
7
8
|
from arch.unitroot import PhillipsPerron
|
9
|
+
from numpy.linalg import LinAlgError
|
8
10
|
|
9
|
-
from validmind.
|
11
|
+
from validmind.logging import get_logger
|
12
|
+
from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
|
13
|
+
|
14
|
+
logger = get_logger(__name__)
|
10
15
|
|
11
16
|
|
12
17
|
@dataclass
|
@@ -62,14 +67,63 @@ class PhillipsPerronArch(Metric):
|
|
62
67
|
"""
|
63
68
|
dataset = self.inputs.dataset.df
|
64
69
|
|
65
|
-
|
70
|
+
# Check if the dataset is a time series
|
71
|
+
if not isinstance(dataset.index, (pd.DatetimeIndex, pd.PeriodIndex)):
|
72
|
+
raise ValueError(
|
73
|
+
"Dataset index must be a datetime or period index for time series analysis."
|
74
|
+
)
|
75
|
+
|
76
|
+
# Preprocessing: Drop rows with any NaN values
|
77
|
+
if dataset.isnull().values.any():
|
78
|
+
logger.warning(
|
79
|
+
"Dataset contains missing values. Rows with NaNs will be dropped."
|
80
|
+
)
|
81
|
+
dataset = dataset.dropna()
|
82
|
+
|
83
|
+
# Convert to numeric and handle non-numeric data
|
84
|
+
dataset = dataset.apply(pd.to_numeric, errors="coerce")
|
85
|
+
|
86
|
+
# Initialize a list to store Phillips-Perron results
|
87
|
+
pp_values = []
|
88
|
+
|
66
89
|
for col in dataset.columns:
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
90
|
+
try:
|
91
|
+
pp = PhillipsPerron(dataset[col].values)
|
92
|
+
pp_values.append(
|
93
|
+
{
|
94
|
+
"Variable": col,
|
95
|
+
"stat": pp.stat,
|
96
|
+
"pvalue": pp.pvalue,
|
97
|
+
"usedlag": pp.lags,
|
98
|
+
"nobs": pp.nobs,
|
99
|
+
}
|
100
|
+
)
|
101
|
+
except LinAlgError as e:
|
102
|
+
logger.error(f"Error processing column '{col}': {e}")
|
103
|
+
pp_values.append(
|
104
|
+
{
|
105
|
+
"Variable": col,
|
106
|
+
"stat": None,
|
107
|
+
"pvalue": None,
|
108
|
+
"usedlag": None,
|
109
|
+
"nobs": None,
|
110
|
+
"error": str(e),
|
111
|
+
}
|
112
|
+
)
|
113
|
+
|
114
|
+
return self.cache_results({"phillips_perron_results": pp_values})
|
115
|
+
|
116
|
+
def summary(self, metric_value):
|
117
|
+
"""
|
118
|
+
Build a table for summarizing the Phillips-Perron results
|
119
|
+
"""
|
120
|
+
pp_results = metric_value["phillips_perron_results"]
|
121
|
+
|
122
|
+
return ResultSummary(
|
123
|
+
results=[
|
124
|
+
ResultTable(
|
125
|
+
data=pp_results,
|
126
|
+
metadata=ResultTableMetadata(title="Phillips-Perron Test Results"),
|
127
|
+
)
|
128
|
+
]
|
129
|
+
)
|
@@ -51,7 +51,7 @@ class ScatterPlot(Metric):
|
|
51
51
|
"""
|
52
52
|
|
53
53
|
name = "scatter_plot"
|
54
|
-
required_inputs = ["dataset"
|
54
|
+
required_inputs = ["dataset"]
|
55
55
|
metadata = {
|
56
56
|
"task_types": ["classification", "regression"],
|
57
57
|
"tags": ["tabular_data", "visualization"],
|
@@ -90,14 +90,18 @@ class SeasonalDecompose(Metric):
|
|
90
90
|
dfs = [
|
91
91
|
pd.DataFrame(series)
|
92
92
|
.pipe(
|
93
|
-
lambda x:
|
94
|
-
|
95
|
-
|
93
|
+
lambda x: (
|
94
|
+
x.reset_index()
|
95
|
+
if not isinstance(x.index, pd.DatetimeIndex)
|
96
|
+
else x.reset_index().rename(columns={x.index.name: "Date"})
|
97
|
+
)
|
96
98
|
)
|
97
99
|
.assign(
|
98
|
-
Date=lambda x:
|
99
|
-
|
100
|
-
|
100
|
+
Date=lambda x: (
|
101
|
+
x["Date"].astype(str)
|
102
|
+
if "Date" in x.columns
|
103
|
+
else x.index.astype(str)
|
104
|
+
)
|
101
105
|
)
|
102
106
|
for series in results.values()
|
103
107
|
]
|
@@ -200,7 +204,8 @@ class SeasonalDecompose(Metric):
|
|
200
204
|
)
|
201
205
|
else:
|
202
206
|
warnings.warn(
|
203
|
-
f"No frequency could be inferred for variable '{col}'.
|
207
|
+
f"No frequency could be inferred for variable '{col}'. "
|
208
|
+
"Skipping seasonal decomposition and plots for this variable."
|
204
209
|
)
|
205
210
|
|
206
211
|
return self.cache_results(results, figures=figures)
|
@@ -2,8 +2,8 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
import
|
6
|
-
import
|
5
|
+
import pandas as pd
|
6
|
+
import plotly.graph_objects as go
|
7
7
|
|
8
8
|
from validmind.vm_models import Figure, Metric
|
9
9
|
|
@@ -50,45 +50,41 @@ class TabularDateTimeHistograms(Metric):
|
|
50
50
|
|
51
51
|
metadata = {
|
52
52
|
"task_types": ["classification", "regression"],
|
53
|
-
"tags": ["
|
53
|
+
"tags": ["time_series_data", "visualization"],
|
54
54
|
}
|
55
55
|
|
56
56
|
def run(self):
|
57
57
|
df = self.inputs.dataset.df
|
58
58
|
|
59
|
-
#
|
60
|
-
|
61
|
-
|
62
|
-
if len(datetime_columns) == 0:
|
63
|
-
raise ValueError("No datetime columns found in the dataset")
|
59
|
+
# Check if the index is a datetime type
|
60
|
+
if not isinstance(df.index, (pd.DatetimeIndex, pd.PeriodIndex)):
|
61
|
+
raise ValueError("Index must be a datetime type")
|
64
62
|
|
65
63
|
figures = []
|
66
|
-
for col in datetime_columns:
|
67
|
-
plt.figure()
|
68
|
-
fig, _ = plt.subplots()
|
69
|
-
|
70
|
-
# Calculate the difference between consecutive dates and convert to days
|
71
|
-
date_diffs = df[col].sort_values().diff().dt.days.dropna()
|
72
|
-
|
73
|
-
# Filter out 0 values
|
74
|
-
date_diffs = date_diffs[date_diffs != 0]
|
75
|
-
|
76
|
-
ax = sns.histplot(date_diffs, kde=False, bins=30)
|
77
|
-
plt.title(f"{col}", weight="bold", fontsize=20)
|
78
|
-
|
79
|
-
plt.xticks(fontsize=18)
|
80
|
-
plt.yticks(fontsize=18)
|
81
|
-
ax.set_xlabel("Days Between Consecutive Dates", fontsize=18)
|
82
|
-
ax.set_ylabel("Frequency", fontsize=18)
|
83
|
-
figures.append(
|
84
|
-
Figure(
|
85
|
-
for_object=self,
|
86
|
-
key=f"{self.key}:{col}",
|
87
|
-
figure=fig,
|
88
|
-
)
|
89
|
-
)
|
90
64
|
|
91
|
-
|
65
|
+
# Calculate the difference between consecutive dates in the index
|
66
|
+
date_diffs = df.index.to_series().sort_values().diff().dt.days.dropna()
|
67
|
+
|
68
|
+
# Filter out 0 values
|
69
|
+
date_diffs = date_diffs[date_diffs != 0]
|
70
|
+
|
71
|
+
# Create a histogram using Plotly
|
72
|
+
fig = go.Figure()
|
73
|
+
fig.add_trace(go.Histogram(x=date_diffs, nbinsx=30))
|
74
|
+
fig.update_layout(
|
75
|
+
title="Index",
|
76
|
+
xaxis_title="Days Between Consecutive Dates",
|
77
|
+
yaxis_title="Frequency",
|
78
|
+
font=dict(size=18),
|
79
|
+
)
|
80
|
+
|
81
|
+
figures.append(
|
82
|
+
Figure(
|
83
|
+
for_object=self,
|
84
|
+
key=f"{self.key}:index",
|
85
|
+
figure=fig,
|
86
|
+
)
|
87
|
+
)
|
92
88
|
|
93
89
|
return self.cache_results(
|
94
90
|
figures=figures,
|
@@ -58,7 +58,7 @@ class WOEBinPlots(Metric):
|
|
58
58
|
"""
|
59
59
|
|
60
60
|
name = "woe_bin_plots"
|
61
|
-
|
61
|
+
required_inputs = ["dataset"]
|
62
62
|
default_params = {"breaks_adj": None, "fig_height": 600, "fig_width": 500}
|
63
63
|
metadata = {
|
64
64
|
"task_types": ["classification"],
|
@@ -4,9 +4,14 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
+
import pandas as pd
|
7
8
|
from arch.unitroot import ZivotAndrews
|
9
|
+
from numpy.linalg import LinAlgError
|
8
10
|
|
9
|
-
from validmind.
|
11
|
+
from validmind.logging import get_logger
|
12
|
+
from validmind.vm_models import Metric, ResultSummary, ResultTable, ResultTableMetadata
|
13
|
+
|
14
|
+
logger = get_logger(__name__)
|
10
15
|
|
11
16
|
|
12
17
|
@dataclass
|
@@ -57,14 +62,63 @@ class ZivotAndrewsArch(Metric):
|
|
57
62
|
"""
|
58
63
|
dataset = self.inputs.dataset.df
|
59
64
|
|
60
|
-
|
65
|
+
# Check if the dataset is a time series
|
66
|
+
if not isinstance(dataset.index, (pd.DatetimeIndex, pd.PeriodIndex)):
|
67
|
+
raise ValueError(
|
68
|
+
"Dataset index must be a datetime or period index for time series analysis."
|
69
|
+
)
|
70
|
+
|
71
|
+
# Preprocessing: Drop rows with any NaN values
|
72
|
+
if dataset.isnull().values.any():
|
73
|
+
logger.warning(
|
74
|
+
"Dataset contains missing values. Rows with NaNs will be dropped."
|
75
|
+
)
|
76
|
+
dataset = dataset.dropna()
|
77
|
+
|
78
|
+
# Convert to numeric and handle non-numeric data
|
79
|
+
dataset = dataset.apply(pd.to_numeric, errors="coerce")
|
80
|
+
|
81
|
+
# Initialize a list to store Zivot-Andrews results
|
82
|
+
za_values = []
|
83
|
+
|
61
84
|
for col in dataset.columns:
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
85
|
+
try:
|
86
|
+
za = ZivotAndrews(dataset[col].values)
|
87
|
+
za_values.append(
|
88
|
+
{
|
89
|
+
"Variable": col,
|
90
|
+
"stat": za.stat,
|
91
|
+
"pvalue": za.pvalue,
|
92
|
+
"usedlag": za.lags,
|
93
|
+
"nobs": za.nobs,
|
94
|
+
}
|
95
|
+
)
|
96
|
+
except (LinAlgError, ValueError) as e:
|
97
|
+
logger.error(f"Error while processing column '{col}'. Details: {e}")
|
98
|
+
za_values.append(
|
99
|
+
{
|
100
|
+
"Variable": col,
|
101
|
+
"stat": None,
|
102
|
+
"pvalue": None,
|
103
|
+
"usedlag": None,
|
104
|
+
"nobs": None,
|
105
|
+
"error": str(e),
|
106
|
+
}
|
107
|
+
)
|
108
|
+
|
109
|
+
return self.cache_results({"zivot_andrews_results": za_values})
|
110
|
+
|
111
|
+
def summary(self, metric_value):
|
112
|
+
"""
|
113
|
+
Build a table for summarizing the Zivot-Andrews results
|
114
|
+
"""
|
115
|
+
za_results = metric_value["zivot_andrews_results"]
|
116
|
+
|
117
|
+
return ResultSummary(
|
118
|
+
results=[
|
119
|
+
ResultTable(
|
120
|
+
data=za_results,
|
121
|
+
metadata=ResultTableMetadata(title="Zivot-Andrews Test Results"),
|
122
|
+
)
|
123
|
+
]
|
124
|
+
)
|
@@ -52,7 +52,7 @@ class CommonWords(Metric):
|
|
52
52
|
"""
|
53
53
|
|
54
54
|
name = "common_words"
|
55
|
-
required_inputs = ["dataset"
|
55
|
+
required_inputs = ["dataset"]
|
56
56
|
metadata = {
|
57
57
|
"task_types": ["text_classification", "text_summarization"],
|
58
58
|
"tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
|
@@ -54,7 +54,7 @@ class Hashtags(ThresholdTest):
|
|
54
54
|
"""
|
55
55
|
|
56
56
|
name = "hashtags"
|
57
|
-
required_inputs = ["dataset"
|
57
|
+
required_inputs = ["dataset"]
|
58
58
|
default_params = {"top_hashtags": 25}
|
59
59
|
metadata = {
|
60
60
|
"task_types": ["text_classification", "text_summarization"],
|
@@ -54,7 +54,7 @@ class Mentions(ThresholdTest):
|
|
54
54
|
|
55
55
|
name = "mentions"
|
56
56
|
|
57
|
-
required_inputs = ["dataset"
|
57
|
+
required_inputs = ["dataset"]
|
58
58
|
default_params = {"top_mentions": 25}
|
59
59
|
metadata = {
|
60
60
|
"task_types": ["text_classification", "text_summarization"],
|
@@ -10,7 +10,7 @@ from textblob import TextBlob
|
|
10
10
|
from validmind import tags, tasks
|
11
11
|
|
12
12
|
|
13
|
-
@tags("data_validation")
|
13
|
+
@tags("nlp", "text_data", "data_validation")
|
14
14
|
@tasks("nlp")
|
15
15
|
def PolarityAndSubjectivity(dataset):
|
16
16
|
"""
|
@@ -27,6 +27,7 @@ def PolarityAndSubjectivity(dataset):
|
|
27
27
|
Returns:
|
28
28
|
plotly.graph_objs._figure.Figure: A Plotly scatter plot of polarity vs subjectivity.
|
29
29
|
"""
|
30
|
+
|
30
31
|
# Function to calculate sentiment and subjectivity
|
31
32
|
def analyze_sentiment(text):
|
32
33
|
analysis = TextBlob(text)
|
@@ -51,7 +51,7 @@ class Punctuations(Metric):
|
|
51
51
|
"""
|
52
52
|
|
53
53
|
name = "punctuations"
|
54
|
-
required_inputs = ["dataset"
|
54
|
+
required_inputs = ["dataset"]
|
55
55
|
metadata = {
|
56
56
|
"task_types": ["text_classification", "text_summarization"],
|
57
57
|
"tags": ["nlp", "text_data", "visualization", "frequency_analysis"],
|
@@ -60,7 +60,7 @@ class TextDescription(Metric):
|
|
60
60
|
"""
|
61
61
|
|
62
62
|
name = "text_description"
|
63
|
-
required_inputs = ["dataset"
|
63
|
+
required_inputs = ["dataset"]
|
64
64
|
default_params = {
|
65
65
|
"unwanted_tokens": {
|
66
66
|
"s",
|
@@ -79,6 +79,10 @@ class TextDescription(Metric):
|
|
79
79
|
"num_top_words": 3,
|
80
80
|
"lang": "english",
|
81
81
|
}
|
82
|
+
metadata = {
|
83
|
+
"task_types": ["text_classification", "text_summarization"],
|
84
|
+
"tags": ["nlp", "text_data", "visualization"],
|
85
|
+
}
|
82
86
|
|
83
87
|
def general_text_metrics(self, df, text_column):
|
84
88
|
nltk.download("punkt", quiet=True)
|
validmind/tests/decorator.py
CHANGED
@@ -13,9 +13,9 @@ from uuid import uuid4
|
|
13
13
|
|
14
14
|
import pandas as pd
|
15
15
|
|
16
|
+
from validmind.ai.test_descriptions import get_description_metadata
|
16
17
|
from validmind.errors import MissingRequiredTestInputError
|
17
18
|
from validmind.logging import get_logger
|
18
|
-
from validmind.utils import get_description_metadata
|
19
19
|
from validmind.vm_models import (
|
20
20
|
Metric,
|
21
21
|
MetricResult,
|
@@ -55,10 +55,12 @@ class FeaturesAUC(Metric):
|
|
55
55
|
}
|
56
56
|
|
57
57
|
def run(self):
|
58
|
-
|
59
|
-
|
58
|
+
dataset = self.inputs.dataset
|
59
|
+
x = dataset.x_df()
|
60
|
+
y = dataset.y_df()
|
61
|
+
n_targets = dataset.df[dataset.target_column].nunique()
|
60
62
|
|
61
|
-
if
|
63
|
+
if n_targets != 2:
|
62
64
|
raise SkipTestError("FeaturesAUC metric requires a binary target variable.")
|
63
65
|
|
64
66
|
aucs = pd.DataFrame(index=x.columns, columns=["AUC"])
|
@@ -9,7 +9,11 @@ import pandas as pd
|
|
9
9
|
import plotly.express as px
|
10
10
|
from sklearn.metrics.pairwise import cosine_similarity
|
11
11
|
|
12
|
+
from validmind import tags, tasks
|
12
13
|
|
14
|
+
|
15
|
+
@tags("visualization", "dimensionality_reduction", "embeddings")
|
16
|
+
@tasks("text_qa", "text_generation", "text_summarization")
|
13
17
|
def CosineSimilarityComparison(dataset, models):
|
14
18
|
"""
|
15
19
|
Computes pairwise cosine similarities between model embeddings and visualizes the results through bar charts,
|
@@ -6,7 +6,11 @@ import numpy as np
|
|
6
6
|
import plotly.express as px
|
7
7
|
from sklearn.metrics.pairwise import cosine_similarity
|
8
8
|
|
9
|
+
from validmind import tags, tasks
|
9
10
|
|
11
|
+
|
12
|
+
@tags("visualization", "dimensionality_reduction", "embeddings")
|
13
|
+
@tasks("text_qa", "text_generation", "text_summarization")
|
10
14
|
def CosineSimilarityHeatmap(
|
11
15
|
dataset,
|
12
16
|
model,
|
@@ -9,7 +9,11 @@ import pandas as pd
|
|
9
9
|
import plotly.express as px
|
10
10
|
from sklearn.metrics.pairwise import euclidean_distances
|
11
11
|
|
12
|
+
from validmind import tags, tasks
|
12
13
|
|
14
|
+
|
15
|
+
@tags("visualization", "dimensionality_reduction", "embeddings")
|
16
|
+
@tasks("text_qa", "text_generation", "text_summarization")
|
13
17
|
def EuclideanDistanceComparison(dataset, models):
|
14
18
|
"""
|
15
19
|
Computes pairwise Euclidean distances between model embeddings and visualizes the results through bar charts,
|
@@ -6,7 +6,11 @@ import numpy as np
|
|
6
6
|
import plotly.express as px
|
7
7
|
from sklearn.metrics.pairwise import euclidean_distances
|
8
8
|
|
9
|
+
from validmind import tags, tasks
|
9
10
|
|
11
|
+
|
12
|
+
@tags("visualization", "dimensionality_reduction", "embeddings")
|
13
|
+
@tasks("text_qa", "text_generation", "text_summarization")
|
10
14
|
def EuclideanDistanceHeatmap(
|
11
15
|
dataset,
|
12
16
|
model,
|
@@ -10,7 +10,11 @@ import plotly.express as px
|
|
10
10
|
from sklearn.decomposition import PCA
|
11
11
|
from sklearn.preprocessing import StandardScaler
|
12
12
|
|
13
|
+
from validmind import tags, tasks
|
13
14
|
|
15
|
+
|
16
|
+
@tags("visualization", "dimensionality_reduction", "embeddings")
|
17
|
+
@tasks("text_qa", "text_generation", "text_summarization")
|
14
18
|
def PCAComponentsPairwisePlots(dataset, model, n_components=3):
|
15
19
|
"""
|
16
20
|
Generates scatter plots for pairwise combinations of principal component analysis (PCA) components of model embeddings.
|
@@ -10,7 +10,11 @@ import plotly.express as px
|
|
10
10
|
from sklearn.manifold import TSNE
|
11
11
|
from sklearn.preprocessing import StandardScaler
|
12
12
|
|
13
|
+
from validmind import tags, tasks
|
13
14
|
|
15
|
+
|
16
|
+
@tags("visualization", "dimensionality_reduction", "embeddings")
|
17
|
+
@tasks("text_qa", "text_generation", "text_summarization")
|
14
18
|
def TSNEComponentsPairwisePlots(
|
15
19
|
dataset,
|
16
20
|
model,
|
@@ -11,7 +11,7 @@ from ragas.metrics import answer_correctness
|
|
11
11
|
|
12
12
|
from validmind import tags, tasks
|
13
13
|
|
14
|
-
from .utils import get_renamed_columns
|
14
|
+
from .utils import get_ragas_config, get_renamed_columns
|
15
15
|
|
16
16
|
|
17
17
|
@tags("ragas", "llm")
|
@@ -104,7 +104,7 @@ def AnswerCorrectness(
|
|
104
104
|
df = get_renamed_columns(dataset.df, required_columns)
|
105
105
|
|
106
106
|
result_df = evaluate(
|
107
|
-
Dataset.from_pandas(df), metrics=[answer_correctness]
|
107
|
+
Dataset.from_pandas(df), metrics=[answer_correctness], **get_ragas_config()
|
108
108
|
).to_pandas()
|
109
109
|
|
110
110
|
fig_histogram = px.histogram(x=result_df["answer_correctness"].to_list(), nbins=10)
|
@@ -112,7 +112,7 @@ def AnswerCorrectness(
|
|
112
112
|
|
113
113
|
return (
|
114
114
|
{
|
115
|
-
"Scores": result_df[
|
115
|
+
"Scores (will not be uploaded to UI)": result_df[
|
116
116
|
["question", "answer", "ground_truth", "answer_correctness"]
|
117
117
|
],
|
118
118
|
"Aggregate Scores": [
|