validmind 2.8.10__py3-none-any.whl → 2.8.12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai/test_descriptions.py +4 -2
- validmind/tests/data_validation/ACFandPACFPlot.py +4 -1
- validmind/tests/data_validation/AutoMA.py +1 -1
- validmind/tests/data_validation/BivariateScatterPlots.py +5 -1
- validmind/tests/data_validation/BoxPierce.py +3 -1
- validmind/tests/data_validation/ClassImbalance.py +1 -1
- validmind/tests/data_validation/DatasetDescription.py +1 -1
- validmind/tests/data_validation/DickeyFullerGLS.py +1 -1
- validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +1 -1
- validmind/tests/data_validation/HighCardinality.py +5 -1
- validmind/tests/data_validation/HighPearsonCorrelation.py +1 -1
- validmind/tests/data_validation/IQROutliersBarPlot.py +5 -3
- validmind/tests/data_validation/IQROutliersTable.py +5 -2
- validmind/tests/data_validation/IsolationForestOutliers.py +5 -4
- validmind/tests/data_validation/JarqueBera.py +2 -2
- validmind/tests/data_validation/LJungBox.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/MissingValues.py +14 -10
- validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
- validmind/tests/data_validation/MutualInformation.py +2 -1
- validmind/tests/data_validation/PearsonCorrelationMatrix.py +1 -1
- validmind/tests/data_validation/ProtectedClassesCombination.py +2 -0
- validmind/tests/data_validation/ProtectedClassesDescription.py +2 -2
- validmind/tests/data_validation/ProtectedClassesDisparity.py +9 -5
- validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +10 -2
- validmind/tests/data_validation/RollingStatsPlot.py +2 -1
- validmind/tests/data_validation/ScoreBandDefaultRates.py +4 -2
- validmind/tests/data_validation/SeasonalDecompose.py +1 -1
- validmind/tests/data_validation/ShapiroWilk.py +2 -2
- validmind/tests/data_validation/SpreadPlot.py +1 -1
- validmind/tests/data_validation/TabularCategoricalBarPlots.py +1 -1
- validmind/tests/data_validation/TabularDateTimeHistograms.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
- validmind/tests/data_validation/TimeSeriesFrequency.py +1 -1
- validmind/tests/data_validation/TimeSeriesOutliers.py +7 -2
- validmind/tests/data_validation/WOEBinPlots.py +1 -1
- validmind/tests/data_validation/WOEBinTable.py +1 -1
- validmind/tests/data_validation/ZivotAndrewsArch.py +5 -2
- validmind/tests/data_validation/nlp/CommonWords.py +1 -1
- validmind/tests/data_validation/nlp/Hashtags.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +1 -1
- validmind/tests/data_validation/nlp/Mentions.py +1 -1
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +5 -1
- validmind/tests/data_validation/nlp/Punctuations.py +1 -1
- validmind/tests/data_validation/nlp/Sentiment.py +3 -1
- validmind/tests/data_validation/nlp/TextDescription.py +1 -1
- validmind/tests/data_validation/nlp/Toxicity.py +1 -1
- validmind/tests/model_validation/BertScore.py +7 -1
- validmind/tests/model_validation/BleuScore.py +7 -1
- validmind/tests/model_validation/ClusterSizeDistribution.py +3 -1
- validmind/tests/model_validation/ContextualRecall.py +9 -1
- validmind/tests/model_validation/FeaturesAUC.py +1 -1
- validmind/tests/model_validation/MeteorScore.py +7 -1
- validmind/tests/model_validation/ModelPredictionResiduals.py +5 -1
- validmind/tests/model_validation/RegardScore.py +6 -1
- validmind/tests/model_validation/RegressionResidualsPlot.py +10 -1
- validmind/tests/model_validation/RougeScore.py +3 -1
- validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +2 -0
- validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +10 -2
- validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -2
- validmind/tests/model_validation/TokenDisparity.py +5 -1
- validmind/tests/model_validation/ToxicityScore.py +2 -0
- validmind/tests/model_validation/embeddings/ClusterDistribution.py +1 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -1
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +5 -1
- validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +2 -0
- validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +5 -1
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -2
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +3 -1
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +5 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +6 -1
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +1 -1
- validmind/tests/model_validation/ragas/AspectCritic.py +4 -1
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +1 -1
- validmind/tests/model_validation/ragas/ContextPrecision.py +1 -1
- validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +1 -1
- validmind/tests/model_validation/ragas/ContextRecall.py +1 -1
- validmind/tests/model_validation/ragas/Faithfulness.py +1 -1
- validmind/tests/model_validation/ragas/NoiseSensitivity.py +1 -1
- validmind/tests/model_validation/ragas/ResponseRelevancy.py +1 -1
- validmind/tests/model_validation/ragas/SemanticSimilarity.py +1 -1
- validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +9 -9
- validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +9 -9
- validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -2
- validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +15 -2
- validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
- validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +24 -14
- validmind/tests/model_validation/sklearn/CompletenessScore.py +8 -9
- validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -3
- validmind/tests/model_validation/sklearn/FeatureImportance.py +6 -2
- validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +12 -9
- validmind/tests/model_validation/sklearn/HomogeneityScore.py +14 -9
- validmind/tests/model_validation/sklearn/HyperParametersTuning.py +4 -2
- validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +6 -1
- validmind/tests/model_validation/sklearn/MinimumAccuracy.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumF1Score.py +12 -7
- validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +21 -6
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +8 -2
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +5 -1
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -1
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +6 -1
- validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
- validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -2
- validmind/tests/model_validation/sklearn/RegressionPerformance.py +13 -8
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +8 -5
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +5 -1
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +6 -1
- validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +10 -2
- validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -1
- validmind/tests/model_validation/sklearn/VMeasure.py +12 -9
- validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +5 -1
- validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
- validmind/tests/model_validation/statsmodels/GINITable.py +8 -1
- validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +2 -2
- validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
- validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +8 -2
- validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +3 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +7 -2
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +2 -0
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +4 -2
- validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
- validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +11 -1
- validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +10 -2
- validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +8 -1
- validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +18 -2
- validmind/tests/ongoing_monitoring/FeatureDrift.py +9 -2
- validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +8 -2
- validmind/tests/ongoing_monitoring/PredictionCorrelation.py +13 -2
- validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +13 -2
- validmind/tests/ongoing_monitoring/ROCCurveDrift.py +16 -2
- validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +11 -2
- validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +13 -2
- validmind/tests/prompt_validation/Clarity.py +1 -1
- validmind/tests/prompt_validation/NegativeInstruction.py +1 -1
- validmind/tests/prompt_validation/Robustness.py +6 -1
- validmind/tests/prompt_validation/Specificity.py +1 -1
- validmind/vm_models/result/utils.py +4 -23
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/METADATA +2 -2
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/RECORD +149 -149
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/LICENSE +0 -0
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/WHEEL +0 -0
- {validmind-2.8.10.dist-info → validmind-2.8.12.dist-info}/entry_points.txt +0 -0
validmind/__version__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "2.8.10"
|
1
|
+
__version__ = "2.8.12"
|
@@ -37,8 +37,10 @@ def _get_llm_global_context():
|
|
37
37
|
return context if context_enabled and context else None
|
38
38
|
|
39
39
|
|
40
|
-
def _truncate_summary(
|
41
|
-
|
40
|
+
def _truncate_summary(
|
41
|
+
summary: Union[str, None], test_id: str, max_tokens: int = 100_000
|
42
|
+
):
|
43
|
+
if summary is None or len(summary) < max_tokens:
|
42
44
|
# since string itself is less than max_tokens, definitely small enough
|
43
45
|
return summary
|
44
46
|
|
@@ -94,4 +94,7 @@ def ACFandPACFPlot(dataset: VMDataset):
|
|
94
94
|
figures.append(pacf_fig)
|
95
95
|
pacf_store[col] = pacf_values
|
96
96
|
|
97
|
-
return (
|
97
|
+
return (
|
98
|
+
*figures,
|
99
|
+
RawData(acf_values=acf_store, pacf_values=pacf_store, dataset=dataset.input_id),
|
100
|
+
)
|
@@ -80,5 +80,9 @@ def BivariateScatterPlots(dataset):
|
|
80
80
|
figures.append(fig)
|
81
81
|
|
82
82
|
return tuple(figures) + (
|
83
|
-
RawData(
|
83
|
+
RawData(
|
84
|
+
selected_numerical_df=df,
|
85
|
+
feature_pairs=features_pairs,
|
86
|
+
dataset=dataset.input_id,
|
87
|
+
),
|
84
88
|
)
|
@@ -68,4 +68,6 @@ def BoxPierce(dataset):
|
|
68
68
|
box_pierce_df.reset_index(inplace=True)
|
69
69
|
box_pierce_df.columns = ["column", "stat", "pvalue"]
|
70
70
|
|
71
|
-
return box_pierce_df, RawData(
|
71
|
+
return box_pierce_df, RawData(
|
72
|
+
box_pierce_values=box_pierce_values, dataset=dataset.input_id
|
73
|
+
)
|
@@ -104,5 +104,5 @@ def ClassImbalance(
|
|
104
104
|
},
|
105
105
|
go.Figure(data=[trace], layout=layout),
|
106
106
|
all(row["Pass/Fail"] == "Pass" for row in imbalanced_classes),
|
107
|
-
RawData(imbalance_percentages=imbalance_percentages),
|
107
|
+
RawData(imbalance_percentages=imbalance_percentages, dataset=dataset.input_id),
|
108
108
|
)
|
@@ -58,7 +58,7 @@ def FeatureTargetCorrelationPlot(dataset, fig_height=600):
|
|
58
58
|
df, dataset.target_column, fig_height
|
59
59
|
)
|
60
60
|
|
61
|
-
return fig, RawData(correlation_data=correlations)
|
61
|
+
return fig, RawData(correlation_data=correlations, dataset=dataset.input_id)
|
62
62
|
|
63
63
|
|
64
64
|
def _visualize_feature_target_correlation(df, target_column, fig_height):
|
@@ -118,11 +118,13 @@ def IQROutliersBarPlot(
|
|
118
118
|
)
|
119
119
|
figures.append(fig)
|
120
120
|
|
121
|
+
outliers_by_feature = df[dataset.feature_columns_numeric].apply(
|
122
|
+
lambda col: compute_outliers(col, threshold)
|
123
|
+
)
|
124
|
+
|
121
125
|
return (
|
122
126
|
*figures,
|
123
127
|
RawData(
|
124
|
-
outlier_counts_by_feature=
|
125
|
-
lambda col: compute_outliers(col, threshold)
|
126
|
-
)
|
128
|
+
outlier_counts_by_feature=outliers_by_feature, dataset=dataset.input_id
|
127
129
|
),
|
128
130
|
)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from validmind import tags, tasks
|
5
|
+
from validmind import RawData, tags, tasks
|
6
6
|
from validmind.vm_models import VMDataset
|
7
7
|
|
8
8
|
|
@@ -64,6 +64,7 @@ def IQROutliersTable(dataset: VMDataset, threshold: float = 1.5):
|
|
64
64
|
df = dataset.df
|
65
65
|
|
66
66
|
outliers_table = []
|
67
|
+
all_outliers = {}
|
67
68
|
|
68
69
|
for col in dataset.feature_columns_numeric:
|
69
70
|
# Skip binary features
|
@@ -71,6 +72,8 @@ def IQROutliersTable(dataset: VMDataset, threshold: float = 1.5):
|
|
71
72
|
continue
|
72
73
|
|
73
74
|
outliers = compute_outliers(df[col], threshold)
|
75
|
+
all_outliers[col] = outliers
|
76
|
+
|
74
77
|
if outliers.empty:
|
75
78
|
continue
|
76
79
|
|
@@ -89,4 +92,4 @@ def IQROutliersTable(dataset: VMDataset, threshold: float = 1.5):
|
|
89
92
|
|
90
93
|
return {
|
91
94
|
"Summary of Outliers Detected by IQR Method": outliers_table,
|
92
|
-
}
|
95
|
+
}, RawData(all_outliers=all_outliers, dataset=dataset.input_id)
|
@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
|
|
8
8
|
import seaborn as sns
|
9
9
|
from sklearn.ensemble import IsolationForest
|
10
10
|
|
11
|
-
from validmind import tags, tasks
|
11
|
+
from validmind import RawData, tags, tasks
|
12
12
|
from validmind.vm_models import VMDataset
|
13
13
|
|
14
14
|
|
@@ -91,6 +91,7 @@ def IsolationForestOutliers(
|
|
91
91
|
|
92
92
|
figures.append(fig)
|
93
93
|
|
94
|
-
|
95
|
-
|
96
|
-
|
94
|
+
return (
|
95
|
+
*figures,
|
96
|
+
RawData(predictions=y_pred, dataset=dataset.input_id),
|
97
|
+
)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
from statsmodels.stats.stattools import jarque_bera
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tasks("classification", "regression")
|
@@ -67,4 +67,4 @@ def JarqueBera(dataset):
|
|
67
67
|
jb_df.reset_index(inplace=True)
|
68
68
|
jb_df.columns = ["column", "stat", "pvalue", "skew", "kurtosis"]
|
69
69
|
|
70
|
-
return jb_df
|
70
|
+
return jb_df, RawData(jb_values=jb_values, dataset=dataset.input_id)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
from statsmodels.stats.diagnostic import acorr_ljungbox
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tasks("regression")
|
@@ -63,4 +63,4 @@ def LJungBox(dataset):
|
|
63
63
|
ljung_box_df.reset_index(inplace=True)
|
64
64
|
ljung_box_df.columns = ["column", "stat", "pvalue"]
|
65
65
|
|
66
|
-
return ljung_box_df
|
66
|
+
return ljung_box_df, RawData(ljung_box_df=ljung_box_df, dataset=dataset.input_id)
|
@@ -2,7 +2,7 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
-
from validmind import tags, tasks
|
5
|
+
from validmind import RawData, tags, tasks
|
6
6
|
from validmind.vm_models import VMDataset
|
7
7
|
|
8
8
|
|
@@ -49,12 +49,16 @@ def MissingValues(dataset: VMDataset, min_threshold: int = 1):
|
|
49
49
|
df = dataset.df
|
50
50
|
missing = df.isna().sum()
|
51
51
|
|
52
|
-
return
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
52
|
+
return (
|
53
|
+
[
|
54
|
+
{
|
55
|
+
"Column": col,
|
56
|
+
"Number of Missing Values": missing[col],
|
57
|
+
"Percentage of Missing Values (%)": missing[col] / df.shape[0] * 100,
|
58
|
+
"Pass/Fail": "Pass" if missing[col] < min_threshold else "Fail",
|
59
|
+
}
|
60
|
+
for col in missing.index
|
61
|
+
],
|
62
|
+
all(missing[col] < min_threshold for col in missing.index),
|
63
|
+
RawData(missing_values=missing, dataset=dataset.input_id),
|
64
|
+
)
|
@@ -6,7 +6,7 @@
|
|
6
6
|
import pandas as pd
|
7
7
|
import plotly.graph_objects as go
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
from validmind.logging import get_logger
|
11
11
|
|
12
12
|
logger = get_logger(__name__)
|
@@ -127,4 +127,4 @@ def ProtectedClassesDescription(dataset, protected_classes=None):
|
|
127
127
|
["Protected Class", "Count"], ascending=[True, False]
|
128
128
|
)
|
129
129
|
|
130
|
-
return (stats_df, *figures)
|
130
|
+
return (stats_df, *figures, RawData(dataset=dataset.input_id))
|
@@ -7,7 +7,7 @@ import sys
|
|
7
7
|
|
8
8
|
import pandas as pd
|
9
9
|
|
10
|
-
from validmind import tags, tasks
|
10
|
+
from validmind import RawData, tags, tasks
|
11
11
|
from validmind.errors import MissingDependencyError
|
12
12
|
from validmind.logging import get_logger
|
13
13
|
|
@@ -119,7 +119,7 @@ def ProtectedClassesDisparity(
|
|
119
119
|
mask_significance=True,
|
120
120
|
)
|
121
121
|
|
122
|
-
|
122
|
+
returns = [] # Renamed to 'returns' for clarity
|
123
123
|
for protected_class in protected_classes:
|
124
124
|
plot = ap.disparity(
|
125
125
|
bdf, metrics, protected_class, fairness_threshold=disparity_tolerance
|
@@ -129,12 +129,16 @@ def ProtectedClassesDisparity(
|
|
129
129
|
plot.save(
|
130
130
|
buf, format="png"
|
131
131
|
) # as long as the above library is installed, this will work
|
132
|
-
|
132
|
+
returns.append(buf.getvalue())
|
133
133
|
|
134
134
|
string = "_disparity"
|
135
135
|
metrics_adj = [x + string for x in metrics]
|
136
136
|
|
137
137
|
table = bdf[["attribute_name", "attribute_value"] + b.list_disparities(bdf)]
|
138
|
-
|
138
|
+
returns.append(aqp.plot_disparity_all(bdf, metrics=metrics_adj))
|
139
139
|
|
140
|
-
return (
|
140
|
+
return (
|
141
|
+
table,
|
142
|
+
*returns,
|
143
|
+
RawData(model=model.input_id, dataset=dataset.input_id, disparity_data=bdf),
|
144
|
+
)
|
@@ -8,7 +8,7 @@ import sys
|
|
8
8
|
import matplotlib.pyplot as plt
|
9
9
|
import pandas as pd
|
10
10
|
|
11
|
-
from validmind import tags, tasks
|
11
|
+
from validmind import RawData, tags, tasks
|
12
12
|
from validmind.errors import MissingDependencyError
|
13
13
|
from validmind.logging import get_logger
|
14
14
|
|
@@ -103,7 +103,15 @@ def ProtectedClassesThresholdOptimizer(
|
|
103
103
|
test_df, target, y_pred_opt, protected_classes
|
104
104
|
)
|
105
105
|
|
106
|
-
return
|
106
|
+
return (
|
107
|
+
{"DPR and EOR Table": fairness_metrics.reset_index()},
|
108
|
+
fig,
|
109
|
+
RawData(
|
110
|
+
y_predictions=y_pred_opt.tolist(),
|
111
|
+
dataset=dataset.input_id,
|
112
|
+
protected_classes=protected_classes,
|
113
|
+
),
|
114
|
+
)
|
107
115
|
|
108
116
|
|
109
117
|
def initialize_and_fit_optimizer(pipeline, X_train, y_train, protected_classes_df):
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import numpy as np
|
6
6
|
import pandas as pd
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.vm_models import VMDataset, VMModel
|
10
10
|
|
11
11
|
|
@@ -137,4 +137,6 @@ def ScoreBandDefaultRates(
|
|
137
137
|
}
|
138
138
|
)
|
139
139
|
|
140
|
-
return pd.DataFrame(results)
|
140
|
+
return pd.DataFrame(results), RawData(
|
141
|
+
results=results, model=model.input_id, dataset=dataset.input_id
|
142
|
+
)
|
@@ -166,4 +166,4 @@ def SeasonalDecompose(dataset: VMDataset, seasonal_model: str = "additive"):
|
|
166
166
|
if not figures:
|
167
167
|
raise SkipTestError("No valid features found for seasonal decomposition")
|
168
168
|
|
169
|
-
return (*figures, RawData(decomposed_components=raw_data))
|
169
|
+
return (*figures, RawData(decomposed_components=raw_data, dataset=dataset.input_id))
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
from scipy import stats
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
|
10
10
|
|
11
11
|
@tasks("classification", "regression")
|
@@ -66,4 +66,4 @@ def ShapiroWilk(dataset):
|
|
66
66
|
sw_df.reset_index(inplace=True)
|
67
67
|
sw_df.columns = ["column", "stat", "pvalue"]
|
68
68
|
|
69
|
-
return sw_df
|
69
|
+
return sw_df, RawData(shapiro_results=sw_values, dataset=dataset.input_id)
|
@@ -5,7 +5,7 @@
|
|
5
5
|
import pandas as pd
|
6
6
|
import plotly.graph_objects as go
|
7
7
|
|
8
|
-
from validmind import tags, tasks
|
8
|
+
from validmind import RawData, tags, tasks
|
9
9
|
from validmind.errors import SkipTestError
|
10
10
|
from validmind.vm_models import VMDataset
|
11
11
|
|
@@ -111,4 +111,9 @@ def TimeSeriesOutliers(dataset: VMDataset, zscore_threshold: int = 3):
|
|
111
111
|
|
112
112
|
figures.append(fig)
|
113
113
|
|
114
|
-
return (
|
114
|
+
return (
|
115
|
+
outlier_df.sort_values(["Column", "Date"]),
|
116
|
+
figures,
|
117
|
+
len(outlier_df) == 0,
|
118
|
+
RawData(outliers=outlier_df, dataset=dataset.input_id),
|
119
|
+
)
|
@@ -6,7 +6,7 @@ import pandas as pd
|
|
6
6
|
from arch.unitroot import ZivotAndrews
|
7
7
|
from numpy.linalg import LinAlgError
|
8
8
|
|
9
|
-
from validmind import tags, tasks
|
9
|
+
from validmind import RawData, tags, tasks
|
10
10
|
from validmind.errors import SkipTestError
|
11
11
|
from validmind.logging import get_logger
|
12
12
|
from validmind.vm_models import VMDataset
|
@@ -83,4 +83,7 @@ def ZivotAndrewsArch(dataset: VMDataset):
|
|
83
83
|
}
|
84
84
|
)
|
85
85
|
|
86
|
-
return
|
86
|
+
return (
|
87
|
+
{"Zivot-Andrews Test Results": za_values},
|
88
|
+
RawData(zivot_andrews=za_values, dataset=dataset.input_id),
|
89
|
+
)
|
@@ -144,4 +144,8 @@ def PolarityAndSubjectivity(dataset, threshold_subjectivity=0.5, threshold_polar
|
|
144
144
|
|
145
145
|
statistics_tables = {"Quadrant Distribution": quadrant_df, "Statistics": stats_df}
|
146
146
|
|
147
|
-
return
|
147
|
+
return (
|
148
|
+
fig,
|
149
|
+
statistics_tables,
|
150
|
+
RawData(sentiment_data=data, dataset=dataset.input_id),
|
151
|
+
)
|
@@ -65,7 +65,7 @@ def Punctuations(dataset, count_mode="token"):
|
|
65
65
|
punctuation_counts = _count_punctuations(corpus, count_mode)
|
66
66
|
fig = _create_punctuation_plot(punctuation_counts)
|
67
67
|
|
68
|
-
return fig, RawData(punctuation_counts=punctuation_counts)
|
68
|
+
return fig, RawData(punctuation_counts=punctuation_counts, dataset=dataset.input_id)
|
69
69
|
|
70
70
|
|
71
71
|
def _create_punctuation_plot(punctuation_counts):
|
@@ -131,4 +131,10 @@ def BertScore(
|
|
131
131
|
# Create a DataFrame from all collected statistics
|
132
132
|
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
|
133
133
|
|
134
|
-
return (
|
134
|
+
return (
|
135
|
+
result_df,
|
136
|
+
*figures,
|
137
|
+
RawData(
|
138
|
+
bert_scores_df=metrics_df, model=model.input_id, dataset=dataset.input_id
|
139
|
+
),
|
140
|
+
)
|
@@ -114,4 +114,10 @@ def BleuScore(dataset, model):
|
|
114
114
|
# Create a DataFrame from all collected statistics
|
115
115
|
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
|
116
116
|
|
117
|
-
return (
|
117
|
+
return (
|
118
|
+
result_df,
|
119
|
+
*figures,
|
120
|
+
RawData(
|
121
|
+
bleu_scores_df=metrics_df, model=model.input_id, dataset=dataset.input_id
|
122
|
+
),
|
123
|
+
)
|
@@ -72,4 +72,6 @@ def ClusterSizeDistribution(dataset: VMDataset, model: VMModel):
|
|
72
72
|
fig.update_yaxes(title_text="Counts", showgrid=False)
|
73
73
|
fig.update_layout(title_text="Cluster distribution", title_x=0.5, barmode="group")
|
74
74
|
|
75
|
-
return fig, RawData(
|
75
|
+
return fig, RawData(
|
76
|
+
cluster_counts=df_counts, model=model.input_id, dataset=dataset.input_id
|
77
|
+
)
|
@@ -118,4 +118,12 @@ def ContextualRecall(dataset, model):
|
|
118
118
|
# Create a DataFrame from all collected statistics
|
119
119
|
result_df = pd.DataFrame(stats_df).reset_index().rename(columns={"index": "Metric"})
|
120
120
|
|
121
|
-
return (
|
121
|
+
return (
|
122
|
+
result_df,
|
123
|
+
*figures,
|
124
|
+
RawData(
|
125
|
+
contextual_recall_scores=metrics_df,
|
126
|
+
model=model.input_id,
|
127
|
+
dataset=dataset.input_id,
|
128
|
+
),
|
129
|
+
)
|