validmind 2.8.28__py3-none-any.whl → 2.8.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. validmind/models/function.py +11 -3
  2. validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
  3. validmind/tests/data_validation/ADF.py +3 -1
  4. validmind/tests/data_validation/AutoAR.py +3 -1
  5. validmind/tests/data_validation/AutoMA.py +5 -1
  6. validmind/tests/data_validation/AutoStationarity.py +5 -1
  7. validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
  8. validmind/tests/data_validation/BoxPierce.py +4 -1
  9. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  10. validmind/tests/data_validation/ClassImbalance.py +1 -1
  11. validmind/tests/data_validation/DatasetDescription.py +4 -1
  12. validmind/tests/data_validation/DatasetSplit.py +3 -2
  13. validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
  14. validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
  15. validmind/tests/data_validation/Duplicates.py +3 -1
  16. validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
  17. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  18. validmind/tests/data_validation/HighCardinality.py +3 -1
  19. validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
  20. validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
  21. validmind/tests/data_validation/IQROutliersTable.py +6 -1
  22. validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
  23. validmind/tests/data_validation/JarqueBera.py +3 -1
  24. validmind/tests/data_validation/KPSS.py +3 -1
  25. validmind/tests/data_validation/LJungBox.py +3 -1
  26. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
  27. validmind/tests/data_validation/MissingValues.py +5 -1
  28. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  29. validmind/tests/data_validation/MutualInformation.py +4 -1
  30. validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
  31. validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
  32. validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
  33. validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
  34. validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
  35. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
  36. validmind/tests/data_validation/RollingStatsPlot.py +5 -1
  37. validmind/tests/data_validation/RunsTest.py +1 -1
  38. validmind/tests/data_validation/ScatterPlot.py +2 -1
  39. validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
  40. validmind/tests/data_validation/SeasonalDecompose.py +6 -1
  41. validmind/tests/data_validation/ShapiroWilk.py +4 -1
  42. validmind/tests/data_validation/Skewness.py +3 -1
  43. validmind/tests/data_validation/SpreadPlot.py +3 -1
  44. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
  45. validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
  46. validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
  48. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  49. validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
  50. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
  51. validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
  52. validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
  53. validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
  54. validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
  55. validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
  56. validmind/tests/data_validation/TooManyZeroValues.py +6 -1
  57. validmind/tests/data_validation/UniqueRows.py +5 -1
  58. validmind/tests/data_validation/WOEBinPlots.py +4 -1
  59. validmind/tests/data_validation/WOEBinTable.py +5 -1
  60. validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
  61. validmind/tests/data_validation/nlp/CommonWords.py +2 -1
  62. validmind/tests/data_validation/nlp/Hashtags.py +2 -1
  63. validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
  64. validmind/tests/data_validation/nlp/Mentions.py +3 -1
  65. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
  66. validmind/tests/data_validation/nlp/Punctuations.py +2 -1
  67. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  68. validmind/tests/data_validation/nlp/StopWords.py +2 -1
  69. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  70. validmind/tests/data_validation/nlp/Toxicity.py +3 -1
  71. validmind/tests/load.py +91 -17
  72. validmind/tests/model_validation/BertScore.py +6 -3
  73. validmind/tests/model_validation/BleuScore.py +6 -1
  74. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
  75. validmind/tests/model_validation/ContextualRecall.py +6 -1
  76. validmind/tests/model_validation/FeaturesAUC.py +5 -1
  77. validmind/tests/model_validation/MeteorScore.py +6 -1
  78. validmind/tests/model_validation/ModelMetadata.py +2 -1
  79. validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
  80. validmind/tests/model_validation/RegardScore.py +7 -1
  81. validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
  82. validmind/tests/model_validation/RougeScore.py +8 -1
  83. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
  84. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
  85. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
  86. validmind/tests/model_validation/TokenDisparity.py +6 -1
  87. validmind/tests/model_validation/ToxicityScore.py +6 -1
  88. validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
  89. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
  90. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
  91. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
  92. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
  93. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
  94. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
  96. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
  97. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
  98. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  101. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
  102. validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
  103. validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
  104. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
  105. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
  106. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
  107. validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
  108. validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
  109. validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
  110. validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
  111. validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
  112. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
  113. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
  114. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  117. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
  118. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
  119. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
  120. validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
  121. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
  122. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
  123. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
  124. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
  125. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
  126. validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
  127. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
  128. validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
  137. validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
  141. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  144. validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
  147. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
  148. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  149. validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
  150. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
  151. validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
  152. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  153. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
  154. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
  155. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
  156. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
  157. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
  158. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
  159. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  160. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
  161. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
  162. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
  163. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
  164. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
  165. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
  166. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
  167. validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
  168. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
  169. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
  170. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
  171. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
  172. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
  173. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
  174. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
  175. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
  176. validmind/tests/prompt_validation/Bias.py +5 -1
  177. validmind/tests/prompt_validation/Clarity.py +5 -1
  178. validmind/tests/prompt_validation/Conciseness.py +5 -1
  179. validmind/tests/prompt_validation/Delimitation.py +5 -1
  180. validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
  181. validmind/tests/prompt_validation/Robustness.py +5 -1
  182. validmind/tests/prompt_validation/Specificity.py +5 -1
  183. validmind/unit_metrics/classification/Accuracy.py +2 -1
  184. validmind/unit_metrics/classification/F1.py +2 -1
  185. validmind/unit_metrics/classification/Precision.py +2 -1
  186. validmind/unit_metrics/classification/ROC_AUC.py +2 -1
  187. validmind/unit_metrics/classification/Recall.py +2 -1
  188. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
  189. validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
  190. validmind/unit_metrics/regression/HuberLoss.py +2 -1
  191. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
  192. validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
  193. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
  194. validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
  195. validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
  196. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  197. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  198. validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
  199. validmind/vm_models/dataset/dataset.py +145 -38
  200. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
  201. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/RECORD +204 -204
  202. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
  203. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
  204. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.graph_objects as go
7
9
 
@@ -13,7 +15,7 @@ from validmind.vm_models import VMDataset, VMModel
13
15
  @tasks("classification")
14
16
  def ScoreProbabilityAlignment(
15
17
  model: VMModel, dataset: VMDataset, score_column: str = "score", n_bins: int = 10
16
- ):
18
+ ) -> Tuple[pd.DataFrame, go.Figure, RawData]:
17
19
  """
18
20
  Analyzes the alignment between credit scores and predicted probabilities.
19
21
 
@@ -2,8 +2,11 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, Tuple
6
+
5
7
  import matplotlib.pyplot as plt
6
8
  import numpy as np
9
+ import plotly.graph_objects as go
7
10
  from sklearn.metrics import silhouette_samples, silhouette_score
8
11
 
9
12
  from validmind import RawData, tags, tasks
@@ -12,7 +15,9 @@ from validmind.vm_models import VMDataset, VMModel
12
15
 
13
16
  @tags("sklearn", "model_performance")
14
17
  @tasks("clustering")
15
- def SilhouettePlot(model: VMModel, dataset: VMDataset):
18
+ def SilhouettePlot(
19
+ model: VMModel, dataset: VMDataset
20
+ ) -> Tuple[Dict[str, float], go.Figure, RawData]:
16
21
  """
17
22
  Calculates and visualizes Silhouette Score, assessing the degree of data point suitability to its cluster in ML
18
23
  models.
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  from numpy import unique
8
8
  from sklearn.metrics import classification_report
@@ -22,7 +22,7 @@ from validmind.vm_models import VMDataset, VMModel
22
22
  @tasks("classification", "text_classification")
23
23
  def TrainingTestDegradation(
24
24
  datasets: List[VMDataset], model: VMModel, max_threshold: float = 0.10
25
- ):
25
+ ) -> Tuple[List[Dict[str, float]], bool, RawData]:
26
26
  """
27
27
  Tests if model performance degradation between training and test datasets exceeds a predefined threshold.
28
28
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List, Tuple
6
+
5
7
  from sklearn import metrics
6
8
 
7
9
  from validmind import RawData, tags, tasks
@@ -10,7 +12,9 @@ from validmind.vm_models import VMDataset, VMModel
10
12
 
11
13
  @tags("sklearn", "model_performance")
12
14
  @tasks("clustering")
13
- def VMeasure(dataset: VMDataset, model: VMModel):
15
+ def VMeasure(
16
+ dataset: VMDataset, model: VMModel
17
+ ) -> Tuple[List[Dict[str, float]], RawData]:
14
18
  """
15
19
  Evaluates homogeneity and completeness of a clustering model using the V Measure Score.
16
20
 
@@ -2,10 +2,11 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import Callable, Dict, List, Tuple, Union
5
+ from typing import Callable, Dict, List, Optional, Tuple
6
6
 
7
7
  import matplotlib.pyplot as plt
8
8
  import pandas as pd
9
+ import plotly.graph_objects as go
9
10
  import seaborn as sns
10
11
  from sklearn import metrics
11
12
 
@@ -157,10 +158,10 @@ def _plot_weak_spots(
157
158
  def WeakspotsDiagnosis(
158
159
  datasets: List[VMDataset],
159
160
  model: VMModel,
160
- features_columns: Union[List[str], None] = None,
161
- metrics: Union[Dict[str, Callable], None] = None,
162
- thresholds: Union[Dict[str, float], None] = None,
163
- ):
161
+ features_columns: Optional[List[str]] = None,
162
+ metrics: Optional[Dict[str, Callable]] = None,
163
+ thresholds: Optional[Dict[str, float]] = None,
164
+ ) -> Tuple[pd.DataFrame, go.Figure, bool]:
164
165
  """
165
166
  Identifies and visualizes weak spots in a machine learning model's performance across various sections of the
166
167
  feature space.
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List
6
+
5
7
  from statsmodels.tsa.arima.model import ARIMA
6
8
  from statsmodels.tsa.stattools import adfuller
7
9
 
@@ -14,7 +16,7 @@ logger = get_logger(__name__)
14
16
 
15
17
  @tags("time_series_data", "forecasting", "model_selection", "statsmodels")
16
18
  @tasks("regression")
17
- def AutoARIMA(model: VMModel, dataset: VMDataset):
19
+ def AutoARIMA(model: VMModel, dataset: VMDataset) -> List[Dict[str, float]]:
18
20
  """
19
21
  Evaluates ARIMA models for time-series forecasting, ranking them using Bayesian and Akaike Information Criteria.
20
22
 
@@ -2,16 +2,21 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import plotly.graph_objects as go
7
9
  from matplotlib import cm
8
10
 
9
11
  from validmind import RawData, tags, tasks
12
+ from validmind.vm_models import VMDataset, VMModel
10
13
 
11
14
 
12
15
  @tags("visualization", "credit_risk")
13
16
  @tasks("classification")
14
- def CumulativePredictionProbabilities(dataset, model, title="Cumulative Probabilities"):
17
+ def CumulativePredictionProbabilities(
18
+ dataset: VMDataset, model: VMModel, title: str = "Cumulative Probabilities"
19
+ ) -> Tuple[go.Figure, RawData]:
15
20
  """
16
21
  Visualizes cumulative probabilities of positive and negative classes for both training and testing in classification models.
17
22
 
@@ -2,15 +2,20 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import List, Tuple
6
+
5
7
  import pandas as pd
6
8
  from statsmodels.stats.stattools import durbin_watson
7
9
 
8
10
  from validmind import RawData, tags, tasks
11
+ from validmind.vm_models import VMDataset, VMModel
9
12
 
10
13
 
11
14
  @tasks("regression")
12
15
  @tags("time_series_data", "forecasting", "statistical_test", "statsmodels")
13
- def DurbinWatsonTest(dataset, model, threshold=[1.5, 2.5]):
16
+ def DurbinWatsonTest(
17
+ dataset: VMDataset, model: VMModel, threshold: List[float] = [1.5, 2.5]
18
+ ) -> Tuple[pd.DataFrame, RawData]:
14
19
  """
15
20
  Assesses autocorrelation in time series data features using the Durbin-Watson statistic.
16
21
 
@@ -2,16 +2,19 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
  from sklearn.metrics import roc_auc_score, roc_curve
8
10
 
9
11
  from validmind import RawData, tags, tasks
12
+ from validmind.vm_models import VMDataset, VMModel
10
13
 
11
14
 
12
15
  @tags("model_performance")
13
16
  @tasks("classification")
14
- def GINITable(dataset, model):
17
+ def GINITable(dataset: VMDataset, model: VMModel) -> Tuple[pd.DataFrame, RawData]:
15
18
  """
16
19
  Evaluates classification model performance using AUC, GINI, and KS metrics for training and test datasets.
17
20
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List, Tuple
6
+
5
7
  from statsmodels.stats.diagnostic import kstest_normal
6
8
 
7
9
  from validmind import RawData, tags, tasks
@@ -11,7 +13,9 @@ from validmind.vm_models import VMDataset, VMModel
11
13
 
12
14
  @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
13
15
  @tasks("classification", "regression")
14
- def KolmogorovSmirnov(model: VMModel, dataset: VMDataset, dist: str = "norm"):
16
+ def KolmogorovSmirnov(
17
+ model: VMModel, dataset: VMDataset, dist: str = "norm"
18
+ ) -> Tuple[List[Dict[str, float]], RawData]:
15
19
  """
16
20
  Assesses whether each feature in the dataset aligns with a normal distribution using the Kolmogorov-Smirnov test.
17
21
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List
6
+
5
7
  from statsmodels.stats.diagnostic import lilliefors
6
8
 
7
9
  from validmind import tags, tasks
@@ -10,7 +12,7 @@ from validmind.vm_models import VMDataset
10
12
 
11
13
  @tags("tabular_data", "data_distribution", "statistical_test", "statsmodels")
12
14
  @tasks("classification", "regression")
13
- def Lilliefors(dataset: VMDataset):
15
+ def Lilliefors(dataset: VMDataset) -> List[Dict[str, float]]:
14
16
  """
15
17
  Assesses the normality of feature distributions in an ML model's training dataset using the Lilliefors test.
16
18
 
@@ -2,18 +2,22 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
5
6
 
6
7
  import plotly.graph_objects as go
7
8
  from matplotlib import cm
8
9
 
9
10
  from validmind import RawData, tags, tasks
11
+ from validmind.vm_models import VMDataset, VMModel
10
12
 
11
13
 
12
14
  @tags("visualization", "credit_risk")
13
15
  @tasks("classification")
14
16
  def PredictionProbabilitiesHistogram(
15
- dataset, model, title="Histogram of Predictive Probabilities"
16
- ):
17
+ dataset: VMDataset,
18
+ model: VMModel,
19
+ title: str = "Histogram of Predictive Probabilities",
20
+ ) -> Tuple[go.Figure, RawData]:
17
21
  """
18
22
  Assesses the predictive probability distribution for binary classification to evaluate model performance and
19
23
  potential overfitting or bias.
@@ -3,17 +3,20 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
 
6
+ from typing import Tuple
7
+
6
8
  import pandas as pd
7
9
  import plotly.graph_objects as go
8
10
  from scipy import stats
9
11
 
10
12
  from validmind import RawData, tags, tasks
11
13
  from validmind.errors import SkipTestError
14
+ from validmind.vm_models import VMModel
12
15
 
13
16
 
14
17
  @tags("tabular_data", "visualization", "model_training")
15
18
  @tasks("regression")
16
- def RegressionCoeffs(model):
19
+ def RegressionCoeffs(model: VMModel) -> Tuple[go.Figure, RawData, pd.DataFrame]:
17
20
  """
18
21
  Assesses the significance and uncertainty of predictor variables in a regression model through visualization of
19
22
  coefficients and their 95% confidence intervals.
@@ -2,7 +2,10 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import matplotlib.pyplot as plt
8
+ import plotly.graph_objects as go
6
9
  import seaborn as sns
7
10
 
8
11
  from validmind import RawData, tags, tasks
@@ -16,8 +19,10 @@ logger = get_logger(__name__)
16
19
  @tags("statistical_test", "model_interpretation", "visualization", "feature_importance")
17
20
  @tasks("regression")
18
21
  def RegressionFeatureSignificance(
19
- model: VMModel, fontsize: int = 10, p_threshold: float = 0.05
20
- ):
22
+ model: VMModel,
23
+ fontsize: int = 10,
24
+ p_threshold: float = 0.05,
25
+ ) -> Tuple[go.Figure, RawData]:
21
26
  """
22
27
  Assesses and visualizes the statistical significance of features in a regression model.
23
28
 
@@ -2,10 +2,11 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import Union
5
+ from typing import Optional, Tuple
6
6
 
7
7
  import matplotlib.pyplot as plt
8
8
  import pandas as pd
9
+ import plotly.graph_objects as go
9
10
 
10
11
  from validmind import RawData, tags, tasks
11
12
  from validmind.logging import get_logger
@@ -19,9 +20,9 @@ logger = get_logger(__name__)
19
20
  def RegressionModelForecastPlot(
20
21
  model: VMModel,
21
22
  dataset: VMDataset,
22
- start_date: Union[str, None] = None,
23
- end_date: Union[str, None] = None,
24
- ):
23
+ start_date: Optional[str] = None,
24
+ end_date: Optional[str] = None,
25
+ ) -> Tuple[go.Figure, RawData]:
25
26
  """
26
27
  Generates plots to visually compare the forecasted outcomes of a regression model against actual observed values over
27
28
  a specified date range.
@@ -2,9 +2,12 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import matplotlib.pyplot as plt
6
8
  import numpy as np
7
9
  import pandas as pd
10
+ import plotly.graph_objects as go
8
11
 
9
12
  from validmind import RawData, tags, tasks
10
13
  from validmind.vm_models import VMDataset, VMModel
@@ -23,7 +26,7 @@ def integrate_diff(series_diff, start_value):
23
26
  def RegressionModelForecastPlotLevels(
24
27
  model: VMModel,
25
28
  dataset: VMDataset,
26
- ):
29
+ ) -> Tuple[go.Figure, RawData]:
27
30
  """
28
31
  Assesses the alignment between forecasted and observed values in regression models through visual plots
29
32
 
@@ -2,10 +2,11 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List, Union
5
+ from typing import List, Tuple, Union
6
6
 
7
7
  import matplotlib.pyplot as plt
8
8
  import numpy as np
9
+ import plotly.graph_objects as go
9
10
 
10
11
  from validmind import RawData, tags, tasks
11
12
  from validmind.logging import get_logger
@@ -29,7 +30,7 @@ def RegressionModelSensitivityPlot(
29
30
  model: VMModel,
30
31
  shocks: List[float] = [0.1],
31
32
  transformation: Union[str, None] = None,
32
- ):
33
+ ) -> Tuple[go.Figure, RawData]:
33
34
  """
34
35
  Assesses the sensitivity of a regression model to changes in independent variables by applying shocks and
35
36
  visualizing the impact.
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List, Tuple
6
+
5
7
  from sklearn.metrics import mean_squared_error, r2_score
6
8
 
7
9
  from validmind import RawData, tags, tasks
@@ -12,7 +14,9 @@ from .statsutils import adj_r2_score
12
14
 
13
15
  @tags("model_performance", "regression")
14
16
  @tasks("regression")
15
- def RegressionModelSummary(dataset: VMDataset, model: VMModel):
17
+ def RegressionModelSummary(
18
+ dataset: VMDataset, model: VMModel
19
+ ) -> Tuple[List[Dict[str, float]], RawData]:
16
20
  """
17
21
  Evaluates regression model performance using metrics including R-Squared, Adjusted R-Squared, MSE, and RMSE.
18
22
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
  import plotly.graph_objects as go
@@ -19,7 +21,7 @@ logger = get_logger(__name__)
19
21
  @tasks("regression")
20
22
  def RegressionPermutationFeatureImportance(
21
23
  dataset: VMDataset, model: VMModel, fontsize: int = 12, figure_height: int = 500
22
- ):
24
+ ) -> Tuple[go.Figure, RawData]:
23
25
  """
24
26
  Assesses the significance of each feature in a model by evaluating the impact on model performance when feature
25
27
  values are randomly rearranged.
@@ -6,11 +6,16 @@ import plotly.graph_objects as go
6
6
  from matplotlib import cm
7
7
 
8
8
  from validmind import tags, tasks
9
+ from validmind.vm_models import VMDataset
9
10
 
10
11
 
11
12
  @tags("visualization", "credit_risk", "logistic_regression")
12
13
  @tasks("classification")
13
- def ScorecardHistogram(dataset, title="Histogram of Scores", score_column="score"):
14
+ def ScorecardHistogram(
15
+ dataset: VMDataset,
16
+ title: str = "Histogram of Scores",
17
+ score_column: str = "score",
18
+ ) -> go.Figure:
14
19
  """
15
20
  The Scorecard Histogram test evaluates the distribution of credit scores between default and non-default instances,
16
21
  providing critical insights into the performance and generalizability of credit-risk models.
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
@@ -26,7 +26,7 @@ def CalibrationCurveDrift(
26
26
  model: VMModel,
27
27
  n_bins: int = 10,
28
28
  drift_pct_threshold: float = 20,
29
- ):
29
+ ) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool, RawData]:
30
30
  """
31
31
  Evaluates changes in probability calibration between reference and monitoring datasets.
32
32
 
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
@@ -38,7 +38,7 @@ def calculate_ks_statistic(y_true, y_prob):
38
38
  @tasks("classification", "text_classification")
39
39
  def ClassDiscriminationDrift(
40
40
  datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20
41
- ):
41
+ ) -> Tuple[Dict[str, pd.DataFrame], bool]:
42
42
  """
43
43
  Compares classification discrimination metrics between reference and monitoring datasets.
44
44
 
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import pandas as pd
8
8
  import plotly.graph_objs as go
@@ -18,7 +18,7 @@ def ClassImbalanceDrift(
18
18
  datasets: List[VMDataset],
19
19
  drift_pct_threshold: float = 5.0,
20
20
  title: str = "Class Distribution Drift",
21
- ):
21
+ ) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool]:
22
22
  """
23
23
  Evaluates drift in class distribution between reference and monitoring datasets.
24
24
 
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
@@ -18,7 +18,7 @@ from validmind.vm_models import VMDataset, VMModel
18
18
  @tasks("classification", "text_classification")
19
19
  def ClassificationAccuracyDrift(
20
20
  datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20
21
- ):
21
+ ) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]:
22
22
  """
23
23
  Compares classification accuracy metrics between reference and monitoring datasets.
24
24
 
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
@@ -18,7 +18,7 @@ from validmind.vm_models import VMDataset, VMModel
18
18
  @tasks("classification", "text_classification")
19
19
  def ConfusionMatrixDrift(
20
20
  datasets: List[VMDataset], model: VMModel, drift_pct_threshold=20
21
- ):
21
+ ) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]:
22
22
  """
23
23
  Compares confusion matrix metrics between reference and monitoring datasets.
24
24
 
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import plotly.graph_objects as go
@@ -17,7 +17,7 @@ from validmind.vm_models import VMDataset, VMModel
17
17
  def CumulativePredictionProbabilitiesDrift(
18
18
  datasets: List[VMDataset],
19
19
  model: VMModel,
20
- ):
20
+ ) -> Tuple[go.Figure, RawData]:
21
21
  """
22
22
  Compares cumulative prediction probability distributions between reference and monitoring datasets.
23
23
 
@@ -2,11 +2,14 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List, Tuple
6
+
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
  import plotly.graph_objects as go
8
10
 
9
11
  from validmind import RawData, tags, tasks
12
+ from validmind.vm_models import VMDataset
10
13
 
11
14
 
12
15
  def calculate_psi_score(actual, expected):
@@ -92,11 +95,11 @@ def create_distribution_plot(feature_name, reference_dist, monitoring_dist, bins
92
95
  @tags("visualization")
93
96
  @tasks("monitoring")
94
97
  def FeatureDrift(
95
- datasets,
98
+ datasets: List[VMDataset],
96
99
  bins=[0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9],
97
100
  feature_columns=None,
98
101
  psi_threshold=0.2,
99
- ):
102
+ ) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
100
103
  """
101
104
  Evaluates changes in feature distribution over time to identify potential model drift.
102
105
 
@@ -3,14 +3,19 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
 
6
+ from typing import List, Tuple
7
+
6
8
  import matplotlib.pyplot as plt
7
9
 
8
10
  from validmind import RawData, tags, tasks
11
+ from validmind.vm_models import VMDataset, VMModel
9
12
 
10
13
 
11
14
  @tags("visualization")
12
15
  @tasks("monitoring")
13
- def PredictionAcrossEachFeature(datasets, model):
16
+ def PredictionAcrossEachFeature(
17
+ datasets: List[VMDataset], model: VMModel
18
+ ) -> Tuple[plt.Figure, RawData]:
14
19
  """
15
20
  Assesses differences in model predictions across individual features between reference and monitoring datasets
16
21
  through visual analysis.
@@ -2,15 +2,22 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List, Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.graph_objects as go
7
9
 
8
10
  from validmind import RawData, tags, tasks
11
+ from validmind.vm_models import VMDataset, VMModel
9
12
 
10
13
 
11
14
  @tags("visualization")
12
15
  @tasks("monitoring")
13
- def PredictionCorrelation(datasets, model, drift_pct_threshold=20):
16
+ def PredictionCorrelation(
17
+ datasets: List[VMDataset],
18
+ model: VMModel,
19
+ drift_pct_threshold: float = 20,
20
+ ) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
14
21
  """
15
22
  Assesses correlation changes between model predictions from reference and monitoring datasets to detect potential
16
23
  target drift.
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
@@ -21,7 +21,7 @@ def PredictionProbabilitiesHistogramDrift(
21
21
  model: VMModel,
22
22
  title="Prediction Probabilities Histogram Drift",
23
23
  drift_pct_threshold: float = 20.0,
24
- ):
24
+ ) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool, RawData]:
25
25
  """
26
26
  Compares prediction probability distributions between reference and monitoring datasets.
27
27