validmind 2.7.6__py3-none-any.whl → 2.7.7__py3-none-any.whl

This diff compares the contents of two publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the packages exactly as they appear in that registry.
Files changed (147)
  1. validmind/__init__.py +2 -0
  2. validmind/__version__.py +1 -1
  3. validmind/api_client.py +8 -1
  4. validmind/datasets/credit_risk/lending_club.py +3 -4
  5. validmind/html_templates/content_blocks.py +1 -1
  6. validmind/tests/__types__.py +17 -0
  7. validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
  8. validmind/tests/data_validation/AutoMA.py +2 -2
  9. validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
  10. validmind/tests/data_validation/BoxPierce.py +2 -2
  11. validmind/tests/data_validation/ClassImbalance.py +2 -1
  12. validmind/tests/data_validation/DatasetDescription.py +11 -2
  13. validmind/tests/data_validation/DatasetSplit.py +2 -2
  14. validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
  15. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
  16. validmind/tests/data_validation/HighCardinality.py +9 -2
  17. validmind/tests/data_validation/HighPearsonCorrelation.py +6 -2
  18. validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
  19. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
  20. validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
  21. validmind/tests/data_validation/MutualInformation.py +6 -8
  22. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
  23. validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
  24. validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
  25. validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
  26. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
  27. validmind/tests/data_validation/RollingStatsPlot.py +21 -10
  28. validmind/tests/data_validation/ScatterPlot.py +3 -5
  29. validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
  30. validmind/tests/data_validation/SeasonalDecompose.py +12 -2
  31. validmind/tests/data_validation/Skewness.py +6 -3
  32. validmind/tests/data_validation/SpreadPlot.py +8 -3
  33. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
  34. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
  35. validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
  36. validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
  37. validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
  38. validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
  39. validmind/tests/data_validation/WOEBinPlots.py +2 -2
  40. validmind/tests/data_validation/WOEBinTable.py +11 -9
  41. validmind/tests/data_validation/nlp/CommonWords.py +2 -2
  42. validmind/tests/data_validation/nlp/Hashtags.py +2 -2
  43. validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
  44. validmind/tests/data_validation/nlp/Mentions.py +9 -6
  45. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
  46. validmind/tests/data_validation/nlp/Punctuations.py +4 -2
  47. validmind/tests/data_validation/nlp/Sentiment.py +2 -2
  48. validmind/tests/data_validation/nlp/StopWords.py +5 -4
  49. validmind/tests/data_validation/nlp/TextDescription.py +2 -2
  50. validmind/tests/data_validation/nlp/Toxicity.py +2 -2
  51. validmind/tests/model_validation/BertScore.py +2 -2
  52. validmind/tests/model_validation/BleuScore.py +2 -2
  53. validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
  54. validmind/tests/model_validation/ContextualRecall.py +2 -2
  55. validmind/tests/model_validation/FeaturesAUC.py +2 -2
  56. validmind/tests/model_validation/MeteorScore.py +2 -2
  57. validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
  58. validmind/tests/model_validation/RegardScore.py +6 -2
  59. validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
  60. validmind/tests/model_validation/RougeScore.py +6 -5
  61. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
  62. validmind/tests/model_validation/TokenDisparity.py +2 -2
  63. validmind/tests/model_validation/ToxicityScore.py +10 -2
  64. validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
  65. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
  66. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
  67. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
  68. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
  69. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
  70. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
  71. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
  72. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
  73. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
  74. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
  75. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
  76. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
  77. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
  78. validmind/tests/model_validation/embeddings/utils.py +11 -1
  79. validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
  80. validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
  81. validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
  82. validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
  83. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
  84. validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
  85. validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
  86. validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
  87. validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
  88. validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
  89. validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
  90. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
  91. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
  92. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
  93. validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
  94. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
  95. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
  96. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
  97. validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
  98. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
  99. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
  100. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
  101. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
  102. validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
  103. validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
  104. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
  105. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
  106. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
  107. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
  108. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
  109. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
  110. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
  111. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
  112. validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
  113. validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
  114. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
  115. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
  116. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
  117. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
  118. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
  119. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
  120. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +3 -1
  121. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +4 -2
  122. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +4 -2
  123. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +3 -1
  124. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +3 -1
  125. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +3 -1
  126. validmind/tests/ongoing_monitoring/FeatureDrift.py +1 -0
  127. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +1 -0
  128. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +3 -1
  129. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +1 -0
  130. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +3 -2
  131. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +4 -2
  132. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +3 -1
  133. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +4 -3
  134. validmind/tests/prompt_validation/Bias.py +13 -9
  135. validmind/tests/prompt_validation/Clarity.py +13 -9
  136. validmind/tests/prompt_validation/Conciseness.py +13 -9
  137. validmind/tests/prompt_validation/Delimitation.py +13 -9
  138. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  139. validmind/tests/prompt_validation/Robustness.py +6 -2
  140. validmind/tests/prompt_validation/Specificity.py +13 -9
  141. validmind/tests/run.py +6 -0
  142. validmind/utils.py +7 -8
  143. {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/METADATA +1 -2
  144. {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/RECORD +147 -147
  145. {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/WHEEL +1 -1
  146. {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/LICENSE +0 -0
  147. {validmind-2.7.6.dist-info → validmind-2.7.7.dist-info}/entry_points.txt +0 -0
validmind/__init__.py CHANGED
@@ -50,6 +50,7 @@ from .client import ( # noqa: E402
  run_test_suite,
  )
  from .tests.decorator import tags, tasks, test
+ from .tests.run import print_env
  from .vm_models.result import RawData

  __all__ = [ # noqa
@@ -63,6 +64,7 @@ __all__ = [ # noqa
  "init_model",
  "init_r_model",
  "preview_template",
+ "print_env",
  "RawData",
  "reload",
  "run_documentation_tests",
validmind/__version__.py CHANGED
@@ -1 +1 @@
- __version__ = "2.7.6"
+ __version__ = "2.7.7"
validmind/api_client.py CHANGED
@@ -407,6 +407,7 @@ async def alog_metric(
  inputs: Optional[List[str]] = None,
  params: Optional[Dict[str, Any]] = None,
  recorded_at: Optional[str] = None,
+ thresholds: Optional[Dict[str, Any]] = None,
  ):
  """See log_metric for details"""
  if not key or not isinstance(key, str):
@@ -421,6 +422,9 @@ async def alog_metric(
  except (ValueError, TypeError):
  raise ValueError("`value` must be a scalar (int or float)")

+ if thresholds is not None and not isinstance(thresholds, dict):
+ raise ValueError("`thresholds` must be a dictionary or None")
+
  try:
  return await _post(
  "log_unit_metric",
@@ -431,6 +435,7 @@
  "inputs": inputs or [],
  "params": params or {},
  "recorded_at": recorded_at,
+ "thresholds": thresholds or {},
  },
  cls=NumpyEncoder,
  allow_nan=False,
@@ -447,6 +452,7 @@ def log_metric(
  inputs: Optional[List[str]] = None,
  params: Optional[Dict[str, Any]] = None,
  recorded_at: Optional[str] = None,
+ thresholds: Optional[Dict[str, Any]] = None,
  ):
  """Logs a unit metric

@@ -463,8 +469,9 @@
  params (dict, optional): Dictionary of parameters used to compute the metric.
  recorded_at (str, optional): The timestamp of the metric. Server will use
  current time if not provided.
+ thresholds (dict, optional): Dictionary of thresholds for the metric.
  """
- run_async(alog_metric, key, value, inputs, params, recorded_at)
+ run_async(alog_metric, key, value, inputs, params, recorded_at, thresholds)


  def get_ai_key() -> Dict[str, Any]:
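Note: per the updated signature above, log_metric now forwards an optional thresholds dictionary to the log_unit_metric endpoint. A hedged usage sketch (the threshold key names are illustrative, not prescribed by the API, and vm.init() is assumed to have been called beforehand):

    from validmind.api_client import log_metric

    # `thresholds` must be a dict or None; any other type raises ValueError.
    # The keys "low_risk" and "high_risk" below are illustrative placeholders.
    log_metric(
        key="auc_score",
        value=0.87,
        thresholds={"low_risk": 0.7, "high_risk": 0.9},
    )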
validmind/datasets/credit_risk/lending_club.py CHANGED
@@ -2,20 +2,19 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ import logging
  import os
  import warnings
- import logging
+
  import numpy as np
  import pandas as pd
  import scorecardpy as sc
  import statsmodels.api as sm
-
  import xgboost as xgb
- import validmind as vm
-
  from sklearn.ensemble import RandomForestClassifier
  from sklearn.model_selection import train_test_split

+ import validmind as vm

  current_path = os.path.dirname(os.path.abspath(__file__))
  dataset_path = os.path.join(current_path, "datasets")
validmind/html_templates/content_blocks.py CHANGED
@@ -111,7 +111,7 @@ hljs.highlightAll();
  </script>
  """

- # FIXME: this is a bit too hacky
+ # have to dynamically load mathjax
  math_jax_snippet = """
  <script>
  window.MathJax = {
validmind/tests/__types__.py CHANGED
@@ -39,6 +39,7 @@ TestID = Union[
  "validmind.data_validation.LaggedCorrelationHeatmap",
  "validmind.data_validation.MissingValues",
  "validmind.data_validation.MissingValuesBarPlot",
+ "validmind.data_validation.MutualInformation",
  "validmind.data_validation.PearsonCorrelationMatrix",
  "validmind.data_validation.PhillipsPerronArch",
  "validmind.data_validation.ProtectedClassesCombination",
@@ -48,6 +49,7 @@ TestID = Union[
  "validmind.data_validation.RollingStatsPlot",
  "validmind.data_validation.RunsTest",
  "validmind.data_validation.ScatterPlot",
+ "validmind.data_validation.ScoreBandDefaultRates",
  "validmind.data_validation.SeasonalDecompose",
  "validmind.data_validation.ShapiroWilk",
  "validmind.data_validation.Skewness",
@@ -121,7 +123,9 @@ TestID = Union[
  "validmind.model_validation.ragas.SemanticSimilarity",
  "validmind.model_validation.sklearn.AdjustedMutualInformation",
  "validmind.model_validation.sklearn.AdjustedRandIndex",
+ "validmind.model_validation.sklearn.CalibrationCurve",
  "validmind.model_validation.sklearn.ClassifierPerformance",
+ "validmind.model_validation.sklearn.ClassifierThresholdOptimization",
  "validmind.model_validation.sklearn.ClusterCosineSimilarity",
  "validmind.model_validation.sklearn.ClusterPerformanceMetrics",
  "validmind.model_validation.sklearn.CompletenessScore",
@@ -134,6 +138,7 @@ TestID = Union[
  "validmind.model_validation.sklearn.MinimumAccuracy",
  "validmind.model_validation.sklearn.MinimumF1Score",
  "validmind.model_validation.sklearn.MinimumROCAUCScore",
+ "validmind.model_validation.sklearn.ModelParameters",
  "validmind.model_validation.sklearn.ModelsPerformanceComparison",
  "validmind.model_validation.sklearn.OverfitDiagnosis",
  "validmind.model_validation.sklearn.PermutationFeatureImportance",
@@ -147,6 +152,7 @@ TestID = Union[
  "validmind.model_validation.sklearn.RegressionR2SquareComparison",
  "validmind.model_validation.sklearn.RobustnessDiagnosis",
  "validmind.model_validation.sklearn.SHAPGlobalImportance",
+ "validmind.model_validation.sklearn.ScoreProbabilityAlignment",
  "validmind.model_validation.sklearn.SilhouettePlot",
  "validmind.model_validation.sklearn.TrainingTestDegradation",
  "validmind.model_validation.sklearn.VMeasure",
@@ -166,9 +172,20 @@ TestID = Union[
  "validmind.model_validation.statsmodels.RegressionModelSummary",
  "validmind.model_validation.statsmodels.RegressionPermutationFeatureImportance",
  "validmind.model_validation.statsmodels.ScorecardHistogram",
+ "validmind.ongoing_monitoring.CalibrationCurveDrift",
+ "validmind.ongoing_monitoring.ClassDiscriminationDrift",
+ "validmind.ongoing_monitoring.ClassImbalanceDrift",
+ "validmind.ongoing_monitoring.ClassificationAccuracyDrift",
+ "validmind.ongoing_monitoring.ConfusionMatrixDrift",
+ "validmind.ongoing_monitoring.CumulativePredictionProbabilitiesDrift",
  "validmind.ongoing_monitoring.FeatureDrift",
  "validmind.ongoing_monitoring.PredictionAcrossEachFeature",
  "validmind.ongoing_monitoring.PredictionCorrelation",
+ "validmind.ongoing_monitoring.PredictionProbabilitiesHistogramDrift",
+ "validmind.ongoing_monitoring.PredictionQuantilesAcrossFeatures",
+ "validmind.ongoing_monitoring.ROCCurveDrift",
+ "validmind.ongoing_monitoring.ScoreBandsDrift",
+ "validmind.ongoing_monitoring.ScorecardHistogramDrift",
  "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
  "validmind.prompt_validation.Bias",
  "validmind.prompt_validation.Clarity",
validmind/tests/data_validation/ACFandPACFPlot.py CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
  import plotly.graph_objects as go
  from statsmodels.tsa.stattools import acf, pacf

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset


@@ -62,6 +62,8 @@ def ACFandPACFPlot(dataset: VMDataset):
  raise ValueError("Provided 'columns' must exist in the dataset")

  figures = []
+ acf_store = {}
+ pacf_store = {}
  for col in df.columns:
  series = df[col]
  max_lags = min(40, len(series) // 2 - 1)
@@ -77,6 +79,7 @@ def ACFandPACFPlot(dataset: VMDataset):
  font=dict(size=18),
  )
  figures.append(acf_fig)
+ acf_store[col] = acf_values

  # Create PACF plot using Plotly
  pacf_values = pacf(series, nlags=max_lags)
@@ -89,5 +92,6 @@
  font=dict(size=18),
  )
  figures.append(pacf_fig)
+ pacf_store[col] = pacf_values

- return tuple(figures)
+ return (*figures, RawData(acf_values=acf_store, pacf_values=pacf_store))
validmind/tests/data_validation/AutoMA.py CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
  from statsmodels.tsa.arima.model import ARIMA
  from statsmodels.tsa.stattools import adfuller

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.logging import get_logger
  from validmind.vm_models import VMDataset

@@ -116,4 +116,4 @@ def AutoMA(dataset: VMDataset, max_ma_order: int = 3):
  return {
  "Auto MA Analysis Results": summary_ma_analysis,
  "Best MA Order Results": best_ma_order,
- }
+ }, RawData(raw_series_data=df)
validmind/tests/data_validation/BivariateScatterPlots.py CHANGED
@@ -6,7 +6,7 @@ import itertools

  import plotly.express as px

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks


  @tags("tabular_data", "numerical_data", "visualization")
@@ -79,4 +79,6 @@ def BivariateScatterPlots(dataset):

  figures.append(fig)

- return tuple(figures)
+ return tuple(figures) + (
+ RawData(selected_numerical_df=df, feature_pairs=features_pairs),
+ )
validmind/tests/data_validation/BoxPierce.py CHANGED
@@ -5,7 +5,7 @@
  import pandas as pd
  from statsmodels.stats.diagnostic import acorr_ljungbox

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks


  @tasks("regression")
@@ -68,4 +68,4 @@ def BoxPierce(dataset):
  box_pierce_df.reset_index(inplace=True)
  box_pierce_df.columns = ["column", "stat", "pvalue"]

- return box_pierce_df
+ return box_pierce_df, RawData(box_pierce_values=box_pierce_values)
validmind/tests/data_validation/ClassImbalance.py CHANGED
@@ -9,7 +9,7 @@ from typing import Any, Dict, Tuple

  import plotly.graph_objs as go

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import SkipTestError
  from validmind.vm_models import VMDataset

@@ -104,4 +104,5 @@ def ClassImbalance(
  },
  go.Figure(data=[trace], layout=layout),
  all(row["Pass/Fail"] == "Pass" for row in imbalanced_classes),
+ RawData(imbalance_percentages=imbalance_percentages),
  )
validmind/tests/data_validation/DatasetDescription.py CHANGED
@@ -9,7 +9,7 @@ import numpy as np
  from ydata_profiling.config import Settings
  from ydata_profiling.model.typeset import ProfilingTypeSet

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import UnsupportedColumnTypeError
  from validmind.logging import get_logger
  from validmind.vm_models import VMDataset
@@ -220,6 +220,15 @@ def DatasetDescription(dataset: VMDataset):
  for column in infer_datatypes(df):
  results.append(describe_column(df, column))

+ raw_data = {
+ column["id"]: {
+ "type": column["type"],
+ "statistics": column["statistics"],
+ "histograms": column["histograms"],
+ }
+ for column in results
+ }
+
  return {
  "Dataset Description": [
  {
@@ -233,4 +242,4 @@
  }
  for column in results
  ]
- }
+ }, RawData(raw_data=raw_data)
validmind/tests/data_validation/DatasetSplit.py CHANGED
@@ -4,7 +4,7 @@

  from typing import List

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset

  DATASET_LABELS = {
@@ -98,4 +98,4 @@ def DatasetSplit(datasets: List[VMDataset]):
  }
  )

- return table
+ return table, RawData(dataset_results=results)
validmind/tests/data_validation/DickeyFullerGLS.py CHANGED
@@ -6,7 +6,7 @@ import pandas as pd
  from arch.unitroot import DFGLS
  from numpy.linalg import LinAlgError

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import SkipTestError
  from validmind.logging import get_logger
  from validmind.vm_models import VMDataset
@@ -97,4 +97,4 @@ def DickeyFullerGLS(dataset: VMDataset):

  return {
  "DFGLS Test Results": dfgls_values,
- }
+ }, RawData(df=df)
validmind/tests/data_validation/FeatureTargetCorrelationPlot.py CHANGED
@@ -6,7 +6,7 @@
  import numpy as np
  import plotly.graph_objects as go

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks


  @tags("tabular_data", "visualization", "correlation")
@@ -58,7 +58,13 @@ def FeatureTargetCorrelationPlot(dataset, fig_height=600):

  fig = _visualize_feature_target_correlation(df, dataset.target_column, fig_height)

- return fig
+ correlations = (
+ df.corr(numeric_only=True)[dataset.target_column]
+ .drop(dataset.target_column)
+ .to_frame()
+ )
+
+ return fig, RawData(correlation_data=correlations)


  def _visualize_feature_target_correlation(df, target_column, fig_height):
validmind/tests/data_validation/HighCardinality.py CHANGED
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset


@@ -59,6 +59,8 @@ def HighCardinality(
  table = []
  all_passed = True

+ raw_data = {}
+
  for col in dataset.feature_columns_categorical:
  n_distinct = df[col].nunique()
  p_distinct = n_distinct / df.shape[0]
@@ -73,7 +75,12 @@ def HighCardinality(
  }
  )

+ raw_data[col] = {
+ "n_distinct": n_distinct,
+ "p_distinct": p_distinct,
+ }
+
  if not passed:
  all_passed = False

- return table, all_passed
+ return table, all_passed, RawData(raw_cardinality_details=raw_data)
validmind/tests/data_validation/HighPearsonCorrelation.py CHANGED
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset


@@ -81,4 +81,8 @@ def HighPearsonCorrelation(
  pairs.sort(key=lambda x: abs(x["Coefficient"]), reverse=True)
  pairs = pairs[:top_n_correlations]

- return pairs, all(p["Pass/Fail"] == "Pass" for p in pairs)
+ return (
+ pairs,
+ all(p["Pass/Fail"] == "Pass" for p in pairs),
+ RawData(correlation_matrix=corr),
+ )
validmind/tests/data_validation/IQROutliersBarPlot.py CHANGED
@@ -4,7 +4,7 @@

  import plotly.graph_objects as go

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset


@@ -118,4 +118,11 @@ def IQROutliersBarPlot(
  )
  figures.append(fig)

- return tuple(figures)
+ return (
+ *figures,
+ RawData(
+ outlier_counts_by_feature=df[dataset.feature_columns_numeric].apply(
+ lambda col: compute_outliers(col, threshold)
+ )
+ ),
+ )
validmind/tests/data_validation/LaggedCorrelationHeatmap.py CHANGED
@@ -6,7 +6,7 @@ import numpy as np
  import pandas as pd
  import plotly.figure_factory as ff

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset

  # Define the 'coolwarm' color scale manually
@@ -101,4 +101,4 @@ def LaggedCorrelationHeatmap(dataset: VMDataset, num_lags: int = 10):
  xaxis_title="Lags",
  )

- return fig
+ return fig, RawData(correlation_matrix=correlation_df)
validmind/tests/data_validation/MissingValuesBarPlot.py CHANGED
@@ -4,7 +4,7 @@

  import plotly.graph_objects as go

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset


@@ -106,13 +106,16 @@ def MissingValuesBarPlot(
  line=dict(color="red", dash="dash"),
  )

- return go.Figure(
- data=[trace_below_threshold, trace_above_threshold, threshold_line],
- layout=go.Layout(
- title="Missing Values",
- yaxis=dict(title="Columns"),
- xaxis=dict(title="Missing Value Percentage (%)", range=[0, 100]),
- barmode="stack",
- height=fig_height,
+ return (
+ go.Figure(
+ data=[trace_below_threshold, trace_above_threshold, threshold_line],
+ layout=go.Layout(
+ title="Missing Values",
+ yaxis=dict(title="Columns"),
+ xaxis=dict(title="Missing Value Percentage (%)", range=[0, 100]),
+ barmode="stack",
+ height=fig_height,
+ ),
  ),
+ RawData(missing_percentages=missing_percentages_sorted),
  )
validmind/tests/data_validation/MutualInformation.py CHANGED
@@ -4,6 +4,7 @@

  import plotly.graph_objects as go
  from sklearn.feature_selection import mutual_info_classif, mutual_info_regression
+
  from validmind import tags, tasks
  from validmind.vm_models import VMDataset
  from validmind.vm_models.result import RawData
@@ -76,13 +77,6 @@ def MutualInformation(
  else:
  mi_scores = mutual_info_regression(X, y)

- # Create DataFrame for raw data
- raw_data = RawData(
- feature=dataset.feature_columns,
- mutual_information_score=mi_scores.tolist(),
- pass_fail=["Pass" if score >= min_threshold else "Fail" for score in mi_scores],
- )
-
  # Create Plotly figure
  fig = go.Figure()

@@ -126,4 +120,8 @@ def MutualInformation(
  template="plotly_white",
  )

- return raw_data, fig
+ return fig, RawData(
+ mutual_information_scores={
+ feature: score for feature, score in zip(sorted_features, sorted_scores)
+ }
+ )
validmind/tests/data_validation/PearsonCorrelationMatrix.py CHANGED
@@ -5,7 +5,7 @@

  import plotly.graph_objects as go

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks


  @tags("tabular_data", "numerical_data", "correlation")
@@ -88,4 +88,4 @@ def PearsonCorrelationMatrix(dataset):

  fig = go.Figure(data=[heatmap], layout=layout)

- return fig
+ return fig, RawData(correlation_matrix=corr_matrix)
validmind/tests/data_validation/ProtectedClassesCombination.py CHANGED
@@ -8,7 +8,7 @@ import pandas as pd
  import plotly.graph_objects as go
  import plotly.subplots as sp

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import MissingDependencyError
  from validmind.logging import get_logger

@@ -202,4 +202,9 @@ def ProtectedClassesCombination(dataset, model, protected_classes=None):
  {"Class Combination Table": metrics_by_group},
  {"DPR and EOR table": dpr_eor_df},
  fig,
+ RawData(
+ metrics_frame=mf,
+ demographic_parity_ratios=m_dpr,
+ equalized_odds_ratios=m_eqo,
+ ),
  )
validmind/tests/data_validation/ProtectedClassesDescription.py CHANGED
@@ -127,4 +127,4 @@ def ProtectedClassesDescription(dataset, protected_classes=None):
  ["Protected Class", "Count"], ascending=[True, False]
  )

- return (stats_df, *tuple(figures))
+ return (stats_df, *figures)
validmind/tests/data_validation/ProtectedClassesDisparity.py CHANGED
@@ -119,7 +119,7 @@ def ProtectedClassesDisparity(
  mask_significance=True,
  )

- plots = []
+ figures = []
  for protected_class in protected_classes:
  plot = ap.disparity(
  bdf, metrics, protected_class, fairness_threshold=disparity_tolerance
@@ -129,13 +129,12 @@ def ProtectedClassesDisparity(
  plot.save(
  buf, format="png"
  ) # as long as the above library is installed, this will work
- plots.append(buf.getvalue())
+ figures.append(buf.getvalue())

  string = "_disparity"
  metrics_adj = [x + string for x in metrics]

  table = bdf[["attribute_name", "attribute_value"] + b.list_disparities(bdf)]
- plots.append(aqp.plot_disparity_all(bdf, metrics=metrics_adj))
- plots_return = tuple(plots)
+ figures.append(aqp.plot_disparity_all(bdf, metrics=metrics_adj))

- return (table, *plots_return)
+ return (table, *figures)
validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py CHANGED
@@ -103,10 +103,7 @@ def ProtectedClassesThresholdOptimizer(
  test_df, target, y_pred_opt, protected_classes
  )

- return (
- {"DPR and EOR Table": fairness_metrics.reset_index()},
- fig,
- )
+ return {"DPR and EOR Table": fairness_metrics.reset_index()}, fig


  def initialize_and_fit_optimizer(pipeline, X_train, y_train, protected_classes_df):
validmind/tests/data_validation/RollingStatsPlot.py CHANGED
@@ -5,7 +5,7 @@
  import matplotlib.pyplot as plt
  import pandas as pd

- from validmind import tags, tasks
+ from validmind import RawData, tags, tasks
  from validmind.errors import SkipTestError
  from validmind.vm_models import VMDataset

@@ -95,13 +95,24 @@ def RollingStatsPlot(dataset: VMDataset, window_size: int = 12):
  if not pd.api.types.is_datetime64_any_dtype(dataset.df.index):
  raise SkipTestError("Index must be a datetime type")

- return tuple(
- [
- plot_rolling_statistics(
- df=dataset.df.dropna(),
- col=col,
- window_size=window_size,
- )
- for col in dataset.feature_columns
- ]
+ figures = [
+ plot_rolling_statistics(
+ df=dataset.df.dropna(),
+ col=col,
+ window_size=window_size,
+ )
+ for col in dataset.feature_columns
+ ]
+
+ return (
+ *figures,
+ RawData(
+ rolling_means_stds={
+ col: {
+ "rolling_mean": dataset.df[col].rolling(window=window_size).mean(),
+ "rolling_std": dataset.df[col].rolling(window=window_size).std(),
+ }
+ for col in dataset.feature_columns
+ }
+ ),
  )
validmind/tests/data_validation/ScatterPlot.py CHANGED
@@ -55,8 +55,8 @@ def ScatterPlot(dataset):
  - Assumes that the dataset can fit into the computer's memory, which might not be valid for extremely large
  datasets.
  """
-
  g = sns.pairplot(data=dataset.df, diag_kind="kde")
+
  for ax in g.axes.flatten():
  # rotate x axis labels
  ax.set_xlabel(ax.get_xlabel(), rotation=45)
@@ -64,12 +64,10 @@ def ScatterPlot(dataset):
  ax.set_ylabel(ax.get_ylabel(), rotation=45)
  # set y labels alignment
  ax.yaxis.get_label().set_horizontalalignment("right")
+
  # Get the current figure
  fig = plt.gcf()

- figures = []
- figures.append(fig)
-
  plt.close("all")

- return tuple(figures)
+ return fig
validmind/tests/data_validation/ScoreBandDefaultRates.py CHANGED
@@ -2,8 +2,9 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- import pandas as pd
  import numpy as np
+ import pandas as pd
+
  from validmind import tags, tasks
  from validmind.vm_models import VMDataset, VMModel