validmind 2.7.5__py3-none-any.whl → 2.7.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (149)
  1. validmind/__init__.py +2 -0
  2. validmind/__version__.py +1 -1
  3. validmind/api_client.py +8 -1
  4. validmind/datasets/credit_risk/lending_club.py +352 -87
  5. validmind/html_templates/content_blocks.py +1 -1
  6. validmind/tests/__types__.py +17 -0
  7. validmind/tests/data_validation/ACFandPACFPlot.py +6 -2
  8. validmind/tests/data_validation/AutoMA.py +2 -2
  9. validmind/tests/data_validation/BivariateScatterPlots.py +4 -2
  10. validmind/tests/data_validation/BoxPierce.py +2 -2
  11. validmind/tests/data_validation/ClassImbalance.py +2 -1
  12. validmind/tests/data_validation/DatasetDescription.py +11 -2
  13. validmind/tests/data_validation/DatasetSplit.py +2 -2
  14. validmind/tests/data_validation/DickeyFullerGLS.py +2 -2
  15. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +8 -2
  16. validmind/tests/data_validation/HighCardinality.py +9 -2
  17. validmind/tests/data_validation/HighPearsonCorrelation.py +18 -4
  18. validmind/tests/data_validation/IQROutliersBarPlot.py +9 -2
  19. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +2 -2
  20. validmind/tests/data_validation/MissingValuesBarPlot.py +12 -9
  21. validmind/tests/data_validation/MutualInformation.py +6 -8
  22. validmind/tests/data_validation/PearsonCorrelationMatrix.py +2 -2
  23. validmind/tests/data_validation/ProtectedClassesCombination.py +6 -1
  24. validmind/tests/data_validation/ProtectedClassesDescription.py +1 -1
  25. validmind/tests/data_validation/ProtectedClassesDisparity.py +4 -5
  26. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +1 -4
  27. validmind/tests/data_validation/RollingStatsPlot.py +21 -10
  28. validmind/tests/data_validation/ScatterPlot.py +3 -5
  29. validmind/tests/data_validation/ScoreBandDefaultRates.py +2 -1
  30. validmind/tests/data_validation/SeasonalDecompose.py +12 -2
  31. validmind/tests/data_validation/Skewness.py +6 -3
  32. validmind/tests/data_validation/SpreadPlot.py +8 -3
  33. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -2
  34. validmind/tests/data_validation/TabularDateTimeHistograms.py +2 -2
  35. validmind/tests/data_validation/TargetRateBarPlots.py +4 -3
  36. validmind/tests/data_validation/TimeSeriesFrequency.py +7 -2
  37. validmind/tests/data_validation/TimeSeriesMissingValues.py +14 -10
  38. validmind/tests/data_validation/TimeSeriesOutliers.py +1 -5
  39. validmind/tests/data_validation/WOEBinPlots.py +2 -2
  40. validmind/tests/data_validation/WOEBinTable.py +11 -9
  41. validmind/tests/data_validation/nlp/CommonWords.py +2 -2
  42. validmind/tests/data_validation/nlp/Hashtags.py +2 -2
  43. validmind/tests/data_validation/nlp/LanguageDetection.py +9 -6
  44. validmind/tests/data_validation/nlp/Mentions.py +9 -6
  45. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -2
  46. validmind/tests/data_validation/nlp/Punctuations.py +4 -2
  47. validmind/tests/data_validation/nlp/Sentiment.py +2 -2
  48. validmind/tests/data_validation/nlp/StopWords.py +5 -4
  49. validmind/tests/data_validation/nlp/TextDescription.py +2 -2
  50. validmind/tests/data_validation/nlp/Toxicity.py +2 -2
  51. validmind/tests/model_validation/BertScore.py +2 -2
  52. validmind/tests/model_validation/BleuScore.py +2 -2
  53. validmind/tests/model_validation/ClusterSizeDistribution.py +2 -2
  54. validmind/tests/model_validation/ContextualRecall.py +2 -2
  55. validmind/tests/model_validation/FeaturesAUC.py +2 -2
  56. validmind/tests/model_validation/MeteorScore.py +2 -2
  57. validmind/tests/model_validation/ModelPredictionResiduals.py +2 -2
  58. validmind/tests/model_validation/RegardScore.py +6 -2
  59. validmind/tests/model_validation/RegressionResidualsPlot.py +4 -3
  60. validmind/tests/model_validation/RougeScore.py +6 -5
  61. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +11 -2
  62. validmind/tests/model_validation/TokenDisparity.py +2 -2
  63. validmind/tests/model_validation/ToxicityScore.py +10 -2
  64. validmind/tests/model_validation/embeddings/ClusterDistribution.py +9 -3
  65. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +16 -2
  66. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +5 -3
  67. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +2 -2
  68. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +14 -4
  69. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +2 -2
  70. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +16 -2
  71. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +2 -2
  72. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -5
  73. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +4 -2
  74. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +4 -2
  75. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -2
  76. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +4 -2
  77. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +8 -6
  78. validmind/tests/model_validation/embeddings/utils.py +11 -1
  79. validmind/tests/model_validation/ragas/AnswerCorrectness.py +2 -1
  80. validmind/tests/model_validation/ragas/AspectCritic.py +11 -7
  81. validmind/tests/model_validation/ragas/ContextEntityRecall.py +2 -1
  82. validmind/tests/model_validation/ragas/ContextPrecision.py +2 -1
  83. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +2 -1
  84. validmind/tests/model_validation/ragas/ContextRecall.py +2 -1
  85. validmind/tests/model_validation/ragas/Faithfulness.py +2 -1
  86. validmind/tests/model_validation/ragas/NoiseSensitivity.py +2 -1
  87. validmind/tests/model_validation/ragas/ResponseRelevancy.py +2 -1
  88. validmind/tests/model_validation/ragas/SemanticSimilarity.py +2 -1
  89. validmind/tests/model_validation/sklearn/CalibrationCurve.py +3 -2
  90. validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +2 -5
  91. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -2
  92. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +2 -2
  93. validmind/tests/model_validation/sklearn/FeatureImportance.py +1 -14
  94. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +6 -3
  95. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +2 -2
  96. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +8 -4
  97. validmind/tests/model_validation/sklearn/ModelParameters.py +1 -0
  98. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -3
  99. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +2 -2
  100. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +20 -16
  101. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +4 -2
  102. validmind/tests/model_validation/sklearn/ROCCurve.py +1 -1
  103. validmind/tests/model_validation/sklearn/RegressionR2Square.py +7 -9
  104. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +1 -3
  105. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +2 -1
  106. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +2 -1
  107. validmind/tests/model_validation/sklearn/SilhouettePlot.py +5 -3
  108. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +9 -1
  109. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +1 -1
  110. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +11 -4
  111. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -3
  112. validmind/tests/model_validation/statsmodels/GINITable.py +7 -15
  113. validmind/tests/model_validation/statsmodels/Lilliefors.py +2 -2
  114. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +1 -1
  115. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +2 -2
  116. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +5 -2
  117. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +5 -2
  118. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +7 -7
  119. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +2 -2
  120. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +220 -0
  121. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +155 -0
  122. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +146 -0
  123. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +148 -0
  124. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +193 -0
  125. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +178 -0
  126. validmind/tests/ongoing_monitoring/FeatureDrift.py +120 -120
  127. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +18 -23
  128. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +86 -44
  129. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +204 -0
  130. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +98 -0
  131. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +150 -0
  132. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +212 -0
  133. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +209 -0
  134. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +91 -13
  135. validmind/tests/prompt_validation/Bias.py +13 -9
  136. validmind/tests/prompt_validation/Clarity.py +13 -9
  137. validmind/tests/prompt_validation/Conciseness.py +13 -9
  138. validmind/tests/prompt_validation/Delimitation.py +13 -9
  139. validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  140. validmind/tests/prompt_validation/Robustness.py +6 -2
  141. validmind/tests/prompt_validation/Specificity.py +13 -9
  142. validmind/tests/run.py +6 -0
  143. validmind/utils.py +7 -8
  144. validmind/vm_models/dataset/dataset.py +0 -4
  145. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/METADATA +2 -3
  146. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/RECORD +149 -138
  147. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/WHEEL +1 -1
  148. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/LICENSE +0 -0
  149. {validmind-2.7.5.dist-info → validmind-2.7.7.dist-info}/entry_points.txt +0 -0
validmind/__init__.py CHANGED
@@ -50,6 +50,7 @@ from .client import ( # noqa: E402
50
50
  run_test_suite,
51
51
  )
52
52
  from .tests.decorator import tags, tasks, test
53
+ from .tests.run import print_env
53
54
  from .vm_models.result import RawData
54
55
 
55
56
  __all__ = [ # noqa
@@ -63,6 +64,7 @@ __all__ = [ # noqa
63
64
  "init_model",
64
65
  "init_r_model",
65
66
  "preview_template",
67
+ "print_env",
66
68
  "RawData",
67
69
  "reload",
68
70
  "run_documentation_tests",
validmind/__version__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "2.7.5"
1
+ __version__ = "2.7.7"
validmind/api_client.py CHANGED
@@ -407,6 +407,7 @@ async def alog_metric(
407
407
  inputs: Optional[List[str]] = None,
408
408
  params: Optional[Dict[str, Any]] = None,
409
409
  recorded_at: Optional[str] = None,
410
+ thresholds: Optional[Dict[str, Any]] = None,
410
411
  ):
411
412
  """See log_metric for details"""
412
413
  if not key or not isinstance(key, str):
@@ -421,6 +422,9 @@ async def alog_metric(
421
422
  except (ValueError, TypeError):
422
423
  raise ValueError("`value` must be a scalar (int or float)")
423
424
 
425
+ if thresholds is not None and not isinstance(thresholds, dict):
426
+ raise ValueError("`thresholds` must be a dictionary or None")
427
+
424
428
  try:
425
429
  return await _post(
426
430
  "log_unit_metric",
@@ -431,6 +435,7 @@ async def alog_metric(
431
435
  "inputs": inputs or [],
432
436
  "params": params or {},
433
437
  "recorded_at": recorded_at,
438
+ "thresholds": thresholds or {},
434
439
  },
435
440
  cls=NumpyEncoder,
436
441
  allow_nan=False,
@@ -447,6 +452,7 @@ def log_metric(
447
452
  inputs: Optional[List[str]] = None,
448
453
  params: Optional[Dict[str, Any]] = None,
449
454
  recorded_at: Optional[str] = None,
455
+ thresholds: Optional[Dict[str, Any]] = None,
450
456
  ):
451
457
  """Logs a unit metric
452
458
 
@@ -463,8 +469,9 @@ def log_metric(
463
469
  params (dict, optional): Dictionary of parameters used to compute the metric.
464
470
  recorded_at (str, optional): The timestamp of the metric. Server will use
465
471
  current time if not provided.
472
+ thresholds (dict, optional): Dictionary of thresholds for the metric.
466
473
  """
467
- run_async(alog_metric, key, value, inputs, params, recorded_at)
474
+ run_async(alog_metric, key, value, inputs, params, recorded_at, thresholds)
468
475
 
469
476
 
470
477
  def get_ai_key() -> Dict[str, Any]:
validmind/datasets/credit_risk/lending_club.py CHANGED
@@ -2,14 +2,20 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ import logging
5
6
  import os
7
+ import warnings
6
8
 
7
9
  import numpy as np
8
10
  import pandas as pd
9
11
  import scorecardpy as sc
10
12
  import statsmodels.api as sm
13
+ import xgboost as xgb
14
+ from sklearn.ensemble import RandomForestClassifier
11
15
  from sklearn.model_selection import train_test_split
12
16
 
17
+ import validmind as vm
18
+
13
19
  current_path = os.path.dirname(os.path.abspath(__file__))
14
20
  dataset_path = os.path.join(current_path, "datasets")
15
21
 
@@ -95,7 +101,7 @@ score_params = {
95
101
  }
96
102
 
97
103
 
98
- def load_data(source="online"):
104
+ def load_data(source="online", verbose=True):
99
105
  """
100
106
  Load data from either an online source or offline files, automatically dropping specified columns for offline data.
101
107
 
@@ -104,28 +110,33 @@ def load_data(source="online"):
104
110
  """
105
111
 
106
112
  if source == "online":
107
- print(f"Loading data from an online source: {online_data_file}")
113
+ if verbose:
114
+ print(f"Loading data from an online source: {online_data_file}")
108
115
  df = pd.read_csv(online_data_file)
109
- df = _clean_data(df)
116
+ df = _clean_data(df, verbose=verbose)
110
117
 
111
118
  elif source == "offline":
112
- print(f"Loading data from an offline .gz file: {offline_data_file}")
119
+ if verbose:
120
+ print(f"Loading data from an offline .gz file: {offline_data_file}")
113
121
  # Since we know the offline_data_file path ends with '.zip', we replace it with '.csv.gz'
114
122
  gzip_file_path = offline_data_file.replace(".zip", ".csv.gz")
115
- print(f"Attempting to read from .gz file: {gzip_file_path}")
123
+ if verbose:
124
+ print(f"Attempting to read from .gz file: {gzip_file_path}")
116
125
  # Read the CSV file directly from the .gz archive
117
126
  df = pd.read_csv(gzip_file_path, compression="gzip")
118
- print("Data loaded successfully.")
127
+ if verbose:
128
+ print("Data loaded successfully.")
119
129
  else:
120
130
  raise ValueError("Invalid source specified. Choose 'online' or 'offline'.")
121
131
 
122
- print(
123
- f"Rows: {df.shape[0]}, Columns: {df.shape[1]}, Missing values: {df.isnull().sum().sum()}"
124
- )
132
+ if verbose:
133
+ print(
134
+ f"Rows: {df.shape[0]}, Columns: {df.shape[1]}, Missing values: {df.isnull().sum().sum()}"
135
+ )
125
136
  return df
126
137
 
127
138
 
128
- def _clean_data(df):
139
+ def _clean_data(df, verbose=True):
129
140
  df = df.copy()
130
141
 
131
142
  # Drop columns not relevant for application scorecards
@@ -133,41 +144,45 @@ def _clean_data(df):
133
144
 
134
145
  # Drop rows with missing target values
135
146
  df.dropna(subset=[target_column], inplace=True)
136
- print("Dropping rows with missing target values:")
137
- print(
138
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
139
- )
147
+ if verbose:
148
+ print("Dropping rows with missing target values:")
149
+ print(
150
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
151
+ )
140
152
 
141
153
  # Drop columns with more than N percent missing values
142
154
  missing_values = df.isnull().mean()
143
155
  df = df.loc[:, missing_values < 0.7]
144
- print("Dropping columns with more than 70% missing values:")
145
- print(
146
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
147
- )
156
+ if verbose:
157
+ print("Dropping columns with more than 70% missing values:")
158
+ print(
159
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
160
+ )
148
161
 
149
162
  # Drop columns with only one unique value
150
163
  unique_values = df.nunique()
151
164
  df = df.loc[:, unique_values > 1]
152
- print("Dropping columns with only one unique value:")
153
- print(
154
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
155
- )
165
+ if verbose:
166
+ print("Dropping columns with only one unique value:")
167
+ print(
168
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
169
+ )
156
170
 
157
171
  # Define the target variable for the model, representing loan default status.
158
172
  df[target_column] = df[target_column].map({"Fully Paid": 0, "Charged Off": 1})
159
173
 
160
174
  # Drop rows with NaN in target_column after mapping
161
175
  df.dropna(subset=[target_column], inplace=True)
162
- print("Dropping rows with missing target values:")
163
- print(
164
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
165
- )
176
+ if verbose:
177
+ print("Dropping rows with missing target values:")
178
+ print(
179
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
180
+ )
166
181
 
167
182
  return df
168
183
 
169
184
 
170
- def preprocess(df):
185
+ def preprocess(df, verbose=True):
171
186
  df = df.copy()
172
187
 
173
188
  # Convert the target variable to integer type for modeling.
@@ -175,45 +190,51 @@ def preprocess(df):
175
190
 
176
191
  # Keep rows where purpose is 'debt_consolidation' or 'credit_card'
177
192
  df = df[df["purpose"].isin(["debt_consolidation", "credit_card"])]
178
- print("Filtering 'purpose' to 'debt_consolidation' and 'credit_card':")
179
- print(
180
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
181
- )
193
+ if verbose:
194
+ print("Filtering 'purpose' to 'debt_consolidation' and 'credit_card':")
195
+ print(
196
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
197
+ )
182
198
 
183
199
  # Remove rows where grade is 'F' or 'G'
184
200
  df = df[~df["grade"].isin(["F", "G"])]
185
- print("Filtering out 'grade' F and G:")
186
- print(
187
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
188
- )
201
+ if verbose:
202
+ print("Filtering out 'grade' F and G:")
203
+ print(
204
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
205
+ )
189
206
 
190
207
  # Remove rows where sub_grade starts with 'F' or 'G'
191
208
  df = df[~df["sub_grade"].str.startswith(("F", "G"))]
192
- print("Filtering out 'sub_grade' F and G:")
193
- print(
194
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
195
- )
209
+ if verbose:
210
+ print("Filtering out 'sub_grade' F and G:")
211
+ print(
212
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
213
+ )
196
214
 
197
215
  # Remove rows where home_ownership is 'OTHER', 'NONE', or 'ANY'
198
216
  df = df[~df["home_ownership"].isin(["OTHER", "NONE", "ANY"])]
199
- print("Filtering out 'home_ownership' OTHER, NONE, ANY:")
200
- print(
201
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
202
- )
217
+ if verbose:
218
+ print("Filtering out 'home_ownership' OTHER, NONE, ANY:")
219
+ print(
220
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
221
+ )
203
222
 
204
223
  # Drop features that are not useful for modeling
205
224
  df.drop(drop_features, axis=1, inplace=True)
206
- print("Dropping specified features:")
207
- print(
208
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
209
- )
225
+ if verbose:
226
+ print("Dropping specified features:")
227
+ print(
228
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
229
+ )
210
230
 
211
231
  # Drop rows with missing values
212
232
  df.dropna(inplace=True)
213
- print("Dropping rows with any missing values:")
214
- print(
215
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
216
- )
233
+ if verbose:
234
+ print("Dropping rows with any missing values:")
235
+ print(
236
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
237
+ )
217
238
 
218
239
  # Preprocess emp_length column
219
240
  df = _preprocess_emp_length(df)
@@ -260,34 +281,37 @@ def _preprocess_emp_length(df):
260
281
  return df
261
282
 
262
283
 
263
- def feature_engineering(df):
284
+ def feature_engineering(df, verbose=True):
264
285
  df = df.copy()
265
286
 
266
287
  # WoE encoding of numerical and categorical features
267
- df = woe_encoding(df)
288
+ df = woe_encoding(df, verbose=verbose)
268
289
 
269
- print(
270
- f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
271
- )
290
+ if verbose:
291
+ print(
292
+ f"Rows: {df.shape[0]}\nColumns: {df.shape[1]}\nMissing values: {df.isnull().sum().sum()}\n"
293
+ )
272
294
 
273
295
  return df
274
296
 
275
297
 
276
- def woe_encoding(df):
298
+ def woe_encoding(df, verbose=True):
277
299
  df = df.copy()
278
300
 
279
- woe = _woebin(df)
301
+ woe = _woebin(df, verbose=verbose)
280
302
  bins = _woe_to_bins(woe)
281
303
 
282
304
  # Make sure we don't transform the target column
283
305
  if target_column in bins:
284
306
  del bins[target_column]
285
- print(f"Excluded {target_column} from WoE transformation.")
307
+ if verbose:
308
+ print(f"Excluded {target_column} from WoE transformation.")
286
309
 
287
310
  # Apply the WoE transformation
288
311
  df = sc.woebin_ply(df, bins=bins)
289
312
 
290
- print("Successfully converted features to WoE values.")
313
+ if verbose:
314
+ print("Successfully converted features to WoE values.")
291
315
 
292
316
  return df
293
317
 
@@ -326,7 +350,7 @@ def _woe_to_bins(woe):
326
350
  return bins
327
351
 
328
352
 
329
- def _woebin(df):
353
+ def _woebin(df, verbose=True):
330
354
  """
331
355
  This function performs automatic binning using WoE.
332
356
  df: A pandas dataframe
@@ -337,9 +361,10 @@ def _woebin(df):
337
361
  df[non_numeric_cols] = df[non_numeric_cols].astype(str)
338
362
 
339
363
  try:
340
- print(
341
- f"Performing binning with breaks_adj: {breaks_adj}"
342
- ) # print the breaks_adj being used
364
+ if verbose:
365
+ print(
366
+ f"Performing binning with breaks_adj: {breaks_adj}"
367
+ ) # print the breaks_adj being used
343
368
  bins = sc.woebin(df, target_column, breaks_list=breaks_adj)
344
369
  except Exception as e:
345
370
  print("Error during binning: ")
@@ -355,7 +380,7 @@ def _woebin(df):
355
380
  return bins_df
356
381
 
357
382
 
358
- def split(df, validation_size=None, test_size=0.2, add_constant=False):
383
+ def split(df, validation_size=None, test_size=0.2, add_constant=False, verbose=True):
359
384
  """
360
385
  Split dataset into train, validation (optional), and test sets.
361
386
 
@@ -384,15 +409,16 @@ def split(df, validation_size=None, test_size=0.2, add_constant=False):
384
409
  train_val_df = sm.add_constant(train_val_df)
385
410
 
386
411
  # Print details for two-way split
387
- print("After splitting the dataset into training and test sets:")
388
- print(
389
- f"Training Dataset:\nRows: {train_val_df.shape[0]}\nColumns: {train_val_df.shape[1]}\n"
390
- f"Missing values: {train_val_df.isnull().sum().sum()}\n"
391
- )
392
- print(
393
- f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\n"
394
- f"Missing values: {test_df.isnull().sum().sum()}\n"
395
- )
412
+ if verbose:
413
+ print("After splitting the dataset into training and test sets:")
414
+ print(
415
+ f"Training Dataset:\nRows: {train_val_df.shape[0]}\nColumns: {train_val_df.shape[1]}\n"
416
+ f"Missing values: {train_val_df.isnull().sum().sum()}\n"
417
+ )
418
+ print(
419
+ f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\n"
420
+ f"Missing values: {test_df.isnull().sum().sum()}\n"
421
+ )
396
422
 
397
423
  return train_val_df, test_df
398
424
 
@@ -407,19 +433,20 @@ def split(df, validation_size=None, test_size=0.2, add_constant=False):
407
433
  validation_df = sm.add_constant(validation_df)
408
434
 
409
435
  # Print details for three-way split
410
- print("After splitting the dataset into training, validation, and test sets:")
411
- print(
412
- f"Training Dataset:\nRows: {train_df.shape[0]}\nColumns: {train_df.shape[1]}\n"
413
- f"Missing values: {train_df.isnull().sum().sum()}\n"
414
- )
415
- print(
416
- f"Validation Dataset:\nRows: {validation_df.shape[0]}\nColumns: {validation_df.shape[1]}\n"
417
- f"Missing values: {validation_df.isnull().sum().sum()}\n"
418
- )
419
- print(
420
- f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\n"
421
- f"Missing values: {test_df.isnull().sum().sum()}\n"
422
- )
436
+ if verbose:
437
+ print("After splitting the dataset into training, validation, and test sets:")
438
+ print(
439
+ f"Training Dataset:\nRows: {train_df.shape[0]}\nColumns: {train_df.shape[1]}\n"
440
+ f"Missing values: {train_df.isnull().sum().sum()}\n"
441
+ )
442
+ print(
443
+ f"Validation Dataset:\nRows: {validation_df.shape[0]}\nColumns: {validation_df.shape[1]}\n"
444
+ f"Missing values: {validation_df.isnull().sum().sum()}\n"
445
+ )
446
+ print(
447
+ f"Test Dataset:\nRows: {test_df.shape[0]}\nColumns: {test_df.shape[1]}\n"
448
+ f"Missing values: {test_df.isnull().sum().sum()}\n"
449
+ )
423
450
 
424
451
  return train_df, validation_df, test_df
425
452
 
@@ -822,3 +849,241 @@ def get_demo_test_config(x_test=None, y_test=None):
822
849
  }
823
850
 
824
851
  return default_config
852
+
853
+
854
+ def load_scorecard():
855
+
856
+ warnings.filterwarnings("ignore")
857
+ logging.getLogger("scorecardpy").setLevel(logging.ERROR)
858
+
859
+ os.environ["VALIDMIND_LLM_DESCRIPTIONS_CONTEXT_ENABLED"] = "1"
860
+
861
+ context = """
862
+ FORMAT FOR THE LLM DESCRIPTIONS:
863
+ **<Test Name>** is designed to <begin with a concise overview of what the test does and its primary purpose, extracted from the test description>.
864
+
865
+ The test operates by <write a paragraph about the test mechanism, explaining how it works and what it measures. Include any relevant formulas or methodologies mentioned in the test description.>
866
+
867
+ The primary advantages of this test include <write a paragraph about the test's strengths and capabilities, highlighting what makes it particularly useful for specific scenarios.>
868
+
869
+ Users should be aware that <write a paragraph about the test's limitations and potential risks. Include both technical limitations and interpretation challenges. If the test description includes specific signs of high risk, incorporate these here.>
870
+
871
+ **Key Insights:**
872
+
873
+ The test results reveal:
874
+
875
+ - **<insight title>**: <comprehensive description of one aspect of the results>
876
+ - **<insight title>**: <comprehensive description of another aspect>
877
+ ...
878
+
879
+ Based on these results, <conclude with a brief paragraph that ties together the test results with the test's purpose and provides any final recommendations or considerations.>
880
+
881
+ ADDITIONAL INSTRUCTIONS:
882
+ Present insights in order from general to specific, with each insight as a single bullet point with bold title.
883
+
884
+ For each metric in the test results, include in the test overview:
885
+ - The metric's purpose and what it measures
886
+ - Its mathematical formula
887
+ - The range of possible values
888
+ - What constitutes good/bad performance
889
+ - How to interpret different values
890
+
891
+ Each insight should progressively cover:
892
+ 1. Overall scope and distribution
893
+ 2. Complete breakdown of all elements with specific values
894
+ 3. Natural groupings and patterns
895
+ 4. Comparative analysis between datasets/categories
896
+ 5. Stability and variations
897
+ 6. Notable relationships or dependencies
898
+
899
+ Remember:
900
+ - Keep all insights at the same level (no sub-bullets or nested structures)
901
+ - Make each insight complete and self-contained
902
+ - Include specific numerical values and ranges
903
+ - Cover all elements in the results comprehensively
904
+ - Maintain clear, concise language
905
+ - Use only "- **Title**: Description" format for insights
906
+ - Progress naturally from general to specific observations
907
+
908
+ """.strip()
909
+
910
+ os.environ["VALIDMIND_LLM_DESCRIPTIONS_CONTEXT"] = context
911
+
912
+ # Load the data
913
+ df = load_data(source="offline", verbose=False)
914
+ preprocess_df = preprocess(df, verbose=False)
915
+ fe_df = feature_engineering(preprocess_df, verbose=False)
916
+
917
+ # Split the data
918
+ train_df, test_df = split(fe_df, test_size=0.2, verbose=False)
919
+
920
+ x_train = train_df.drop(target_column, axis=1)
921
+ y_train = train_df[target_column]
922
+
923
+ x_test = test_df.drop(target_column, axis=1)
924
+ y_test = test_df[target_column]
925
+
926
+ # Define the XGBoost model
927
+ xgb_model = xgb.XGBClassifier(
928
+ n_estimators=50, random_state=42, early_stopping_rounds=10
929
+ )
930
+ xgb_model.set_params(
931
+ eval_metric=["error", "logloss", "auc"],
932
+ )
933
+
934
+ # Fit the model
935
+ xgb_model.fit(x_train, y_train, eval_set=[(x_test, y_test)], verbose=False)
936
+
937
+ # Define the Random Forest model
938
+ rf_model = RandomForestClassifier(
939
+ n_estimators=50,
940
+ random_state=42,
941
+ )
942
+
943
+ # Fit the model
944
+ rf_model.fit(x_train, y_train)
945
+
946
+ # Compute the probabilities
947
+ train_xgb_prob = xgb_model.predict_proba(x_train)[:, 1]
948
+ test_xgb_prob = xgb_model.predict_proba(x_test)[:, 1]
949
+
950
+ train_rf_prob = rf_model.predict_proba(x_train)[:, 1]
951
+ test_rf_prob = rf_model.predict_proba(x_test)[:, 1]
952
+
953
+ # Compute binary predictions
954
+ cut_off_threshold = 0.3
955
+
956
+ train_xgb_binary_predictions = (train_xgb_prob > cut_off_threshold).astype(int)
957
+ test_xgb_binary_predictions = (test_xgb_prob > cut_off_threshold).astype(int)
958
+
959
+ train_rf_binary_predictions = (train_rf_prob > cut_off_threshold).astype(int)
960
+ test_rf_binary_predictions = (test_rf_prob > cut_off_threshold).astype(int)
961
+
962
+ # Compute credit risk scores
963
+ train_xgb_scores = compute_scores(train_xgb_prob)
964
+ test_xgb_scores = compute_scores(test_xgb_prob)
965
+
966
+ scorecard = {
967
+ "df": df,
968
+ "preprocess_df": preprocess_df,
969
+ "fe_df": fe_df,
970
+ "train_df": train_df,
971
+ "test_df": test_df,
972
+ "x_test": x_test,
973
+ "y_test": y_test,
974
+ "xgb_model": xgb_model,
975
+ "rf_model": rf_model,
976
+ "train_xgb_binary_predictions": train_xgb_binary_predictions,
977
+ "test_xgb_binary_predictions": test_xgb_binary_predictions,
978
+ "train_xgb_prob": train_xgb_prob,
979
+ "test_xgb_prob": test_xgb_prob,
980
+ "train_xgb_scores": train_xgb_scores,
981
+ "test_xgb_scores": test_xgb_scores,
982
+ "train_rf_binary_predictions": train_rf_binary_predictions,
983
+ "test_rf_binary_predictions": test_rf_binary_predictions,
984
+ "train_rf_prob": train_rf_prob,
985
+ "test_rf_prob": test_rf_prob,
986
+ }
987
+
988
+ return scorecard
989
+
990
+
991
+ def init_vm_objects(scorecard):
992
+
993
+ df = scorecard["df"]
994
+ preprocess_df = scorecard["preprocess_df"]
995
+ fe_df = scorecard["fe_df"]
996
+ train_df = scorecard["train_df"]
997
+ test_df = scorecard["test_df"]
998
+ xgb_model = scorecard["xgb_model"]
999
+ rf_model = scorecard["rf_model"]
1000
+ train_xgb_binary_predictions = scorecard["train_xgb_binary_predictions"]
1001
+ test_xgb_binary_predictions = scorecard["test_xgb_binary_predictions"]
1002
+ train_xgb_prob = scorecard["train_xgb_prob"]
1003
+ test_xgb_prob = scorecard["test_xgb_prob"]
1004
+ train_rf_binary_predictions = scorecard["train_rf_binary_predictions"]
1005
+ test_rf_binary_predictions = scorecard["test_rf_binary_predictions"]
1006
+ train_rf_prob = scorecard["train_rf_prob"]
1007
+ test_rf_prob = scorecard["test_rf_prob"]
1008
+ train_xgb_scores = scorecard["train_xgb_scores"]
1009
+ test_xgb_scores = scorecard["test_xgb_scores"]
1010
+
1011
+ vm.init_dataset(
1012
+ dataset=df,
1013
+ input_id="raw_dataset",
1014
+ target_column=target_column,
1015
+ )
1016
+
1017
+ vm.init_dataset(
1018
+ dataset=preprocess_df,
1019
+ input_id="preprocess_dataset",
1020
+ target_column=target_column,
1021
+ )
1022
+
1023
+ vm.init_dataset(
1024
+ dataset=fe_df,
1025
+ input_id="fe_dataset",
1026
+ target_column=target_column,
1027
+ )
1028
+
1029
+ vm_train_ds = vm.init_dataset(
1030
+ dataset=train_df,
1031
+ input_id="train_dataset",
1032
+ target_column=target_column,
1033
+ )
1034
+
1035
+ vm_test_ds = vm.init_dataset(
1036
+ dataset=test_df,
1037
+ input_id="test_dataset",
1038
+ target_column=target_column,
1039
+ )
1040
+
1041
+ vm_xgb_model = vm.init_model(
1042
+ xgb_model,
1043
+ input_id="xgb_model",
1044
+ )
1045
+
1046
+ vm_rf_model = vm.init_model(
1047
+ rf_model,
1048
+ input_id="rf_model",
1049
+ )
1050
+
1051
+ # Assign predictions
1052
+ vm_train_ds.assign_predictions(
1053
+ model=vm_xgb_model,
1054
+ prediction_values=train_xgb_binary_predictions,
1055
+ prediction_probabilities=train_xgb_prob,
1056
+ )
1057
+
1058
+ vm_test_ds.assign_predictions(
1059
+ model=vm_xgb_model,
1060
+ prediction_values=test_xgb_binary_predictions,
1061
+ prediction_probabilities=test_xgb_prob,
1062
+ )
1063
+
1064
+ vm_train_ds.assign_predictions(
1065
+ model=vm_rf_model,
1066
+ prediction_values=train_rf_binary_predictions,
1067
+ prediction_probabilities=train_rf_prob,
1068
+ )
1069
+
1070
+ vm_test_ds.assign_predictions(
1071
+ model=vm_rf_model,
1072
+ prediction_values=test_rf_binary_predictions,
1073
+ prediction_probabilities=test_rf_prob,
1074
+ )
1075
+
1076
+ # Assign scores to the datasets
1077
+ vm_train_ds.add_extra_column("xgb_scores", train_xgb_scores)
1078
+ vm_test_ds.add_extra_column("xgb_scores", test_xgb_scores)
1079
+
1080
+
1081
+ def load_test_config(scorecard):
1082
+
1083
+ x_test = scorecard["x_test"]
1084
+ y_test = scorecard["y_test"]
1085
+
1086
+ # Get the test config
1087
+ test_config = get_demo_test_config(x_test, y_test)
1088
+
1089
+ return test_config
validmind/html_templates/content_blocks.py CHANGED
@@ -111,7 +111,7 @@ hljs.highlightAll();
111
111
  </script>
112
112
  """
113
113
 
114
- # FIXME: this is a bit too hacky
114
+ # have to dynamically load mathjax
115
115
  math_jax_snippet = """
116
116
  <script>
117
117
  window.MathJax = {