validmind 2.4.13__tar.gz → 2.5.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (321)
  1. {validmind-2.4.13 → validmind-2.5.1}/PKG-INFO +1 -1
  2. {validmind-2.4.13 → validmind-2.5.1}/pyproject.toml +1 -1
  3. validmind-2.5.1/validmind/__version__.py +1 -0
  4. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/__types__.py +4 -0
  5. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +15 -6
  6. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  7. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +10 -3
  8. validmind-2.5.1/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +395 -0
  9. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +1 -1
  10. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -37
  11. validmind-2.5.1/validmind/tests/ongoing_monitoring/FeatureDrift.py +182 -0
  12. validmind-2.5.1/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +76 -0
  13. validmind-2.5.1/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +91 -0
  14. validmind-2.5.1/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +57 -0
  15. validmind-2.5.1/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +34 -0
  16. {validmind-2.4.13 → validmind-2.5.1}/validmind/utils.py +1 -1
  17. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/dataset/dataset.py +2 -1
  18. validmind-2.4.13/validmind/__version__.py +0 -1
  19. validmind-2.4.13/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -337
  20. validmind-2.4.13/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +0 -13
  21. {validmind-2.4.13 → validmind-2.5.1}/LICENSE +0 -0
  22. {validmind-2.4.13 → validmind-2.5.1}/README.pypi.md +0 -0
  23. {validmind-2.4.13 → validmind-2.5.1}/validmind/__init__.py +0 -0
  24. {validmind-2.4.13 → validmind-2.5.1}/validmind/ai/test_descriptions.py +0 -0
  25. {validmind-2.4.13 → validmind-2.5.1}/validmind/ai/utils.py +0 -0
  26. {validmind-2.4.13 → validmind-2.5.1}/validmind/api_client.py +0 -0
  27. {validmind-2.4.13 → validmind-2.5.1}/validmind/client.py +0 -0
  28. {validmind-2.4.13 → validmind-2.5.1}/validmind/client_config.py +0 -0
  29. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/__init__.py +0 -0
  30. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/classification/__init__.py +0 -0
  31. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/classification/customer_churn.py +0 -0
  32. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
  33. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
  34. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/classification/taiwan_credit.py +0 -0
  35. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/cluster/digits.py +0 -0
  36. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/credit_risk/__init__.py +0 -0
  37. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  38. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/credit_risk/lending_club.py +0 -0
  39. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/llm/rag/__init__.py +0 -0
  40. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
  41. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
  42. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
  43. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
  44. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
  45. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/llm/rag/rfp.py +0 -0
  46. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/nlp/__init__.py +0 -0
  47. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
  48. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
  49. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
  50. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
  51. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
  52. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
  53. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/__init__.py +0 -0
  54. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/california_housing.py +0 -0
  55. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
  56. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
  57. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
  58. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
  59. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
  60. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
  61. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
  62. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
  63. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
  64. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
  65. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
  66. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
  67. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
  68. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
  69. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
  70. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
  71. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
  72. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/datasets/leanding_club_loan_rates.csv +0 -0
  73. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/fred.py +0 -0
  74. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/fred_timeseries.py +0 -0
  75. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/lending_club.py +0 -0
  76. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
  77. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
  78. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
  79. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
  80. {validmind-2.4.13 → validmind-2.5.1}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
  81. {validmind-2.4.13 → validmind-2.5.1}/validmind/errors.py +0 -0
  82. {validmind-2.4.13 → validmind-2.5.1}/validmind/html_templates/__init__.py +0 -0
  83. {validmind-2.4.13 → validmind-2.5.1}/validmind/html_templates/content_blocks.py +0 -0
  84. {validmind-2.4.13 → validmind-2.5.1}/validmind/input_registry.py +0 -0
  85. {validmind-2.4.13 → validmind-2.5.1}/validmind/logging.py +0 -0
  86. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/__init__.py +0 -0
  87. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/foundation.py +0 -0
  88. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/function.py +0 -0
  89. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/huggingface.py +0 -0
  90. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/metadata.py +0 -0
  91. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/pipeline.py +0 -0
  92. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/pytorch.py +0 -0
  93. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/r_model.py +0 -0
  94. {validmind-2.4.13 → validmind-2.5.1}/validmind/models/sklearn.py +0 -0
  95. {validmind-2.4.13 → validmind-2.5.1}/validmind/template.py +0 -0
  96. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/__init__.py +0 -0
  97. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/classifier.py +0 -0
  98. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/cluster.py +0 -0
  99. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/embeddings.py +0 -0
  100. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/llm.py +0 -0
  101. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/nlp.py +0 -0
  102. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/parameters_optimization.py +0 -0
  103. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/regression.py +0 -0
  104. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/statsmodels_timeseries.py +0 -0
  105. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/summarization.py +0 -0
  106. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/tabular_datasets.py +0 -0
  107. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/text_data.py +0 -0
  108. {validmind-2.4.13 → validmind-2.5.1}/validmind/test_suites/time_series.py +0 -0
  109. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/__init__.py +0 -0
  110. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/_store.py +0 -0
  111. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
  112. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/ADF.py +0 -0
  113. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
  114. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/AutoAR.py +0 -0
  115. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/AutoMA.py +0 -0
  116. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
  117. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/AutoStationarity.py +0 -0
  118. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
  119. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
  120. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
  121. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
  122. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/ClassImbalance.py +0 -0
  123. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/DFGLSArch.py +0 -0
  124. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/DatasetDescription.py +0 -0
  125. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/DatasetSplit.py +0 -0
  126. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
  127. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/Duplicates.py +0 -0
  128. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
  129. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
  130. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -0
  131. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/HighCardinality.py +0 -0
  132. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/HighPearsonCorrelation.py +0 -0
  133. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
  134. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
  135. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
  136. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/KPSS.py +0 -0
  137. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
  138. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/MissingValues.py +0 -0
  139. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
  140. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
  141. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
  142. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/PhillipsPerronArch.py +0 -0
  143. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
  144. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/ScatterPlot.py +0 -0
  145. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
  146. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/Skewness.py +0 -0
  147. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/SpreadPlot.py +0 -0
  148. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
  149. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
  150. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
  151. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
  152. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
  153. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesDescription.py +0 -0
  154. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +0 -0
  155. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
  156. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
  157. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
  158. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
  159. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
  160. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
  161. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/UniqueRows.py +0 -0
  162. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
  163. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/WOEBinTable.py +0 -0
  164. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/ZivotAndrewsArch.py +0 -0
  165. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/__init__.py +0 -0
  166. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
  167. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
  168. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
  169. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
  170. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -0
  171. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
  172. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Sentiment.py +0 -0
  173. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
  174. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/TextDescription.py +0 -0
  175. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/Toxicity.py +0 -0
  176. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/data_validation/nlp/__init__.py +0 -0
  177. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/decorator.py +0 -0
  178. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/load.py +0 -0
  179. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/metadata.py +0 -0
  180. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/BertScore.py +0 -0
  181. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/BleuScore.py +0 -0
  182. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
  183. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ContextualRecall.py +0 -0
  184. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/FeaturesAUC.py +0 -0
  185. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/MeteorScore.py +0 -0
  186. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ModelMetadata.py +0 -0
  187. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ModelMetadataComparison.py +0 -0
  188. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ModelPredictionResiduals.py +0 -0
  189. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/RegardScore.py +0 -0
  190. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
  191. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/RougeScore.py +0 -0
  192. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +0 -0
  193. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +0 -0
  194. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +0 -0
  195. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/TokenDisparity.py +0 -0
  196. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ToxicityScore.py +0 -0
  197. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/__init__.py +0 -0
  198. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
  199. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +0 -0
  200. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
  201. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +0 -0
  202. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
  203. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
  204. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +0 -0
  205. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +0 -0
  206. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +0 -0
  207. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
  208. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
  209. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
  210. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
  211. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
  212. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -0
  213. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +0 -0
  214. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AnswerRelevance.py +0 -0
  215. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +0 -0
  216. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/AspectCritique.py +0 -0
  217. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +0 -0
  218. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextPrecision.py +0 -0
  219. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextRecall.py +0 -0
  220. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/ContextRelevancy.py +0 -0
  221. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/Faithfulness.py +0 -0
  222. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/ragas/utils.py +0 -0
  223. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
  224. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
  225. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
  226. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
  227. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
  228. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
  229. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -0
  230. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
  231. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
  232. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
  233. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
  234. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
  235. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
  236. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
  237. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
  238. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
  239. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
  240. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
  241. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +0 -0
  242. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +0 -0
  243. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
  244. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +0 -0
  245. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
  246. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
  247. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
  248. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
  249. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
  250. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
  251. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
  252. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
  253. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
  254. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
  255. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
  256. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -0
  257. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
  258. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/LJungBox.py +0 -0
  259. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
  260. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
  261. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -0
  262. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
  263. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
  264. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
  265. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
  266. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
  267. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -0
  268. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
  269. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/RunsTest.py +0 -0
  270. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
  271. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +0 -0
  272. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
  273. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
  274. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/Bias.py +0 -0
  275. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/Clarity.py +0 -0
  276. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/Conciseness.py +0 -0
  277. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/Delimitation.py +0 -0
  278. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/NegativeInstruction.py +0 -0
  279. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/Robustness.py +0 -0
  280. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/Specificity.py +0 -0
  281. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/__init__.py +0 -0
  282. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/prompt_validation/ai_powered_test.py +0 -0
  283. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/run.py +0 -0
  284. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/test_providers.py +0 -0
  285. {validmind-2.4.13 → validmind-2.5.1}/validmind/tests/utils.py +0 -0
  286. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/__init__.py +0 -0
  287. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/Accuracy.py +0 -0
  288. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/F1.py +0 -0
  289. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/Precision.py +0 -0
  290. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/classification/sklearn/Recall.py +0 -0
  291. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/composite.py +0 -0
  292. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
  293. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
  294. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
  295. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
  296. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
  297. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
  298. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +0 -0
  299. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +0 -0
  300. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +0 -0
  301. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +0 -0
  302. {validmind-2.4.13 → validmind-2.5.1}/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +0 -0
  303. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/__init__.py +0 -0
  304. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/dataset/__init__.py +0 -0
  305. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/dataset/utils.py +0 -0
  306. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/figure.py +0 -0
  307. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/input.py +0 -0
  308. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/model.py +0 -0
  309. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/metric.py +0 -0
  310. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/metric_result.py +0 -0
  311. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/output_template.py +0 -0
  312. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/result_summary.py +0 -0
  313. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/result_wrapper.py +0 -0
  314. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/test.py +0 -0
  315. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/threshold_test.py +0 -0
  316. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test/threshold_test_result.py +0 -0
  317. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test_context.py +0 -0
  318. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test_suite/runner.py +0 -0
  319. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test_suite/summary.py +0 -0
  320. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test_suite/test.py +0 -0
  321. {validmind-2.4.13 → validmind-2.5.1}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: validmind
3
- Version: 2.4.13
3
+ Version: 2.5.1
4
4
  Summary: ValidMind Developer Framework
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -10,7 +10,7 @@ description = "ValidMind Developer Framework"
10
10
  license = "Commercial License"
11
11
  name = "validmind"
12
12
  readme = "README.pypi.md"
13
- version = "2.4.13"
13
+ version = "2.5.1"
14
14
 
15
15
  [tool.poetry.dependencies]
16
16
  python = ">=3.8.1,<3.12"
@@ -0,0 +1 @@
1
+ __version__ = "2.5.1"
@@ -113,6 +113,10 @@ TestID = Literal[
113
113
  "validmind.model_validation.statsmodels.GINITable",
114
114
  "validmind.model_validation.statsmodels.RegressionModelForecastPlot",
115
115
  "validmind.model_validation.statsmodels.DurbinWatsonTest",
116
+ "validmind.ongoing_monitoring.PredictionCorrelation",
117
+ "validmind.ongoing_monitoring.PredictionAcrossEachFeature",
118
+ "validmind.ongoing_monitoring.FeatureDrift",
119
+ "validmind.ongoing_monitoring.TargetPredictionDistributionPlot",
116
120
  "validmind.data_validation.MissingValuesRisk",
117
121
  "validmind.data_validation.IQROutliersTable",
118
122
  "validmind.data_validation.BivariateFeaturesBarPlots",
@@ -4,7 +4,7 @@
4
4
 
5
5
  from dataclasses import dataclass
6
6
 
7
- from numpy import unique
7
+ import numpy as np
8
8
  from sklearn.metrics import classification_report, roc_auc_score
9
9
  from sklearn.preprocessing import LabelBinarizer
10
10
 
@@ -71,7 +71,7 @@ class ClassifierPerformance(Metric):
71
71
  When building a multi-class summary we need to calculate weighted average,
72
72
  macro average and per class metrics.
73
73
  """
74
- classes = {str(i) for i in unique(self.inputs.dataset.y)}
74
+ classes = {str(i) for i in np.unique(self.inputs.dataset.y)}
75
75
  pr_f1_table = [
76
76
  {
77
77
  "Class": class_name,
@@ -126,9 +126,18 @@ class ClassifierPerformance(Metric):
126
126
  output_dict=True,
127
127
  zero_division=0,
128
128
  )
129
- report["roc_auc"] = multiclass_roc_auc_score(
130
- self.inputs.dataset.y,
131
- self.inputs.dataset.y_pred(self.inputs.model),
132
- )
129
+
130
+ y_true = self.inputs.dataset.y
131
+
132
+ if len(np.unique(y_true)) > 2:
133
+ y_pred = self.inputs.dataset.y_pred(self.inputs.model)
134
+ y_true = y_true.astype(y_pred.dtype)
135
+ roc_auc = self.multiclass_roc_auc_score(y_true, y_pred)
136
+ else:
137
+ y_prob = self.inputs.dataset.y_prob(self.inputs.model)
138
+ y_true = y_true.astype(y_prob.dtype).flatten()
139
+ roc_auc = roc_auc_score(y_true, y_prob)
140
+
141
+ report["roc_auc"] = roc_auc
133
142
 
134
143
  return self.cache_results(report)
@@ -57,7 +57,7 @@ class ClusterPerformance(Metric):
57
57
  "model_performance",
58
58
  ]
59
59
 
60
- def cluser_performance_metrics(
60
+ def cluster_performance_metrics(
61
61
  self, y_true_train, y_pred_train, y_true_test, y_pred_test, samples, metric_info
62
62
  ):
63
63
  y_true_train = y_true_train.astype(y_pred_train.dtype).flatten()
@@ -107,7 +107,7 @@ class ClusterPerformance(Metric):
107
107
  y_true_test = y_true_test.astype(class_pred_test.dtype)
108
108
 
109
109
  samples = ["train", "test"]
110
- results = self.cluser_performance_metrics(
110
+ results = self.cluster_performance_metrics(
111
111
  y_true_train,
112
112
  class_pred_train,
113
113
  y_true_test,
@@ -5,6 +5,7 @@
5
5
  from dataclasses import dataclass
6
6
  from typing import List
7
7
 
8
+ import numpy as np
8
9
  import pandas as pd
9
10
  from sklearn import metrics, preprocessing
10
11
 
@@ -99,9 +100,15 @@ class MinimumROCAUCScore(ThresholdTest):
99
100
 
100
101
  def run(self):
101
102
  y_true = self.inputs.dataset.y
102
- class_pred = self.inputs.dataset.y_pred(self.inputs.model)
103
- y_true = y_true.astype(class_pred.dtype)
104
- roc_auc = self.multiclass_roc_auc_score(y_true, class_pred)
103
+
104
+ if len(np.unique(y_true)) > 2:
105
+ class_pred = self.inputs.dataset.y_pred(self.inputs.model)
106
+ y_true = y_true.astype(class_pred.dtype)
107
+ roc_auc = self.multiclass_roc_auc_score(y_true, class_pred)
108
+ else:
109
+ y_prob = self.inputs.dataset.y_prob(self.inputs.model)
110
+ y_true = y_true.astype(y_prob.dtype).flatten()
111
+ roc_auc = metrics.roc_auc_score(y_true, y_prob)
105
112
 
106
113
  passed = roc_auc > self.params["min_threshold"]
107
114
  results = [
@@ -0,0 +1,395 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from dataclasses import dataclass
6
+ from typing import List
7
+
8
+ import matplotlib.pyplot as plt
9
+ import numpy as np
10
+ import pandas as pd
11
+ import seaborn as sns
12
+ from sklearn import metrics
13
+
14
+ from validmind.logging import get_logger
15
+ from validmind.vm_models import (
16
+ Figure,
17
+ ResultSummary,
18
+ ResultTable,
19
+ ResultTableMetadata,
20
+ ThresholdTest,
21
+ ThresholdTestResult,
22
+ VMDataset,
23
+ VMModel,
24
+ )
25
+
26
logger = get_logger(__name__)

# Default gap above which a feature slice is reported as overfit.
DEFAULT_THRESHOLD = 0.04

# One row per supported metric:
# (name, scoring function, is_classification, is_lower_better)
_METRIC_SPECS = (
    ("accuracy", metrics.accuracy_score, True, False),
    ("auc", metrics.roc_auc_score, True, False),
    ("f1", metrics.f1_score, True, False),
    ("precision", metrics.precision_score, True, False),
    ("recall", metrics.recall_score, True, False),
    ("mse", metrics.mean_squared_error, False, True),
    ("mae", metrics.mean_absolute_error, False, True),
    ("r2", metrics.r2_score, False, False),
    ("mape", metrics.mean_absolute_percentage_error, False, True),
)

# Registry keyed by metric name. `is_lower_better` decides the sign of the
# train/test gap; `is_classification` is used to reject metrics that do not
# match the model type.
PERFORMANCE_METRICS = {
    name: {
        "function": scorer,
        "is_classification": for_classification,
        "is_lower_better": lower_is_better,
    }
    for name, scorer, for_classification, lower_is_better in _METRIC_SPECS
}
76
+
77
+
78
+ def _prepare_results(
79
+ results_train: dict, results_test: dict, metric: str
80
+ ) -> pd.DataFrame:
81
+ results_train = pd.DataFrame(results_train)
82
+ results_test = pd.DataFrame(results_test)
83
+ results = results_train.copy()
84
+ results.rename(
85
+ columns={"shape": "training records", f"{metric}": f"training {metric}"},
86
+ inplace=True,
87
+ )
88
+ results[f"test {metric}"] = results_test[metric]
89
+
90
+ # Adjust gap calculation based on metric directionality
91
+ if PERFORMANCE_METRICS[metric]["is_lower_better"]:
92
+ results["gap"] = results[f"test {metric}"] - results[f"training {metric}"]
93
+ else:
94
+ results["gap"] = results[f"training {metric}"] - results[f"test {metric}"]
95
+
96
+ return results
97
+
98
+
99
+ def _compute_metrics(
100
+ results: dict,
101
+ region: str,
102
+ df_region: pd.DataFrame,
103
+ target_column: str,
104
+ prob_column: str,
105
+ pred_column: str,
106
+ feature_column: str,
107
+ metric: str,
108
+ is_classification: bool,
109
+ ) -> None:
110
+ results["slice"].append(str(region))
111
+ results["shape"].append(df_region.shape[0])
112
+ results["feature"].append(feature_column)
113
+
114
+ # Check if any records
115
+ if df_region.empty:
116
+ results[metric].append(0)
117
+ return
118
+
119
+ metric_func = PERFORMANCE_METRICS[metric]["function"]
120
+ y_true = df_region[target_column].values
121
+
122
+ # AUC requires probability scores
123
+ if is_classification and metric == "auc":
124
+ # if only one class is present in the data, return 0
125
+ if len(np.unique(y_true)) == 1:
126
+ results[metric].append(0)
127
+ return
128
+
129
+ score = metric_func(y_true, df_region[prob_column].values)
130
+
131
+ # All other classification metrics
132
+ elif is_classification:
133
+ score = metric_func(y_true, df_region[pred_column].values)
134
+
135
+ # Regression metrics
136
+ else:
137
+ score = metric_func(y_true, df_region[pred_column].values)
138
+
139
+ results[metric].append(score)
140
+
141
+
142
+ def _plot_overfit_regions(
143
+ df: pd.DataFrame, feature_column: str, threshold: float, metric: str
144
+ ) -> plt.Figure:
145
+ fig, ax = plt.subplots()
146
+ barplot = sns.barplot(data=df, x="slice", y="gap", ax=ax)
147
+ ax.tick_params(axis="x", rotation=90)
148
+
149
+ # Draw threshold line
150
+ axhline = ax.axhline(
151
+ y=threshold,
152
+ color="red",
153
+ linestyle="--",
154
+ linewidth=1,
155
+ label=f"Cut-Off Threshold: {threshold}",
156
+ )
157
+ ax.tick_params(axis="x", labelsize=20)
158
+ ax.tick_params(axis="y", labelsize=20)
159
+
160
+ ax.set_ylabel(f"{metric.upper()} Gap", weight="bold", fontsize=18)
161
+ ax.set_xlabel("Slice/Segments", weight="bold", fontsize=18)
162
+ ax.set_title(
163
+ f"Overfit regions in feature column: {feature_column}",
164
+ weight="bold",
165
+ fontsize=20,
166
+ wrap=True,
167
+ )
168
+
169
+ handles, labels = barplot.get_legend_handles_labels()
170
+ handles.append(axhline)
171
+ labels.append(axhline.get_label())
172
+
173
+ barplot.legend(
174
+ handles=handles[:-1],
175
+ labels=labels,
176
+ loc="upper center",
177
+ bbox_to_anchor=(0.5, 0.1),
178
+ ncol=len(handles),
179
+ )
180
+
181
+ plt.close("all")
182
+
183
+ return fig
184
+
185
+
186
# TODO: make this a functional test instead of class-based when appropriate
# simply have to remove the class and rename this func to OverfitDiagnosis
def overfit_diagnosis(  # noqa: C901
    model: VMModel,
    datasets: List[VMDataset],
    metric: str = None,
    cut_off_threshold: float = DEFAULT_THRESHOLD,
):
    """Identify overfit regions in a model's predictions.

    This test compares the model's performance on training versus test data, grouped by
    feature columns. It calculates the difference between the training and test performance
    for each group and identifies regions where the difference exceeds a specified threshold.

    This test works for both classification and regression models and with a variety of
    performance metrics. By default, it uses the AUC metric for classification models and
    the MSE metric for regression models. The threshold for identifying overfit regions
    defaults to 0.04 but should be adjusted based on the specific use case.

    ## Inputs
    - `model` (VMModel): The ValidMind model object to evaluate.
    - `datasets` (List[VMDataset]): A list of two VMDataset objects where the first dataset
      is the training data and the second dataset is the test data.

    ## Parameters
    - `metric` (str, optional): The performance metric to use for evaluation. Choose from:
      'accuracy', 'auc', 'f1', 'precision', 'recall', 'mse', 'mae', 'r2', 'mape'.
      Defaults to 'auc' for classification models and 'mse' for regression models.
    - `cut_off_threshold` (float, optional): The threshold for identifying overfit regions.
      Defaults to 0.04.

    ## Returns
    A tuple whose first element is `{"Overfit Diagnosis": rows}`, where `rows` holds one
    dict per slice whose gap exceeds `cut_off_threshold`, followed by one `Figure` per
    feature column.

    ## Raises
    - `ValueError`: If `metric` is not a supported metric name, or if a regression metric
      is requested for a classification model (or vice versa).
    """

    # Determine if it's a classification or regression model
    is_classification = bool(datasets[0].probability_column(model))

    # Set default metric if not provided
    if metric is None:
        metric = "auc" if is_classification else "mse"
        logger.info(
            f"Using default {'classification' if is_classification else 'regression'} metric: {metric}"
        )

    # Compare by value: object identity of two equal floats is an
    # implementation detail and is not a reliable "default was used" signal.
    if cut_off_threshold == DEFAULT_THRESHOLD:
        logger.info("Using default cut-off threshold of 0.04")

    metric = metric.lower()
    if metric not in PERFORMANCE_METRICS:
        raise ValueError(
            f"Invalid metric. Choose from: {', '.join(PERFORMANCE_METRICS.keys())}"
        )
    _metric = PERFORMANCE_METRICS[metric]

    if is_classification and not _metric["is_classification"]:
        raise ValueError(f"Cannot use regression metric ({metric}) for classification.")
    elif not is_classification and _metric["is_classification"]:
        raise ValueError(f"Cannot use classification metric ({metric}) for regression.")

    # Work on copies so the helper columns added below never end up on the
    # frames owned by the datasets.
    # NOTE(review): `VMDataset.df` may already return a copy - confirm; the
    # extra copy is then only a memory cost.
    train_df = datasets[0].df.copy()
    test_df = datasets[1].df.copy()

    pred_column = f"{datasets[0].target_column}_pred"
    prob_column = f"{datasets[0].target_column}_prob"

    train_df[pred_column] = datasets[0].y_pred(model)
    test_df[pred_column] = datasets[1].y_pred(model)

    if is_classification:
        train_df[prob_column] = datasets[0].y_prob(model)
        test_df[prob_column] = datasets[1].y_prob(model)

    test_results = []
    test_figures = []
    results_headers = ["slice", "shape", "feature", metric]

    for feature_column in datasets[0].feature_columns:
        bins = 10
        if feature_column in datasets[0].feature_columns_categorical:
            bins = len(train_df[feature_column].unique())
        # NOTE(review): pd.cut needs numeric values; presumably categorical
        # features are numerically encoded by this point - confirm.
        # Grouping by the binned Series (instead of a temporary "bin" column)
        # cannot clobber a real feature that happens to be named "bin".
        train_bins = pd.cut(train_df[feature_column], bins=bins)

        results_train = {k: [] for k in results_headers}
        results_test = {k: [] for k in results_headers}

        # observed=False keeps empty bins as (empty) groups, which is what the
        # implicit default did; passing it explicitly avoids depending on a
        # default that pandas is changing.
        for region, df_region in train_df.groupby(train_bins, observed=False):
            _compute_metrics(
                results=results_train,
                region=region,
                df_region=df_region,
                feature_column=feature_column,
                target_column=datasets[0].target_column,
                prob_column=prob_column,
                pred_column=pred_column,
                metric=metric,
                is_classification=is_classification,
            )
            # Select the test rows that fall into the same (left, right] bin
            df_test_region = test_df[
                (test_df[feature_column] > region.left)
                & (test_df[feature_column] <= region.right)
            ]
            _compute_metrics(
                results=results_test,
                region=region,
                df_region=df_test_region,
                feature_column=feature_column,
                target_column=datasets[1].target_column,
                prob_column=prob_column,
                pred_column=pred_column,
                metric=metric,
                is_classification=is_classification,
            )

        results = _prepare_results(results_train, results_test, metric)

        fig = _plot_overfit_regions(results, feature_column, cut_off_threshold, metric)
        test_figures.append(
            Figure(
                key=f"overfit_diagnosis:{metric}:{feature_column}",
                figure=fig,
                metadata={
                    "metric": metric,
                    "cut_off_threshold": cut_off_threshold,
                    "feature": feature_column,
                },
            )
        )

        # Only slices whose gap exceeds the threshold are reported
        for _, row in results[results["gap"] > cut_off_threshold].iterrows():
            test_results.append(
                {
                    "Feature": feature_column,
                    "Slice": row["slice"],
                    "Number of Records": row["training records"],
                    f"Training {metric.upper()}": row[f"training {metric}"],
                    f"Test {metric.upper()}": row[f"test {metric}"],
                    "Gap": row["gap"],
                }
            )

    return {"Overfit Diagnosis": test_results}, *test_figures
326
+
327
+
328
@dataclass
class OverfitDiagnosis(ThresholdTest):
    """Identify overfit regions in a model's predictions.

    This test compares the model's performance on training versus test data, grouped by
    feature columns. It calculates the difference between the training and test performance
    for each group and identifies regions where the difference exceeds a specified threshold.

    This test works for both classification and regression models and with a variety of
    performance metrics. By default, it uses the AUC metric for classification models and
    the MSE metric for regression models. The threshold for identifying overfit regions
    defaults to 0.04 but should be adjusted based on the specific use case.

    ## Inputs
    - `model` (VMModel): The ValidMind model object to evaluate.
    - `datasets` (List[VMDataset]): A list of two VMDataset objects where the first dataset
      is the training data and the second dataset is the test data.

    ## Parameters
    - `metric` (str, optional): The performance metric to use for evaluation. Choose from:
      'accuracy', 'auc', 'f1', 'precision', 'recall', 'mse', 'mae', 'r2', 'mape'.
      Defaults to 'auc' for classification models and 'mse' for regression models.
    - `cut_off_threshold` (float, optional): The threshold for identifying overfit regions.
      Defaults to 0.04.
    """

    required_inputs = ["model", "datasets"]
    default_params = {"metric": None, "cut_off_threshold": DEFAULT_THRESHOLD}
    tasks = ["classification", "regression"]
    tags = [
        "sklearn",
        "binary_classification",
        "multiclass_classification",
        "linear_regression",
        "model_diagnosis",
    ]

    def run(self):
        # Thin wrapper: all computation lives in `overfit_diagnosis`.
        outcome = overfit_diagnosis(
            self.inputs.model,
            self.inputs.datasets,
            metric=self.params["metric"],
            cut_off_threshold=self.params["cut_off_threshold"],
        )
        # First element is the table of flagged slices, the rest are figures.
        flagged_rows = outcome[0]["Overfit Diagnosis"]
        figures = outcome[1:]

        # Every reported row is a slice that exceeded the threshold, so each
        # individual result is a failure.
        # NOTE(review): `test_name` is the raw `metric` param, which is None
        # when the default metric is used - confirm this is intended.
        threshold_results = []
        for row in flagged_rows:
            threshold_results.append(
                ThresholdTestResult(
                    test_name=self.params["metric"],
                    column=row["Feature"],
                    passed=False,
                    values=dict(row),
                )
            )

        # The test as a whole passes only when no slice was flagged.
        return self.cache_results(
            test_results_list=threshold_results,
            passed=(not flagged_rows),
            figures=figures,
        )

    def summary(self, results, _):
        # One table containing the values dict of every flagged slice.
        table_rows = [result.values for result in results]
        overfit_table = ResultTable(
            data=table_rows,
            metadata=ResultTableMetadata(title="Overfit Diagnosis"),
        )
        return ResultSummary(results=[overfit_table])
@@ -65,7 +65,7 @@ class PrecisionRecallCurve(Metric):
65
65
  raise SkipTestError("Skipping PrecisionRecallCurve for Foundation models")
66
66
 
67
67
  y_true = self.inputs.dataset.y
68
- y_pred = self.inputs.model.predict_proba(self.inputs.dataset.x)
68
+ y_pred = self.inputs.dataset.y_prob(self.inputs.model)
69
69
 
70
70
  # PR curve is only supported for binary classification
71
71
  if len(np.unique(y_true)) > 2: