validmind 2.5.2__tar.gz → 2.5.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (320)
  1. {validmind-2.5.2 → validmind-2.5.8}/PKG-INFO +1 -1
  2. {validmind-2.5.2 → validmind-2.5.8}/pyproject.toml +1 -1
  3. validmind-2.5.8/validmind/__version__.py +1 -0
  4. {validmind-2.5.2 → validmind-2.5.8}/validmind/client.py +6 -0
  5. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/__types__.py +0 -1
  6. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +1 -1
  7. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/AnswerRelevance.py +1 -1
  8. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +1 -1
  9. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/AspectCritique.py +1 -1
  10. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +1 -1
  11. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/ContextPrecision.py +1 -1
  12. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/ContextRecall.py +1 -1
  13. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/Faithfulness.py +1 -1
  14. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +17 -36
  15. validmind-2.5.8/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +415 -0
  16. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/run.py +1 -1
  17. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/dataset/utils.py +9 -2
  18. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/model.py +12 -1
  19. validmind-2.5.2/validmind/__version__.py +0 -1
  20. validmind-2.5.2/validmind/tests/model_validation/ragas/ContextRelevancy.py +0 -119
  21. validmind-2.5.2/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +0 -328
  22. {validmind-2.5.2 → validmind-2.5.8}/LICENSE +0 -0
  23. {validmind-2.5.2 → validmind-2.5.8}/README.pypi.md +0 -0
  24. {validmind-2.5.2 → validmind-2.5.8}/validmind/__init__.py +0 -0
  25. {validmind-2.5.2 → validmind-2.5.8}/validmind/ai/test_descriptions.py +0 -0
  26. {validmind-2.5.2 → validmind-2.5.8}/validmind/ai/utils.py +0 -0
  27. {validmind-2.5.2 → validmind-2.5.8}/validmind/api_client.py +0 -0
  28. {validmind-2.5.2 → validmind-2.5.8}/validmind/client_config.py +0 -0
  29. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/__init__.py +0 -0
  30. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/classification/__init__.py +0 -0
  31. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/classification/customer_churn.py +0 -0
  32. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
  33. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
  34. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/classification/taiwan_credit.py +0 -0
  35. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/cluster/digits.py +0 -0
  36. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/credit_risk/__init__.py +0 -0
  37. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  38. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/credit_risk/lending_club.py +0 -0
  39. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/llm/rag/__init__.py +0 -0
  40. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
  41. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
  42. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
  43. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
  44. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
  45. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/llm/rag/rfp.py +0 -0
  46. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/nlp/__init__.py +0 -0
  47. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
  48. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
  49. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
  50. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
  51. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
  52. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
  53. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/__init__.py +0 -0
  54. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/california_housing.py +0 -0
  55. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
  56. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
  57. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
  58. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
  59. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
  60. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
  61. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
  62. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
  63. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
  64. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
  65. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
  66. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
  67. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
  68. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
  69. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
  70. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
  71. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
  72. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/datasets/leanding_club_loan_rates.csv +0 -0
  73. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/fred.py +0 -0
  74. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/fred_timeseries.py +0 -0
  75. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/lending_club.py +0 -0
  76. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
  77. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
  78. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
  79. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
  80. {validmind-2.5.2 → validmind-2.5.8}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
  81. {validmind-2.5.2 → validmind-2.5.8}/validmind/errors.py +0 -0
  82. {validmind-2.5.2 → validmind-2.5.8}/validmind/html_templates/__init__.py +0 -0
  83. {validmind-2.5.2 → validmind-2.5.8}/validmind/html_templates/content_blocks.py +0 -0
  84. {validmind-2.5.2 → validmind-2.5.8}/validmind/input_registry.py +0 -0
  85. {validmind-2.5.2 → validmind-2.5.8}/validmind/logging.py +0 -0
  86. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/__init__.py +0 -0
  87. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/foundation.py +0 -0
  88. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/function.py +0 -0
  89. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/huggingface.py +0 -0
  90. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/metadata.py +0 -0
  91. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/pipeline.py +0 -0
  92. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/pytorch.py +0 -0
  93. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/r_model.py +0 -0
  94. {validmind-2.5.2 → validmind-2.5.8}/validmind/models/sklearn.py +0 -0
  95. {validmind-2.5.2 → validmind-2.5.8}/validmind/template.py +0 -0
  96. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/__init__.py +0 -0
  97. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/classifier.py +0 -0
  98. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/cluster.py +0 -0
  99. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/embeddings.py +0 -0
  100. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/llm.py +0 -0
  101. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/nlp.py +0 -0
  102. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/parameters_optimization.py +0 -0
  103. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/regression.py +0 -0
  104. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/statsmodels_timeseries.py +0 -0
  105. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/summarization.py +0 -0
  106. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/tabular_datasets.py +0 -0
  107. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/text_data.py +0 -0
  108. {validmind-2.5.2 → validmind-2.5.8}/validmind/test_suites/time_series.py +0 -0
  109. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/__init__.py +0 -0
  110. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/_store.py +0 -0
  111. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
  112. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/ADF.py +0 -0
  113. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
  114. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/AutoAR.py +0 -0
  115. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/AutoMA.py +0 -0
  116. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
  117. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/AutoStationarity.py +0 -0
  118. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
  119. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
  120. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
  121. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
  122. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/ClassImbalance.py +0 -0
  123. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/DFGLSArch.py +0 -0
  124. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/DatasetDescription.py +0 -0
  125. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/DatasetSplit.py +0 -0
  126. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
  127. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/Duplicates.py +0 -0
  128. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
  129. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
  130. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -0
  131. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/HighCardinality.py +0 -0
  132. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/HighPearsonCorrelation.py +0 -0
  133. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
  134. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
  135. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
  136. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/KPSS.py +0 -0
  137. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
  138. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/MissingValues.py +0 -0
  139. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
  140. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
  141. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
  142. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/PhillipsPerronArch.py +0 -0
  143. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
  144. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/ScatterPlot.py +0 -0
  145. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
  146. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/Skewness.py +0 -0
  147. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/SpreadPlot.py +0 -0
  148. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
  149. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
  150. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
  151. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
  152. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
  153. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TimeSeriesDescription.py +0 -0
  154. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +0 -0
  155. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
  156. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
  157. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
  158. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
  159. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
  160. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
  161. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/UniqueRows.py +0 -0
  162. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
  163. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/WOEBinTable.py +0 -0
  164. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/ZivotAndrewsArch.py +0 -0
  165. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/__init__.py +0 -0
  166. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
  167. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
  168. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
  169. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
  170. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -0
  171. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
  172. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/Sentiment.py +0 -0
  173. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
  174. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/TextDescription.py +0 -0
  175. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/Toxicity.py +0 -0
  176. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/data_validation/nlp/__init__.py +0 -0
  177. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/decorator.py +0 -0
  178. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/load.py +0 -0
  179. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/metadata.py +0 -0
  180. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/BertScore.py +0 -0
  181. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/BleuScore.py +0 -0
  182. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
  183. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ContextualRecall.py +0 -0
  184. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/FeaturesAUC.py +0 -0
  185. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/MeteorScore.py +0 -0
  186. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ModelMetadata.py +0 -0
  187. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ModelMetadataComparison.py +0 -0
  188. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ModelPredictionResiduals.py +0 -0
  189. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/RegardScore.py +0 -0
  190. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
  191. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/RougeScore.py +0 -0
  192. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +0 -0
  193. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +0 -0
  194. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +0 -0
  195. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/TokenDisparity.py +0 -0
  196. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ToxicityScore.py +0 -0
  197. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/__init__.py +0 -0
  198. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
  199. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +0 -0
  200. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
  201. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +0 -0
  202. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
  203. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
  204. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +0 -0
  205. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +0 -0
  206. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +0 -0
  207. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
  208. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
  209. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
  210. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
  211. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
  212. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -0
  213. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/ragas/utils.py +0 -0
  214. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
  215. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
  216. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +0 -0
  217. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
  218. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -0
  219. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
  220. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
  221. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
  222. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -0
  223. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
  224. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
  225. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
  226. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
  227. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
  228. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
  229. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +0 -0
  230. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
  231. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
  232. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
  233. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +0 -0
  234. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
  235. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
  236. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +0 -0
  237. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +0 -0
  238. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
  239. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +0 -0
  240. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
  241. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
  242. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
  243. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
  244. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
  245. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
  246. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
  247. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
  248. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
  249. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
  250. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
  251. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -0
  252. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
  253. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/LJungBox.py +0 -0
  254. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
  255. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
  256. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -0
  257. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
  258. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
  259. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
  260. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
  261. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
  262. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -0
  263. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
  264. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/RunsTest.py +0 -0
  265. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
  266. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +0 -0
  267. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
  268. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
  269. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/ongoing_monitoring/FeatureDrift.py +0 -0
  270. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +0 -0
  271. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +0 -0
  272. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +0 -0
  273. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/Bias.py +0 -0
  274. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/Clarity.py +0 -0
  275. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/Conciseness.py +0 -0
  276. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/Delimitation.py +0 -0
  277. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/NegativeInstruction.py +0 -0
  278. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/Robustness.py +0 -0
  279. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/Specificity.py +0 -0
  280. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/__init__.py +0 -0
  281. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/prompt_validation/ai_powered_test.py +0 -0
  282. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/test_providers.py +0 -0
  283. {validmind-2.5.2 → validmind-2.5.8}/validmind/tests/utils.py +0 -0
  284. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/__init__.py +0 -0
  285. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/classification/sklearn/Accuracy.py +0 -0
  286. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/classification/sklearn/F1.py +0 -0
  287. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/classification/sklearn/Precision.py +0 -0
  288. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +0 -0
  289. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/classification/sklearn/Recall.py +0 -0
  290. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/composite.py +0 -0
  291. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
  292. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
  293. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
  294. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
  295. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
  296. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
  297. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +0 -0
  298. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +0 -0
  299. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +0 -0
  300. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +0 -0
  301. {validmind-2.5.2 → validmind-2.5.8}/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +0 -0
  302. {validmind-2.5.2 → validmind-2.5.8}/validmind/utils.py +0 -0
  303. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/__init__.py +0 -0
  304. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/dataset/__init__.py +0 -0
  305. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/dataset/dataset.py +0 -0
  306. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/figure.py +0 -0
  307. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/input.py +0 -0
  308. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/metric.py +0 -0
  309. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/metric_result.py +0 -0
  310. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/output_template.py +0 -0
  311. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/result_summary.py +0 -0
  312. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/result_wrapper.py +0 -0
  313. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/test.py +0 -0
  314. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/threshold_test.py +0 -0
  315. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test/threshold_test_result.py +0 -0
  316. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test_context.py +0 -0
  317. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test_suite/runner.py +0 -0
  318. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test_suite/summary.py +0 -0
  319. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test_suite/test.py +0 -0
  320. {validmind-2.5.2 → validmind-2.5.8}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: validmind
3
- Version: 2.5.2
3
+ Version: 2.5.8
4
4
  Summary: ValidMind Developer Framework
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -10,7 +10,7 @@ description = "ValidMind Developer Framework"
10
10
  license = "Commercial License"
11
11
  name = "validmind"
12
12
  readme = "README.pypi.md"
13
- version = "2.5.2"
13
+ version = "2.5.8"
14
14
 
15
15
  [tool.poetry.dependencies]
16
16
  python = ">=3.8.1,<3.12"
@@ -0,0 +1 @@
1
+ __version__ = "2.5.8"
@@ -240,6 +240,11 @@ def init_model(
240
240
  vm_model = class_obj(
241
241
  pipeline=model,
242
242
  input_id=input_id,
243
+ attributes=(
244
+ ModelAttributes.from_dict(attributes)
245
+ if attributes
246
+ else ModelAttributes()
247
+ ),
243
248
  )
244
249
  # TODO: Add metadata for pipeline model
245
250
  metadata = get_model_info(vm_model)
@@ -248,6 +253,7 @@ def init_model(
248
253
  input_id=input_id,
249
254
  model=model, # Trained model instance
250
255
  predict_fn=predict_fn,
256
+ attributes=ModelAttributes.from_dict(attributes) if attributes else None,
251
257
  **kwargs,
252
258
  )
253
259
  metadata = get_model_info(vm_model)
@@ -56,7 +56,6 @@ TestID = Literal[
56
56
  "validmind.model_validation.ragas.AnswerSimilarity",
57
57
  "validmind.model_validation.ragas.AnswerCorrectness",
58
58
  "validmind.model_validation.ragas.ContextRecall",
59
- "validmind.model_validation.ragas.ContextRelevancy",
60
59
  "validmind.model_validation.ragas.ContextPrecision",
61
60
  "validmind.model_validation.ragas.AnswerRelevance",
62
61
  "validmind.model_validation.sklearn.RegressionModelsPerformanceComparison",
@@ -105,7 +105,7 @@ def AnswerCorrectness(
105
105
  "ground_truth": ground_truth_column,
106
106
  }
107
107
 
108
- df = get_renamed_columns(dataset.df, required_columns)
108
+ df = get_renamed_columns(dataset._df, required_columns)
109
109
 
110
110
  result_df = evaluate(
111
111
  Dataset.from_pandas(df), metrics=[answer_correctness], **get_ragas_config()
@@ -109,7 +109,7 @@ def AnswerRelevance(
109
109
  "contexts": contexts_column,
110
110
  }
111
111
 
112
- df = get_renamed_columns(dataset.df, required_columns)
112
+ df = get_renamed_columns(dataset._df, required_columns)
113
113
 
114
114
  result_df = evaluate(
115
115
  Dataset.from_pandas(df), metrics=[answer_relevancy], **get_ragas_config()
@@ -94,7 +94,7 @@ def AnswerSimilarity(
94
94
  "ground_truth": ground_truth_column,
95
95
  }
96
96
 
97
- df = get_renamed_columns(dataset.df, required_columns)
97
+ df = get_renamed_columns(dataset._df, required_columns)
98
98
 
99
99
  result_df = evaluate(
100
100
  Dataset.from_pandas(df), metrics=[answer_similarity], **get_ragas_config()
@@ -132,7 +132,7 @@ def AspectCritique(
132
132
  "contexts": contexts_column,
133
133
  }
134
134
 
135
- df = get_renamed_columns(dataset.df, required_columns)
135
+ df = get_renamed_columns(dataset._df, required_columns)
136
136
 
137
137
  built_in_aspects = [aspect_map[aspect] for aspect in aspects]
138
138
  custom_aspects = (
@@ -100,7 +100,7 @@ def ContextEntityRecall(
100
100
  "contexts": contexts_column,
101
101
  }
102
102
 
103
- df = get_renamed_columns(dataset.df, required_columns)
103
+ df = get_renamed_columns(dataset._df, required_columns)
104
104
 
105
105
  result_df = evaluate(
106
106
  Dataset.from_pandas(df), metrics=[context_entity_recall], **get_ragas_config()
@@ -96,7 +96,7 @@ def ContextPrecision(
96
96
  "ground_truth": ground_truth_column,
97
97
  }
98
98
 
99
- df = get_renamed_columns(dataset.df, required_columns)
99
+ df = get_renamed_columns(dataset._df, required_columns)
100
100
 
101
101
  result_df = evaluate(
102
102
  Dataset.from_pandas(df), metrics=[context_precision], **get_ragas_config()
@@ -96,7 +96,7 @@ def ContextRecall(
96
96
  "ground_truth": ground_truth_column,
97
97
  }
98
98
 
99
- df = get_renamed_columns(dataset.df, required_columns)
99
+ df = get_renamed_columns(dataset._df, required_columns)
100
100
 
101
101
  result_df = evaluate(
102
102
  Dataset.from_pandas(df), metrics=[context_recall], **get_ragas_config()
@@ -94,7 +94,7 @@ def Faithfulness(
94
94
  "contexts": contexts_column,
95
95
  }
96
96
 
97
- df = get_renamed_columns(dataset.df, required_columns)
97
+ df = get_renamed_columns(dataset._df, required_columns)
98
98
 
99
99
  result_df = evaluate(
100
100
  Dataset.from_pandas(df), metrics=[faithfulness], **get_ragas_config()
@@ -25,51 +25,48 @@ from validmind.vm_models import (
25
25
 
26
26
  logger = get_logger(__name__)
27
27
 
28
+ # TODO: A couple of improvements here could be to:
29
+ # 1. Allow the test to use multiple metrics at once
30
+ # 2. Allow custom functions for computing performance
31
+
28
32
  DEFAULT_THRESHOLD = 0.04
33
+ DEFAULT_CLASSIFICATION_METRIC = "auc"
34
+ DEFAULT_REGRESSION_METRIC = "mse"
29
35
  PERFORMANCE_METRICS = {
30
36
  "accuracy": {
31
37
  "function": metrics.accuracy_score,
32
- "is_classification": True,
33
38
  "is_lower_better": False,
34
39
  },
35
40
  "auc": {
36
41
  "function": metrics.roc_auc_score,
37
- "is_classification": True,
38
42
  "is_lower_better": False,
39
43
  },
40
44
  "f1": {
41
45
  "function": metrics.f1_score,
42
- "is_classification": True,
43
46
  "is_lower_better": False,
44
47
  },
45
48
  "precision": {
46
49
  "function": metrics.precision_score,
47
- "is_classification": True,
48
50
  "is_lower_better": False,
49
51
  },
50
52
  "recall": {
51
53
  "function": metrics.recall_score,
52
- "is_classification": True,
53
54
  "is_lower_better": False,
54
55
  },
55
56
  "mse": {
56
57
  "function": metrics.mean_squared_error,
57
- "is_classification": False,
58
58
  "is_lower_better": True,
59
59
  },
60
60
  "mae": {
61
61
  "function": metrics.mean_absolute_error,
62
- "is_classification": False,
63
62
  "is_lower_better": True,
64
63
  },
65
64
  "r2": {
66
65
  "function": metrics.r2_score,
67
- "is_classification": False,
68
66
  "is_lower_better": False,
69
67
  },
70
68
  "mape": {
71
69
  "function": metrics.mean_absolute_percentage_error,
72
- "is_classification": False,
73
70
  "is_lower_better": True,
74
71
  },
75
72
  }
@@ -123,20 +120,13 @@ def _compute_metrics(
123
120
  if is_classification and metric == "auc":
124
121
  # if only one class is present in the data, return 0
125
122
  if len(np.unique(y_true)) == 1:
126
- results[metric].append(0)
127
- return
128
-
129
- score = metric_func(y_true, df_region[prob_column].values)
130
-
131
- # All other classification metrics
132
- elif is_classification:
133
- score = metric_func(y_true, df_region[pred_column].values)
123
+ return results[metric].append(0)
134
124
 
135
- # Regression metrics
136
- else:
137
- score = metric_func(y_true, df_region[pred_column].values)
125
+ return results[metric].append(
126
+ metric_func(y_true, df_region[prob_column].values)
127
+ )
138
128
 
139
- results[metric].append(score)
129
+ return results[metric].append(metric_func(y_true, df_region[pred_column].values))
140
130
 
141
131
 
142
132
  def _plot_overfit_regions(
@@ -219,8 +209,12 @@ def overfit_diagnosis( # noqa: C901
219
209
  is_classification = bool(datasets[0].probability_column(model))
220
210
 
221
211
  # Set default metric if not provided
222
- if metric is None:
223
- metric = "auc" if is_classification else "mse"
212
+ if not metric:
213
+ metric = (
214
+ DEFAULT_CLASSIFICATION_METRIC
215
+ if is_classification
216
+ else DEFAULT_REGRESSION_METRIC
217
+ )
224
218
  logger.info(
225
219
  f"Using default {'classification' if is_classification else 'regression'} metric: {metric}"
226
220
  )
@@ -228,19 +222,6 @@ def overfit_diagnosis( # noqa: C901
228
222
  if id(cut_off_threshold) == id(DEFAULT_THRESHOLD):
229
223
  logger.info("Using default cut-off threshold of 0.04")
230
224
 
231
- metric = metric.lower()
232
- try:
233
- _metric = PERFORMANCE_METRICS[metric.lower()]
234
- except KeyError:
235
- raise ValueError(
236
- f"Invalid metric. Choose from: {', '.join(PERFORMANCE_METRICS.keys())}"
237
- )
238
-
239
- if is_classification and not _metric["is_classification"]:
240
- raise ValueError(f"Cannot use regression metric ({metric}) for classification.")
241
- elif not is_classification and _metric["is_classification"]:
242
- raise ValueError(f"Cannot use classification metric ({metric}) for regression.")
243
-
244
225
  train_df = datasets[0].df
245
226
  test_df = datasets[1].df
246
227
 
@@ -0,0 +1,415 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from collections import defaultdict
6
+ from dataclasses import dataclass
7
+ from operator import add
8
+ from typing import List, Tuple
9
+
10
+ import numpy as np
11
+ import pandas as pd
12
+ import plotly.graph_objects as go
13
+ import seaborn as sns
14
+ from sklearn import metrics
15
+
16
+ from validmind.errors import MissingOrInvalidModelPredictFnError
17
+ from validmind.logging import get_logger
18
+ from validmind.vm_models import (
19
+ Figure,
20
+ ResultSummary,
21
+ ResultTable,
22
+ ResultTableMetadata,
23
+ ThresholdTest,
24
+ ThresholdTestResult,
25
+ VMDataset,
26
+ VMModel,
27
+ )
28
+
29
+ logger = get_logger(__name__)
30
+
31
+ DEFAULT_DECAY_THRESHOLD = 0.05
32
+ DEFAULT_STD_DEV_LIST = [0.1, 0.2, 0.3, 0.4, 0.5]
33
+ DEFAULT_CLASSIFICATION_METRIC = "auc"
34
+ DEFAULT_REGRESSION_METRIC = "mse"
35
+ PERFORMANCE_METRICS = {
36
+ "accuracy": {
37
+ "function": metrics.accuracy_score,
38
+ "is_lower_better": False,
39
+ },
40
+ "auc": {
41
+ "function": metrics.roc_auc_score,
42
+ "is_lower_better": False,
43
+ },
44
+ "f1": {
45
+ "function": metrics.f1_score,
46
+ "is_lower_better": False,
47
+ },
48
+ "precision": {
49
+ "function": metrics.precision_score,
50
+ "is_lower_better": False,
51
+ },
52
+ "recall": {
53
+ "function": metrics.recall_score,
54
+ "is_lower_better": False,
55
+ },
56
+ "mse": {
57
+ "function": metrics.mean_squared_error,
58
+ "is_lower_better": True,
59
+ },
60
+ "mae": {
61
+ "function": metrics.mean_absolute_error,
62
+ "is_lower_better": True,
63
+ },
64
+ "r2": {
65
+ "function": metrics.r2_score,
66
+ "is_lower_better": False,
67
+ },
68
+ "mape": {
69
+ "function": metrics.mean_absolute_percentage_error,
70
+ "is_lower_better": True,
71
+ },
72
+ }
73
+
74
+
75
+ def _add_noise_std_dev(
76
+ values: List[float], x_std_dev: float
77
+ ) -> Tuple[List[float], float]:
78
+ """
79
+ Adds Gaussian noise to a list of values.
80
+ Args:
81
+ values (list[float]): A list of numerical values to which noise is added.
82
+ x_std_dev (float): A scaling factor for the standard deviation of the noise.
83
+ Returns:
84
+ tuple[list[float], float]: A tuple containing:
85
+ - A list of noisy values, where each value is the sum of the corresponding value
86
+ in the input list and a randomly generated value sampled from a Gaussian distribution
87
+ with mean 0 and standard deviation x_std_dev times the standard deviation of the input list.
88
+ - The standard deviation of the input list of values.
89
+ """
90
+ std_dev = np.std(values)
91
+ noise_list = np.random.normal(0, x_std_dev * std_dev, size=len(values))
92
+ noisy_values = list(map(add, noise_list, values))
93
+
94
+ return noisy_values
95
+
96
+
97
+ def _compute_metric(
98
+ dataset: VMDataset, model: VMModel, X: pd.DataFrame, metric: str
99
+ ) -> float:
100
+ if metric not in PERFORMANCE_METRICS:
101
+ raise ValueError(
102
+ f"Invalid metric: {metric}, expected one of {PERFORMANCE_METRICS.keys()}"
103
+ )
104
+
105
+ if metric == "auc":
106
+ try:
107
+ y_proba = model.predict_proba(X)
108
+ except MissingOrInvalidModelPredictFnError:
109
+ y_proba = model.predict(X)
110
+ return metrics.roc_auc_score(dataset.y, y_proba)
111
+
112
+ return PERFORMANCE_METRICS[metric]["function"](dataset.y, model.predict(X))
113
+
114
+
115
+ def _compute_gap(result: dict, metric: str) -> float:
116
+ if PERFORMANCE_METRICS[metric]["is_lower_better"]:
117
+ return result[metric.upper()][-1] - result[metric.upper()][0]
118
+
119
+ return result[metric.upper()][0] - result[metric.upper()][-1]
120
+
121
+
122
+ def _combine_results(results: List[dict]):
123
+ final_results = defaultdict(list)
124
+
125
+ # Interleave rows from each dictionary
126
+ for i in range(len(results[0]["Perturbation Size"])):
127
+ for result in results:
128
+ for key in result.keys():
129
+ final_results[key].append(result[key][i])
130
+
131
+ return pd.DataFrame(final_results)
132
+
133
+
134
+ def _plot_robustness(
135
+ results: pd.DataFrame, metric: str, threshold: float, columns: List[str], model: str
136
+ ):
137
+ fig = go.Figure()
138
+
139
+ datasets = results["Dataset"].unique()
140
+ pallete = [
141
+ f"#{int(r*255):02x}{int(g*255):02x}{int(b*255):02x}"
142
+ for r, g, b in sns.color_palette("husl", len(datasets))
143
+ ]
144
+
145
+ for i, dataset in enumerate(datasets):
146
+ dataset_results = results[results["Dataset"] == dataset]
147
+ fig.add_trace(
148
+ go.Scatter(
149
+ x=dataset_results["Perturbation Size"],
150
+ y=dataset_results[metric.upper()],
151
+ mode="lines+markers",
152
+ name=dataset,
153
+ line=dict(width=3, color=pallete[i]),
154
+ marker=dict(size=10),
155
+ )
156
+ )
157
+
158
+ if PERFORMANCE_METRICS[metric]["is_lower_better"]:
159
+ y_label = f"{metric.upper()} (lower is better)"
160
+ else:
161
+ threshold = -threshold
162
+ y_label = f"{metric.upper()} (higher is better)"
163
+
164
+ # add threshold lines
165
+ for i, dataset in enumerate(datasets):
166
+ baseline = results[results["Dataset"] == dataset][metric.upper()].iloc[0]
167
+ fig.add_trace(
168
+ go.Scatter(
169
+ x=results["Perturbation Size"].unique(),
170
+ y=[baseline + threshold] * len(results["Perturbation Size"].unique()),
171
+ mode="lines",
172
+ name=f"threshold_{dataset}",
173
+ line=dict(dash="dash", width=2, color=pallete[i]),
174
+ showlegend=True,
175
+ )
176
+ )
177
+
178
+ columns_lines = [""]
179
+ for column in columns:
180
+ # keep adding to the last line in list until character limit (40)
181
+ if len(columns_lines[-1]) + len(column) < 40:
182
+ columns_lines[-1] += f"{column}, "
183
+ else:
184
+ columns_lines.append(f"{column}, ")
185
+
186
+ fig.update_layout(
187
+ title=dict(
188
+ text=(
189
+ f"Model Robustness for '{model}'<br><sup>As determined by calculating "
190
+ f"{metric.upper()} decay in the presence of random gaussian noise</sup>"
191
+ ),
192
+ font=dict(size=20),
193
+ x=0.5,
194
+ xanchor="center",
195
+ ),
196
+ xaxis_title=dict(
197
+ text="Perturbation Size (X * Standard Deviation)",
198
+ ),
199
+ yaxis_title=dict(text=y_label),
200
+ plot_bgcolor="white",
201
+ margin=dict(t=60, b=80, r=20, l=60),
202
+ xaxis=dict(showgrid=True, gridcolor="lightgrey"),
203
+ yaxis=dict(showgrid=True, gridcolor="lightgrey"),
204
+ annotations=[
205
+ go.layout.Annotation(
206
+ text=f"Perturbed Features:<br><sup>{'<br>'.join(columns_lines)}</sup>",
207
+ align="left",
208
+ font=dict(size=14),
209
+ bordercolor="lightgrey",
210
+ borderwidth=1,
211
+ borderpad=4,
212
+ showarrow=False,
213
+ x=1.025,
214
+ xref="paper",
215
+ xanchor="left",
216
+ y=-0.15,
217
+ yref="paper",
218
+ )
219
+ ],
220
+ )
221
+
222
+ return fig
223
+
224
+
225
+ # TODO: make this a functional test instead of class-based when appropriate
226
+ # simply have to remove the class and rename this func to OverfitDiagnosis
227
+ def robustness_diagnosis(
228
+ model: VMModel,
229
+ datasets: List[VMDataset],
230
+ metric: str = None,
231
+ scaling_factor_std_dev_list: List[float] = DEFAULT_STD_DEV_LIST,
232
+ performance_decay_threshold: float = DEFAULT_DECAY_THRESHOLD,
233
+ ):
234
+ if not metric:
235
+ metric = (
236
+ DEFAULT_CLASSIFICATION_METRIC
237
+ if datasets[0].probability_column(model)
238
+ else DEFAULT_REGRESSION_METRIC
239
+ )
240
+ logger.info(f"Using default metric ({metric.upper()}) for robustness diagnosis")
241
+
242
+ if id(scaling_factor_std_dev_list) == id(DEFAULT_STD_DEV_LIST):
243
+ logger.info(
244
+ f"Using default scaling factors for the standard deviation of the noise: {DEFAULT_STD_DEV_LIST}"
245
+ )
246
+
247
+ if id(performance_decay_threshold) == id(DEFAULT_DECAY_THRESHOLD):
248
+ logger.info(
249
+ f"Using default performance decay threshold of {DEFAULT_DECAY_THRESHOLD}"
250
+ )
251
+
252
+ results = [{} for _ in range(len(datasets))]
253
+
254
+ # add baseline results (no perturbation)
255
+ for dataset, result in zip(datasets, results):
256
+ result["Perturbation Size"] = [0.0]
257
+ result["Dataset"] = [f"{dataset.input_id}"]
258
+ result["Row Count"] = [dataset._df.shape[0]]
259
+
260
+ result[metric.upper()] = [
261
+ _compute_metric(
262
+ dataset=dataset,
263
+ model=model,
264
+ X=dataset.x_df(),
265
+ metric=metric,
266
+ )
267
+ ]
268
+ result["Performance Decay"] = [0.0]
269
+ result["Passed"] = [True]
270
+
271
+ # Iterate scaling factor for the standard deviation list
272
+ for x_std_dev in scaling_factor_std_dev_list:
273
+ for dataset, result in zip(datasets, results):
274
+
275
+ result["Perturbation Size"].append(x_std_dev)
276
+ result["Dataset"].append(result["Dataset"][0])
277
+ result["Row Count"].append(result["Row Count"][0])
278
+
279
+ temp_df = dataset.x_df().copy()
280
+ for feature in dataset.feature_columns_numeric:
281
+ temp_df[feature] = _add_noise_std_dev(
282
+ values=temp_df[feature].to_list(),
283
+ x_std_dev=x_std_dev,
284
+ )
285
+
286
+ result[metric.upper()].append(
287
+ _compute_metric(
288
+ dataset=dataset,
289
+ model=model,
290
+ X=temp_df,
291
+ metric=metric,
292
+ )
293
+ )
294
+ result["Performance Decay"].append(_compute_gap(result, metric))
295
+ result["Passed"].append(
296
+ result["Performance Decay"][-1] < performance_decay_threshold
297
+ )
298
+
299
+ results_df = _combine_results(results)
300
+ fig = _plot_robustness(
301
+ results=results_df,
302
+ metric=metric,
303
+ threshold=performance_decay_threshold,
304
+ columns=datasets[0].feature_columns_numeric,
305
+ model=model.input_id,
306
+ )
307
+
308
+ # rename perturbation size for baseline
309
+ results_df["Perturbation Size"][
310
+ results_df["Perturbation Size"] == 0.0
311
+ ] = "Baseline (0.0)"
312
+
313
+ return results_df, fig
314
+
315
+
316
+ @dataclass
317
+ class RobustnessDiagnosis(ThresholdTest):
318
+ """Evaluate the robustness of a machine learning model to noise
319
+
320
+ Robustness refers to a model's ability to maintain a high level of performance in
321
+ the face of perturbations or changes (particularly noise) added to its input data.
322
+ This test is designed to help gauge how well the model can handle potential real-
323
+ world scenarios where the input data might be incomplete or corrupted.
324
+
325
+ ## Test Methodology
326
+ This test is conducted by adding Gaussian noise, proportional to a particular standard
327
+ deviation scale, to numeric input features of the input datasets. The model's
328
+ performance on the perturbed data is then evaluated using a user-defined metric or the
329
+ default metric of AUC for classification tasks and MSE for regression tasks. The results
330
+ are then plotted to visualize the model's performance decay as the perturbation size
331
+ increases.
332
+
333
+ When using this test, it is highly recommended to tailor the performance metric, list
334
+ of scaling factors for the standard deviation of the noise, and the performance decay
335
+ threshold to the specific use case of the model being evaluated.
336
+
337
+ **Inputs**:
338
+ - model (VMModel): The trained model to be evaluated.
339
+ - datasets (List[VMDataset]): A list of datasets to evaluate the model against.
340
+
341
+ ## Parameters
342
+ - metric (str, optional): The performance metric to be used for evaluation. If not
343
+ provided, the default metric is used based on the task of the model. Default values
344
+ are "auc" for classification tasks and "mse" for regression tasks.
345
+ - scaling_factor_std_dev_list (List[float], optional): A list of scaling factors for
346
+ the standard deviation of the noise to be added to the input features. The default
347
+ values are [0.1, 0.2, 0.3, 0.4, 0.5].
348
+ - performance_decay_threshold (float, optional): The threshold for the performance
349
+ decay of the model. The default value is 0.05.
350
+ """
351
+
352
+ name = "robustness"
353
+ required_inputs = ["model", "datasets"]
354
+ default_params = {
355
+ "metric": None,
356
+ "scaling_factor_std_dev_list": DEFAULT_STD_DEV_LIST,
357
+ "performance_decay_threshold": DEFAULT_DECAY_THRESHOLD,
358
+ }
359
+ tasks = ["classification", "regression"]
360
+ tags = [
361
+ "sklearn",
362
+ "model_diagnosis",
363
+ "visualization",
364
+ ]
365
+
366
+ def run(self):
367
+ results, fig = robustness_diagnosis(
368
+ model=self.inputs.model,
369
+ datasets=self.inputs.datasets,
370
+ metric=self.params["metric"],
371
+ scaling_factor_std_dev_list=self.params["scaling_factor_std_dev_list"],
372
+ performance_decay_threshold=self.params["performance_decay_threshold"],
373
+ )
374
+
375
+ return self.cache_results(
376
+ passed=results["Passed"].all(),
377
+ test_results_list=[
378
+ ThresholdTestResult(
379
+ test_name=self.params["metric"],
380
+ passed=results["Passed"].all(),
381
+ values=results.to_dict(orient="records"),
382
+ )
383
+ ],
384
+ figures=[
385
+ Figure(
386
+ for_object=self,
387
+ key=f"{self.name}:{self.params['metric']}",
388
+ figure=fig,
389
+ )
390
+ ],
391
+ )
392
+
393
+ def summary(self, results: List[ThresholdTestResult], _):
394
+ return ResultSummary(
395
+ results=[
396
+ ResultTable(
397
+ data=results[0].values,
398
+ metadata=ResultTableMetadata(title="Robustness Diagnosis Results"),
399
+ )
400
+ ]
401
+ )
402
+
403
+ def test(self):
404
+ """Unit Test for Robustness Diagnosis Threshold Test"""
405
+ # Verify the result object is present
406
+ assert self.result is not None
407
+
408
+ # Verify test results and their type
409
+ assert isinstance(self.result.test_results.results, list)
410
+
411
+ # Check for presence and validity of 'values' and 'passed' flag in each result
412
+ for test_result in self.result.test_results.results:
413
+ assert "values" in test_result.__dict__
414
+ assert "passed" in test_result.__dict__
415
+ assert isinstance(test_result.values, list)
@@ -405,7 +405,7 @@ def run_test(
405
405
 
406
406
  if unit_metrics:
407
407
  metric_id_name = "".join(word[0].upper() + word[1:] for word in name.split())
408
- test_id = f"validmind.composite_test.{metric_id_name}"
408
+ test_id = f"validmind.composite_metric.{metric_id_name}"
409
409
 
410
410
  error, TestClass = load_composite_metric(
411
411
  unit_metrics=unit_metrics, metric_name=metric_id_name