validmind 2.5.8__tar.gz → 2.5.18__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (365)
  1. {validmind-2.5.8 → validmind-2.5.18}/PKG-INFO +4 -3
  2. {validmind-2.5.8 → validmind-2.5.18}/pyproject.toml +5 -4
  3. validmind-2.5.18/validmind/__version__.py +1 -0
  4. validmind-2.5.18/validmind/ai/test_descriptions.py +240 -0
  5. validmind-2.5.18/validmind/ai/test_result_description/config.yaml +29 -0
  6. validmind-2.5.18/validmind/ai/test_result_description/context.py +73 -0
  7. validmind-2.5.18/validmind/ai/test_result_description/image_processing.py +124 -0
  8. validmind-2.5.18/validmind/ai/test_result_description/system.jinja +39 -0
  9. validmind-2.5.18/validmind/ai/test_result_description/user.jinja +25 -0
  10. {validmind-2.5.8 → validmind-2.5.18}/validmind/api_client.py +89 -43
  11. {validmind-2.5.8 → validmind-2.5.18}/validmind/client.py +2 -2
  12. {validmind-2.5.8 → validmind-2.5.18}/validmind/client_config.py +11 -14
  13. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/credit_risk/__init__.py +1 -0
  14. validmind-2.5.18/validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  15. validmind-2.5.18/validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  16. validmind-2.5.18/validmind/datasets/regression/fred_timeseries.py +201 -0
  17. {validmind-2.5.8 → validmind-2.5.18}/validmind/template.py +1 -0
  18. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/__init__.py +0 -2
  19. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/statsmodels_timeseries.py +1 -1
  20. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/summarization.py +0 -1
  21. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/time_series.py +0 -43
  22. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/__types__.py +14 -15
  23. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  24. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/ADF.py +31 -24
  25. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/AutoAR.py +9 -9
  26. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/AutoMA.py +23 -16
  27. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/AutoSeasonality.py +18 -16
  28. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/AutoStationarity.py +21 -16
  29. validmind-2.5.18/validmind/tests/data_validation/BivariateScatterPlots.py +82 -0
  30. {validmind-2.5.8/validmind/tests/model_validation/statsmodels → validmind-2.5.18/validmind/tests/data_validation}/BoxPierce.py +34 -34
  31. validmind-2.5.18/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +102 -0
  32. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/ClassImbalance.py +15 -12
  33. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/DFGLSArch.py +19 -13
  34. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/DatasetDescription.py +17 -11
  35. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/DatasetSplit.py +7 -5
  36. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  37. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/Duplicates.py +33 -25
  38. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  39. validmind-2.5.18/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +97 -0
  40. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/HighCardinality.py +19 -12
  41. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  42. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  43. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/IQROutliersTable.py +40 -36
  44. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  45. validmind-2.5.18/validmind/tests/data_validation/JarqueBera.py +70 -0
  46. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/KPSS.py +34 -29
  47. validmind-2.5.18/validmind/tests/data_validation/LJungBox.py +66 -0
  48. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  49. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/MissingValues.py +32 -27
  50. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  51. validmind-2.5.18/validmind/tests/data_validation/PearsonCorrelationMatrix.py +91 -0
  52. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  53. validmind-2.5.18/validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  54. validmind-2.5.18/validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  55. validmind-2.5.18/validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  56. validmind-2.5.18/validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  57. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  58. validmind-2.5.18/validmind/tests/data_validation/RunsTest.py +72 -0
  59. validmind-2.5.18/validmind/tests/data_validation/ScatterPlot.py +75 -0
  60. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  61. {validmind-2.5.8/validmind/tests/model_validation/statsmodels → validmind-2.5.18/validmind/tests/data_validation}/ShapiroWilk.py +35 -30
  62. validmind-2.5.18/validmind/tests/data_validation/Skewness.py +115 -0
  63. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/SpreadPlot.py +35 -35
  64. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  65. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  66. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  67. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  68. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  69. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  70. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  71. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  72. validmind-2.5.18/validmind/tests/data_validation/TimeSeriesHistogram.py +78 -0
  73. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  74. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  75. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  76. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  77. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/UniqueRows.py +11 -6
  78. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/WOEBinPlots.py +23 -16
  79. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/WOEBinTable.py +35 -30
  80. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  81. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  82. validmind-2.5.18/validmind/tests/data_validation/nlp/Hashtags.py +101 -0
  83. validmind-2.5.18/validmind/tests/data_validation/nlp/LanguageDetection.py +78 -0
  84. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/nlp/Mentions.py +21 -15
  85. validmind-2.5.18/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +72 -0
  86. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  87. validmind-2.5.18/validmind/tests/data_validation/nlp/Sentiment.py +76 -0
  88. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/nlp/StopWords.py +26 -19
  89. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  90. validmind-2.5.18/validmind/tests/data_validation/nlp/Toxicity.py +68 -0
  91. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/decorator.py +81 -42
  92. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/BertScore.py +36 -27
  93. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/BleuScore.py +25 -19
  94. validmind-2.5.18/validmind/tests/model_validation/ClusterSizeDistribution.py +98 -0
  95. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ContextualRecall.py +38 -13
  96. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/FeaturesAUC.py +32 -13
  97. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/MeteorScore.py +46 -33
  98. validmind-2.5.8/validmind/tests/model_validation/ModelMetadataComparison.py → validmind-2.5.18/validmind/tests/model_validation/ModelMetadata.py +12 -13
  99. validmind-2.5.18/validmind/tests/model_validation/ModelPredictionResiduals.py +105 -0
  100. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/RegardScore.py +30 -14
  101. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  102. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/RougeScore.py +36 -30
  103. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  104. validmind-2.5.18/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +73 -0
  105. validmind-2.5.18/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +108 -0
  106. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/TokenDisparity.py +31 -23
  107. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ToxicityScore.py +26 -17
  108. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  109. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  110. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  111. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  112. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  113. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  114. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  115. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  116. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  117. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  118. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  119. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  120. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  121. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  122. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  123. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  124. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  125. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  126. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  127. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  128. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  129. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  130. validmind-2.5.18/validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  131. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  132. validmind-2.5.18/validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  133. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/ragas/utils.py +6 -0
  134. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  135. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  136. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  137. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  138. validmind-2.5.18/validmind/tests/model_validation/sklearn/ClusterPerformance.py +80 -0
  139. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  140. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  141. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  142. validmind-2.5.18/validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  143. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  144. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  145. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  146. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  147. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  148. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  149. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  150. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  151. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  152. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  153. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  154. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  155. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  156. validmind-2.5.18/validmind/tests/model_validation/sklearn/RegressionErrors.py +86 -0
  157. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  158. validmind-2.5.8/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py → validmind-2.5.18/validmind/tests/model_validation/sklearn/RegressionPerformance.py +18 -20
  159. validmind-2.5.18/validmind/tests/model_validation/sklearn/RegressionR2Square.py +67 -0
  160. validmind-2.5.18/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +82 -0
  161. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  162. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  163. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  164. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  165. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  166. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  167. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  168. validmind-2.5.18/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +108 -0
  169. validmind-2.5.18/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +86 -0
  170. validmind-2.5.18/validmind/tests/model_validation/statsmodels/GINITable.py +87 -0
  171. validmind-2.5.18/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +94 -0
  172. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  173. validmind-2.5.18/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +102 -0
  174. validmind-2.5.18/validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  175. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  176. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  177. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  178. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  179. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  180. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  181. validmind-2.5.18/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +105 -0
  182. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  183. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  184. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  185. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  186. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/Bias.py +14 -11
  187. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/Clarity.py +16 -14
  188. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/Conciseness.py +7 -5
  189. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/Delimitation.py +23 -22
  190. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  191. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/Robustness.py +12 -10
  192. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/Specificity.py +13 -11
  193. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  194. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/run.py +68 -23
  195. validmind-2.5.18/validmind/unit_metrics/__init__.py +171 -0
  196. {validmind-2.5.8/validmind/unit_metrics/classification/sklearn → validmind-2.5.18/validmind/unit_metrics/classification}/Accuracy.py +1 -1
  197. {validmind-2.5.8/validmind/unit_metrics/classification/sklearn → validmind-2.5.18/validmind/unit_metrics/classification}/F1.py +1 -1
  198. {validmind-2.5.8/validmind/unit_metrics/classification/sklearn → validmind-2.5.18/validmind/unit_metrics/classification}/Precision.py +1 -1
  199. {validmind-2.5.8/validmind/unit_metrics/classification/sklearn → validmind-2.5.18/validmind/unit_metrics/classification}/ROC_AUC.py +1 -2
  200. {validmind-2.5.8/validmind/unit_metrics/classification/sklearn → validmind-2.5.18/validmind/unit_metrics/classification}/Recall.py +1 -1
  201. {validmind-2.5.8/validmind/unit_metrics/regression/sklearn → validmind-2.5.18/validmind/unit_metrics/regression}/AdjustedRSquaredScore.py +1 -1
  202. {validmind-2.5.8 → validmind-2.5.18}/validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  203. {validmind-2.5.8 → validmind-2.5.18}/validmind/unit_metrics/regression/HuberLoss.py +1 -1
  204. {validmind-2.5.8 → validmind-2.5.18}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  205. {validmind-2.5.8/validmind/unit_metrics/regression/sklearn → validmind-2.5.18/validmind/unit_metrics/regression}/MeanAbsoluteError.py +1 -1
  206. {validmind-2.5.8 → validmind-2.5.18}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  207. {validmind-2.5.8 → validmind-2.5.18}/validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  208. {validmind-2.5.8/validmind/unit_metrics/regression/sklearn → validmind-2.5.18/validmind/unit_metrics/regression}/MeanSquaredError.py +1 -1
  209. {validmind-2.5.8 → validmind-2.5.18}/validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  210. {validmind-2.5.8/validmind/unit_metrics/regression/sklearn → validmind-2.5.18/validmind/unit_metrics/regression}/RSquaredScore.py +1 -1
  211. {validmind-2.5.8/validmind/unit_metrics/regression/sklearn → validmind-2.5.18/validmind/unit_metrics/regression}/RootMeanSquaredError.py +1 -1
  212. {validmind-2.5.8 → validmind-2.5.18}/validmind/utils.py +4 -0
  213. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/dataset/dataset.py +2 -0
  214. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/figure.py +5 -0
  215. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/metric.py +1 -0
  216. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/result_wrapper.py +143 -158
  217. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/threshold_test.py +1 -0
  218. validmind-2.5.8/validmind/__version__.py +0 -1
  219. validmind-2.5.8/validmind/ai/test_descriptions.py +0 -279
  220. validmind-2.5.8/validmind/datasets/regression/fred_timeseries.py +0 -272
  221. validmind-2.5.8/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  222. validmind-2.5.8/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  223. validmind-2.5.8/validmind/tests/data_validation/BivariateHistograms.py +0 -117
  224. validmind-2.5.8/validmind/tests/data_validation/BivariateScatterPlots.py +0 -111
  225. validmind-2.5.8/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -141
  226. validmind-2.5.8/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -109
  227. validmind-2.5.8/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  228. validmind-2.5.8/validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  229. validmind-2.5.8/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -104
  230. validmind-2.5.8/validmind/tests/data_validation/ScatterPlot.py +0 -90
  231. validmind-2.5.8/validmind/tests/data_validation/Skewness.py +0 -117
  232. validmind-2.5.8/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -77
  233. validmind-2.5.8/validmind/tests/data_validation/nlp/Hashtags.py +0 -99
  234. validmind-2.5.8/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -59
  235. validmind-2.5.8/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -49
  236. validmind-2.5.8/validmind/tests/data_validation/nlp/Sentiment.py +0 -57
  237. validmind-2.5.8/validmind/tests/data_validation/nlp/Toxicity.py +0 -45
  238. validmind-2.5.8/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -94
  239. validmind-2.5.8/validmind/tests/model_validation/ModelMetadata.py +0 -90
  240. validmind-2.5.8/validmind/tests/model_validation/ModelPredictionResiduals.py +0 -103
  241. validmind-2.5.8/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +0 -76
  242. validmind-2.5.8/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +0 -103
  243. validmind-2.5.8/validmind/tests/model_validation/sklearn/ClusterPerformance.py +0 -118
  244. validmind-2.5.8/validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  245. validmind-2.5.8/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -142
  246. validmind-2.5.8/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -106
  247. validmind-2.5.8/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +0 -63
  248. validmind-2.5.8/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -139
  249. validmind-2.5.8/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -59
  250. validmind-2.5.8/validmind/tests/model_validation/statsmodels/GINITable.py +0 -120
  251. validmind-2.5.8/validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  252. validmind-2.5.8/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -95
  253. validmind-2.5.8/validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  254. validmind-2.5.8/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -135
  255. validmind-2.5.8/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  256. validmind-2.5.8/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  257. validmind-2.5.8/validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  258. validmind-2.5.8/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -137
  259. validmind-2.5.8/validmind/unit_metrics/__init__.py +0 -234
  260. {validmind-2.5.8 → validmind-2.5.18}/LICENSE +0 -0
  261. {validmind-2.5.8 → validmind-2.5.18}/README.pypi.md +0 -0
  262. {validmind-2.5.8 → validmind-2.5.18}/validmind/__init__.py +0 -0
  263. {validmind-2.5.8 → validmind-2.5.18}/validmind/ai/utils.py +0 -0
  264. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/__init__.py +0 -0
  265. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/classification/__init__.py +0 -0
  266. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/classification/customer_churn.py +0 -0
  267. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
  268. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
  269. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/classification/taiwan_credit.py +0 -0
  270. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/cluster/digits.py +0 -0
  271. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  272. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/credit_risk/lending_club.py +0 -0
  273. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/llm/rag/__init__.py +0 -0
  274. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
  275. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
  276. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
  277. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
  278. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
  279. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/llm/rag/rfp.py +0 -0
  280. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/nlp/__init__.py +0 -0
  281. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
  282. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
  283. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
  284. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
  285. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
  286. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
  287. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/__init__.py +0 -0
  288. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/california_housing.py +0 -0
  289. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
  290. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
  291. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
  292. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
  293. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
  294. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
  295. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
  296. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
  297. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
  298. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
  299. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
  300. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
  301. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
  302. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
  303. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
  304. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
  305. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
  306. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/datasets/leanding_club_loan_rates.csv +0 -0
  307. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/fred.py +0 -0
  308. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/lending_club.py +0 -0
  309. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
  310. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
  311. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
  312. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
  313. {validmind-2.5.8 → validmind-2.5.18}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
  314. {validmind-2.5.8 → validmind-2.5.18}/validmind/errors.py +0 -0
  315. {validmind-2.5.8 → validmind-2.5.18}/validmind/html_templates/__init__.py +0 -0
  316. {validmind-2.5.8 → validmind-2.5.18}/validmind/html_templates/content_blocks.py +0 -0
  317. {validmind-2.5.8 → validmind-2.5.18}/validmind/input_registry.py +0 -0
  318. {validmind-2.5.8 → validmind-2.5.18}/validmind/logging.py +0 -0
  319. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/__init__.py +0 -0
  320. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/foundation.py +0 -0
  321. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/function.py +0 -0
  322. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/huggingface.py +0 -0
  323. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/metadata.py +0 -0
  324. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/pipeline.py +0 -0
  325. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/pytorch.py +0 -0
  326. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/r_model.py +0 -0
  327. {validmind-2.5.8 → validmind-2.5.18}/validmind/models/sklearn.py +0 -0
  328. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/classifier.py +0 -0
  329. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/cluster.py +0 -0
  330. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/embeddings.py +0 -0
  331. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/llm.py +0 -0
  332. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/nlp.py +0 -0
  333. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/parameters_optimization.py +0 -0
  334. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/regression.py +0 -0
  335. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/tabular_datasets.py +0 -0
  336. {validmind-2.5.8 → validmind-2.5.18}/validmind/test_suites/text_data.py +0 -0
  337. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/__init__.py +0 -0
  338. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/_store.py +0 -0
  339. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/__init__.py +0 -0
  340. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/data_validation/nlp/__init__.py +0 -0
  341. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/load.py +0 -0
  342. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/metadata.py +0 -0
  343. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/__init__.py +0 -0
  344. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
  345. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
  346. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
  347. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/prompt_validation/__init__.py +0 -0
  348. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/test_providers.py +0 -0
  349. {validmind-2.5.8 → validmind-2.5.18}/validmind/tests/utils.py +0 -0
  350. {validmind-2.5.8 → validmind-2.5.18}/validmind/unit_metrics/composite.py +0 -0
  351. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/__init__.py +0 -0
  352. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/dataset/__init__.py +0 -0
  353. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/dataset/utils.py +0 -0
  354. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/input.py +0 -0
  355. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/model.py +0 -0
  356. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/metric_result.py +0 -0
  357. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/output_template.py +0 -0
  358. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/result_summary.py +0 -0
  359. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/test.py +0 -0
  360. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test/threshold_test_result.py +0 -0
  361. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test_context.py +0 -0
  362. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test_suite/runner.py +0 -0
  363. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test_suite/summary.py +0 -0
  364. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test_suite/test.py +0 -0
  365. {validmind-2.5.8 → validmind-2.5.18}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: validmind
3
- Version: 2.5.8
3
+ Version: 2.5.18
4
4
  Summary: ValidMind Developer Framework
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -20,6 +20,7 @@ Requires-Dist: aiohttp[speedups]
20
20
  Requires-Dist: arch
21
21
  Requires-Dist: bert-score (>=0.3.13)
22
22
  Requires-Dist: catboost
23
+ Requires-Dist: datasets (>=2.10.0,<3.0.0)
23
24
  Requires-Dist: evaluate
24
25
  Requires-Dist: ipywidgets
25
26
  Requires-Dist: kaleido (>=0.2.1,!=0.2.1.post1)
@@ -34,13 +35,13 @@ Requires-Dist: nltk (>=3.8.1,<4.0.0)
34
35
  Requires-Dist: numba (<0.59.0)
35
36
  Requires-Dist: numpy
36
37
  Requires-Dist: openai (>=1)
37
- Requires-Dist: pandas (>=1.1,<2)
38
+ Requires-Dist: pandas (>=1.1,<=2.0.3)
38
39
  Requires-Dist: plotly
39
40
  Requires-Dist: plotly-express
40
41
  Requires-Dist: polars
41
42
  Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
42
43
  Requires-Dist: python-dotenv
43
- Requires-Dist: ragas (>=0.1.7) ; extra == "all" or extra == "llm"
44
+ Requires-Dist: ragas (>=0.1.19) ; extra == "all" or extra == "llm"
44
45
  Requires-Dist: rouge (>=1)
45
46
  Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
46
47
  Requires-Dist: scikit-learn
@@ -10,15 +10,15 @@ description = "ValidMind Developer Framework"
10
10
  license = "Commercial License"
11
11
  name = "validmind"
12
12
  readme = "README.pypi.md"
13
- version = "2.5.8"
13
+ version = "2.5.18"
14
14
 
15
15
  [tool.poetry.dependencies]
16
- python = ">=3.8.1,<3.12"
17
16
  aiohttp = {extras = ["speedups"], version = "*"}
18
17
  arch = "*"
19
18
  bert-score = ">=0.3.13"
20
19
  catboost = "*"
21
20
  evaluate = "*"
21
+ datasets = "^2.10.0"
22
22
  ipywidgets = "*"
23
23
  kaleido = ">=0.2.1,!=0.2.1.post1"
24
24
  langchain-openai = {version = ">=0.1.8", optional = true}
@@ -32,13 +32,14 @@ nltk = "^3.8.1"
32
32
  numba = "<0.59.0" # TODO: https://github.com/validmind/developer-framework/pull/28
33
33
  numpy = "*"
34
34
  openai = ">=1"
35
- pandas = ">=1.1,<2" # TODO: small issues blocking 2.0 but it would speed things up to use arrow backend
35
+ pandas = ">=1.1,<=2.0.3"
36
36
  plotly = "*"
37
37
  plotly-express = "*"
38
38
  polars = "*"
39
39
  pycocoevalcap = {version = "^1.2", optional = true}
40
+ python = ">=3.8.1,<3.12"
40
41
  python-dotenv = "*"
41
- ragas = {version = ">=0.1.7", optional = true}
42
+ ragas = {version = ">=0.1.19", optional = true}
42
43
  rouge = ">=1"
43
44
  rpy2 = {version = "^3.5.10", optional = true}
44
45
  scikit-learn = "*"
@@ -0,0 +1 @@
1
+ __version__ = "2.5.18"
@@ -0,0 +1,240 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import os
6
+ import re
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from typing import Union
9
+
10
+ from jinja2 import Template
11
+
12
+ from validmind.utils import md_to_html
13
+
14
+ from ..client_config import client_config
15
+ from ..logging import get_logger
16
+
17
+ __executor = ThreadPoolExecutor()
18
+ __prompt = None
19
+
20
+ logger = get_logger(__name__)
21
+
22
+
23
+ AI_REVISION_NAME = "Generated by ValidMind AI"
24
+ DEFAULT_REVISION_NAME = "Default Description"
25
+
26
+
27
+ def _load_prompt():
28
+ global __prompt
29
+
30
+ if not __prompt:
31
+ folder_path = os.path.join(os.path.dirname(__file__), "test_result_description")
32
+ with open(os.path.join(folder_path, "system.jinja"), "r") as f:
33
+ system_prompt = f.read()
34
+ with open(os.path.join(folder_path, "user.jinja"), "r") as f:
35
+ user_prompt = f.read()
36
+
37
+ __prompt = (Template(system_prompt), Template(user_prompt))
38
+
39
+ return __prompt
40
+
41
+
42
+ def prompt_to_message(role, prompt):
43
+ if "[[IMAGE:" not in prompt:
44
+ return {"role": role, "content": prompt}
45
+
46
+ content = []
47
+
48
+ # Regex pattern to find [[IMAGE:<b64-data>]] markers
49
+ pattern = re.compile(r"\[\[IMAGE:(.*?)\]\]", re.DOTALL)
50
+
51
+ last_index = 0
52
+ for match in pattern.finditer(prompt):
53
+ # Text before the image marker
54
+ start, end = match.span()
55
+ if start > last_index:
56
+ content.append({"type": "text", "text": prompt[last_index:start]})
57
+
58
+ # Image
59
+ content.append({"type": "image_url", "image_url": {"url": match.group(1)}})
60
+
61
+ last_index = end
62
+
63
+ # Text after the last image
64
+ if last_index < len(prompt):
65
+ content.append({"type": "text", "text": prompt[last_index:]})
66
+
67
+ return {"role": role, "content": content}
68
+
69
+
70
+ class DescriptionFuture:
71
+ """This will be immediately returned from generate_description so that
72
+ the tests can continue to be run in parallel while the description is
73
+ retrieved asynchronously.
74
+
75
+ The value will be retrieved later and if it's not ready yet, it should
76
+ block until it is.
77
+ """
78
+
79
+ def __init__(self, future):
80
+ self._future = future
81
+
82
+ def get_description(self):
83
+ if isinstance(self._future, str):
84
+ description = self._future
85
+ else:
86
+ # This will block until the future is completed
87
+ description = self._future.result()
88
+
89
+ return md_to_html(description, mathml=True)
90
+
91
+
92
+ def generate_description(
93
+ test_id: str,
94
+ test_description: str,
95
+ test_summary: str,
96
+ metric: Union[float, int] = None,
97
+ figures: list = None,
98
+ ):
99
+ """Generate the description for the test results"""
100
+ if not test_summary and not figures and not metric:
101
+ raise ValueError(
102
+ "No summary, unit metric or figures provided - cannot generate description"
103
+ )
104
+
105
+ # TODO: fix circular import
106
+ from validmind.ai.utils import get_client_and_model
107
+
108
+ client, model = get_client_and_model()
109
+
110
+ # get last part of test id
111
+ test_name = test_id.split(".")[-1]
112
+ # truncate the test description to save time
113
+ test_description = (
114
+ f"{test_description[:500]}..."
115
+ if len(test_description) > 500
116
+ else test_description
117
+ )
118
+
119
+ if metric:
120
+ metric_summary = f"**Metric Value**: {metric}"
121
+ if test_summary:
122
+ test_summary = metric_summary + "\n" + test_summary
123
+ else:
124
+ test_summary = metric_summary
125
+
126
+ figures = [] if test_summary else figures
127
+
128
+ input_data = {
129
+ "test_name": test_name,
130
+ "test_description": test_description,
131
+ "summary": test_summary,
132
+ "figures": [figure._get_b64_url() for figure in figures],
133
+ }
134
+ system, user = _load_prompt()
135
+
136
+ response = client.chat.completions.create(
137
+ model=model,
138
+ temperature=0.0,
139
+ messages=[
140
+ prompt_to_message("system", system.render(input_data)),
141
+ prompt_to_message("user", user.render(input_data)),
142
+ ],
143
+ )
144
+
145
+ return response.choices[0].message.content
146
+
147
+
148
+ def background_generate_description(
149
+ test_id: str,
150
+ test_description: str,
151
+ test_summary: str,
152
+ figures: list = None,
153
+ metric: Union[int, float] = None,
154
+ ):
155
+ def wrapped():
156
+ try:
157
+ return generate_description(
158
+ test_id=test_id,
159
+ test_description=test_description,
160
+ test_summary=test_summary,
161
+ figures=figures,
162
+ metric=metric,
163
+ )
164
+ except Exception as e:
165
+ logger.error(f"Failed to generate description: {e}")
166
+
167
+ return test_description
168
+
169
+ return DescriptionFuture(__executor.submit(wrapped))
170
+
171
+
172
+ def get_description_metadata(
173
+ test_id,
174
+ default_description,
175
+ summary=None,
176
+ figures=None,
177
+ metric=None,
178
+ prefix="metric_description",
179
+ should_generate=True,
180
+ ):
181
+ """Get Metadata Dictionary for a Test or Metric Result
182
+
183
+ Generates an LLM interpretation of the test results or uses the default
184
+ description and returns a metadata object that can be logged with the test results.
185
+
186
+ By default, the description is generated by an LLM that will interpret the test
187
+ results and provide a human-readable description. If the summary or figures are
188
+ not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
189
+ set to `0` or `false` or no LLM has been configured, the default description will
190
+ be used as the test result description.
191
+
192
+ Note: Either the summary or figures must be provided to generate the description.
193
+
194
+ Args:
195
+ test_id (str): The test ID
196
+ default_description (str): The default description for the test
197
+ summary (Any): The test summary or results to interpret
198
+ figures (List[Figure]): The figures to attach to the test suite result
199
+ metric (Union[int, float]): Unit metrics attached to the test result
200
+ prefix (str): The prefix to use for the content ID (Default: "metric_description")
201
+ should_generate (bool): Whether to generate the description or not (Default: True)
202
+
203
+ Returns:
204
+ dict: The metadata object to be logged with the test results
205
+ """
206
+ # Check the feature flag first, then the environment variable
207
+ llm_descriptions_enabled = (
208
+ client_config.can_generate_llm_test_descriptions()
209
+ and os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") not in ["0", "false"]
210
+ )
211
+
212
+ # TODO: fix circular import
213
+ from validmind.ai.utils import is_configured
214
+
215
+ if (
216
+ should_generate
217
+ and (summary or figures)
218
+ and llm_descriptions_enabled
219
+ and is_configured()
220
+ ):
221
+ revision_name = AI_REVISION_NAME
222
+
223
+ # get description future and set it as the description in the metadata
224
+ # this will be lazily retrieved so it can run in the background in parallel
225
+ description = background_generate_description(
226
+ test_id=test_id,
227
+ test_description=default_description,
228
+ test_summary=summary,
229
+ figures=figures,
230
+ metric=metric,
231
+ )
232
+
233
+ else:
234
+ revision_name = DEFAULT_REVISION_NAME
235
+ description = md_to_html(default_description, mathml=True)
236
+
237
+ return {
238
+ "content_id": f"{prefix}:{test_id}::{revision_name}",
239
+ "text": description,
240
+ }
@@ -0,0 +1,29 @@
1
+ id: test_result_description
2
+ name: Test Result Description
3
+ description: Generate a description for a test result
4
+ version: 0.1.0
5
+ model: gpt-4o
6
+ temperature: 0.0
7
+ output_type: markdown
8
+ prompts:
9
+ system:
10
+ role: system
11
+ path: system.jinja
12
+ user:
13
+ role: user
14
+ path: user.jinja
15
+ inputs:
16
+ test_name:
17
+ description: The name of the test that produced the result (usually the last part of the test ID)
18
+ type: string
19
+ test_description:
20
+ description: The description (docstring) of the test that was run
21
+ type: string
22
+ summary:
23
+ description: The json result summary (i.e. the table(s) returned by the test)
24
+ type: list
25
+ optional: true
26
+ figures:
27
+ description: A list of base64 encoded images of the figures returned by the test
28
+ type: list
29
+ optional: true
@@ -0,0 +1,73 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import multiprocessing
6
+
7
+ MIN_IMAGES_FOR_PARALLEL = 4
8
+ MAX_WORKERS = multiprocessing.cpu_count()
9
+
10
+
11
+ def parallel_downsample_images(base64_strings):
12
+ import os
13
+ import sys
14
+
15
+ sys.path.insert(0, os.path.dirname(os.path.dirname(__file__)))
16
+ from test_result_description.image_processing import (
17
+ downsample_image, # type: ignore
18
+ )
19
+
20
+ num_images = len(base64_strings)
21
+
22
+ if num_images < MIN_IMAGES_FOR_PARALLEL:
23
+ return [downsample_image(img) for img in base64_strings]
24
+
25
+ num_workers = min(num_images, MAX_WORKERS)
26
+
27
+ with multiprocessing.Pool(processes=num_workers) as pool:
28
+ results = pool.map(downsample_image, base64_strings)
29
+
30
+ sys.path.pop(0)
31
+
32
+ return results
33
+
34
+
35
+ class Context:
36
+ def __init__(self, mode="local"):
37
+ pass
38
+
39
+ def load(self, input_data):
40
+ # this task can accept a dict or a test result object from the dev framework
41
+ if isinstance(input_data, dict):
42
+ return input_data
43
+
44
+ # we are likely running outside of the dev framework and need to convert
45
+ # the test result object to a dictionary
46
+ test_result = input_data
47
+
48
+ try:
49
+ from markdownify import markdownify as md
50
+ except ImportError as e:
51
+ raise ImportError(
52
+ "Failed to import markdownify. Please install the package to use this task."
53
+ ) from e
54
+
55
+ input_data = {
56
+ "test_name": test_result.result_id.split(".")[-1],
57
+ "test_description": md(test_result.result_metadata[0]["text"]),
58
+ }
59
+
60
+ if hasattr(test_result, "metric") and test_result.metric.summary is not None:
61
+ input_data["summary"] = test_result.metric.summary.serialize()
62
+ elif (
63
+ hasattr(test_result, "test_results")
64
+ and test_result.test_results.summary is not None
65
+ ):
66
+ input_data["summary"] = test_result.test_results.summary.serialize()
67
+
68
+ if test_result.figures:
69
+ input_data["figures"] = parallel_downsample_images(
70
+ [figure._get_b64_url() for figure in test_result.figures]
71
+ )
72
+
73
+ return input_data
@@ -0,0 +1,124 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import base64
6
+ import io
7
+
8
+ import numpy as np
9
+ from PIL import Image, ImageEnhance, ImageFilter
10
+
11
+ DOWNSAMPLE_PERCENTAGE = 50
12
+
13
+
14
+ def open_base64_image(base64_string):
15
+ if base64_string.startswith("data:image/png;base64,"):
16
+ base64_string = base64_string.split(",")[1]
17
+
18
+ image_data = base64.b64decode(base64_string)
19
+ image_buffer = io.BytesIO(image_data)
20
+ image = Image.open(image_buffer)
21
+
22
+ return image
23
+
24
+
25
+ def downsample_image(base64_string):
26
+ image = open_base64_image(base64_string)
27
+
28
+ # Calculate the target dimensions based on the reduction percentage
29
+ target_width = int(image.width * (1 - DOWNSAMPLE_PERCENTAGE / 100))
30
+ target_height = int(image.height * (1 - DOWNSAMPLE_PERCENTAGE / 100))
31
+
32
+ # If the image is already smaller than the target size, return the original
33
+ if image.width <= target_width and image.height <= target_height:
34
+ return base64_string
35
+
36
+ # remove any margins from the image
37
+ # Find the bounding box of non-uniform pixels (margin detection)
38
+ width, height = image.size
39
+ background = image.getpixel((0, 0)) # Assume top-left pixel is background color
40
+
41
+ def is_different(pixel):
42
+ return pixel != background
43
+
44
+ left = next(
45
+ x
46
+ for x in range(width)
47
+ if any(is_different(image.getpixel((x, y))) for y in range(height))
48
+ )
49
+ right = next(
50
+ x
51
+ for x in range(width - 1, -1, -1)
52
+ if any(is_different(image.getpixel((x, y))) for y in range(height))
53
+ )
54
+ top = next(
55
+ y
56
+ for y in range(height)
57
+ if any(is_different(image.getpixel((x, y))) for x in range(width))
58
+ )
59
+ bottom = next(
60
+ y
61
+ for y in range(height - 1, -1, -1)
62
+ if any(is_different(image.getpixel((x, y))) for x in range(width))
63
+ )
64
+
65
+ # Crop the image to remove the uniform margin (with some padding)
66
+ bbox = (left - 5, top - 5, right + 6, bottom + 6)
67
+ image = image.crop(bbox)
68
+
69
+ # If the image has an alpha channel, remove any transparent margins
70
+ if image.mode in ("RGBA", "LA"):
71
+ alpha = image.getchannel("A")
72
+ bbox = alpha.getbbox()
73
+ if bbox:
74
+ image = image.crop(bbox)
75
+
76
+ # Apply unsharp mask to enhance edges
77
+ image = image.filter(ImageFilter.UnsharpMask(radius=2, percent=150, threshold=3))
78
+
79
+ # Calculate new dimensions
80
+ aspect_ratio = image.width / image.height
81
+ new_height = target_height
82
+ new_width = int(new_height * aspect_ratio)
83
+
84
+ # print(f"downsampling from {width}x{height} to {new_width}x{new_height}")
85
+
86
+ # Ensure we don't exceed the target width
87
+ if new_width > target_width:
88
+ new_width = target_width
89
+ new_height = int(new_width / aspect_ratio)
90
+
91
+ # print(f"downsampling from {image.width}x{image.height} to {new_width}x{new_height}")
92
+
93
+ # Convert to numpy array for custom downsampling
94
+ img_array = np.array(image)
95
+
96
+ # Nearest-neighbor sampling: pick one source pixel per output pixel
97
+ h_factor = img_array.shape[0] / new_height
98
+ w_factor = img_array.shape[1] / new_width
99
+
100
+ h_indices = (np.arange(new_height).reshape(-1, 1) * h_factor).astype(int)
101
+ w_indices = (np.arange(new_width).reshape(1, -1) * w_factor).astype(int)
102
+
103
+ h_indices = np.minimum(h_indices, img_array.shape[0] - 1)
104
+ w_indices = np.minimum(w_indices, img_array.shape[1] - 1)
105
+
106
+ # Convert back to PIL Image
107
+ image = Image.fromarray(img_array[h_indices, w_indices].astype(np.uint8))
108
+
109
+ # Enhance contrast slightly
110
+ enhancer = ImageEnhance.Contrast(image)
111
+ image = enhancer.enhance(1.2)
112
+
113
+ # Sharpen the image
114
+ image = image.filter(ImageFilter.SHARPEN)
115
+
116
+ # Convert the image to bytes in PNG format
117
+ buffered = io.BytesIO()
118
+ image.save(buffered, format="PNG")
119
+ img_bytes = buffered.getvalue()
120
+
121
+ # Encode the bytes to base64
122
+ b64_encoded = base64.b64encode(img_bytes).decode("utf-8")
123
+
124
+ return f"data:image/png;base64,{b64_encoded}"
@@ -0,0 +1,39 @@
1
+ You are an expert data scientist and MRM specialist.
2
+ You are tasked with analyzing the results of a quantitative test run on some model or dataset.
3
+ Your goal is to create a test description that will act as part of the model documentation.
4
+ You will provide both the developer and other consumers of the documentation with a clear and concise "interpretation" of the results they will see.
5
+ The overarching theme to maintain is MRM documentation.
6
+
7
+ Examine the provided statistical test results and compose a description of the results.
8
+ The results are either in the form of serialized tables or images of plots.
9
+ Compose a description and interpretation of the result to accompany it in MRM documentation.
10
+ It will be read by other data scientists and developers and by validators and stakeholders.
11
+
12
+ Use valid Markdown syntax to format the response.
13
+ Avoid long sentences and complex vocabulary.
14
+ Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
15
+ Structure the response clearly and logically.
16
+ Respond only with your analysis and insights, not the verbatim test results.
17
+ Respond only with the markdown content, no explanation or context for your response is necessary.
18
+ Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
19
+
20
+ Explain the test, its purpose, its mechanism/formula etc and why it is useful.
21
+ If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
22
+ Highlight the key insights from the test results. The key insights should be concise and easily understood.
23
+ An insight should only be included if it is something not entirely obvious from the test results.
24
+ End the response with any closing remarks, summary or additional useful information.
25
+
26
+ Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):
27
+
28
+ <ResponseFormat>
29
+ **<Test Name>** calculates the xyz <continue to explain what it does in detail>...
30
+
31
+ This test is useful for <explain why and for what this test is useful>...
32
+
33
+ **Key Insights:**
34
+
35
+ The following key insights can be identified in the test results:
36
+
37
+ - **<key insight 1 - title>**: <concise explanation of key insight 1>
38
+ - ...<continue with any other key insights using the same format>
39
+ </ResponseFormat>
@@ -0,0 +1,25 @@
1
+ **Test ID**: `{{ test_name }}`
2
+
3
+ **Test Description**:
4
+
5
+ {{ test_description }}
6
+
7
+ ---
8
+
9
+ Generate a description of the following result of the test using the instructions given in your system prompt.
10
+
11
+ {%- if summary %}
12
+ **Test Result Tables** *(Raw Data)*:
13
+ {{ summary }}
14
+ {%- endif %}
15
+
16
+ {%- if figures %}
17
+ The following images make up the results of the test.
18
+ {%- for b64_image_url in figures %}
19
+ [[IMAGE:{{ b64_image_url }}]]
20
+ {%- endfor %}
21
+ {%- endif %}
22
+
23
+ Keep your response concise and to the point!
24
+ Only include content in your response if it's something truly insightful or interesting!
25
+ DO NOT VERBOSELY EXPLAIN THE TEST OR THE RESULTS!!!