validmind 2.2.5__tar.gz → 2.3.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (308)
  1. {validmind-2.2.5 → validmind-2.3.1}/PKG-INFO +1 -1
  2. {validmind-2.2.5 → validmind-2.3.1}/pyproject.toml +1 -1
  3. validmind-2.3.1/validmind/__version__.py +1 -0
  4. validmind-2.2.5/validmind/ai.py → validmind-2.3.1/validmind/ai/test_descriptions.py +127 -69
  5. validmind-2.3.1/validmind/ai/utils.py +104 -0
  6. {validmind-2.2.5 → validmind-2.3.1}/validmind/api_client.py +70 -31
  7. {validmind-2.2.5 → validmind-2.3.1}/validmind/client.py +5 -5
  8. {validmind-2.2.5 → validmind-2.3.1}/validmind/logging.py +38 -32
  9. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/foundation.py +10 -6
  10. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/function.py +3 -1
  11. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/metadata.py +1 -1
  12. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/__init__.py +1 -7
  13. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/regression.py +0 -16
  14. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/statsmodels_timeseries.py +1 -1
  15. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/ACFandPACFPlot.py +36 -27
  16. {validmind-2.2.5/validmind/tests/model_validation/statsmodels → validmind-2.3.1/validmind/tests/data_validation}/ADF.py +42 -13
  17. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/BivariateScatterPlots.py +38 -41
  18. {validmind-2.2.5/validmind/tests/model_validation/statsmodels → validmind-2.3.1/validmind/tests/data_validation}/DFGLSArch.py +67 -11
  19. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/HeatmapFeatureCorrelations.py +1 -1
  20. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/HighPearsonCorrelation.py +12 -3
  21. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  22. {validmind-2.2.5/validmind/tests/model_validation/statsmodels → validmind-2.3.1/validmind/tests/data_validation}/KPSS.py +64 -11
  23. {validmind-2.2.5/validmind/tests/model_validation/statsmodels → validmind-2.3.1/validmind/tests/data_validation}/PhillipsPerronArch.py +65 -11
  24. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/ScatterPlot.py +1 -1
  25. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/SeasonalDecompose.py +12 -7
  26. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TabularDateTimeHistograms.py +29 -33
  27. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/WOEBinPlots.py +1 -1
  28. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/WOEBinTable.py +1 -1
  29. {validmind-2.2.5/validmind/tests/model_validation/statsmodels → validmind-2.3.1/validmind/tests/data_validation}/ZivotAndrewsArch.py +65 -11
  30. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/CommonWords.py +1 -1
  31. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/Hashtags.py +1 -1
  32. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/Mentions.py +1 -1
  33. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +2 -1
  34. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/Punctuations.py +1 -1
  35. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/Sentiment.py +1 -1
  36. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/TextDescription.py +5 -1
  37. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/Toxicity.py +1 -1
  38. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/decorator.py +1 -1
  39. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/FeaturesAUC.py +5 -3
  40. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +4 -0
  41. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +4 -0
  42. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +4 -0
  43. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +4 -0
  44. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +4 -0
  45. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +4 -0
  46. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +3 -3
  47. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  48. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  49. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/AspectCritique.py +14 -8
  50. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +3 -4
  51. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/ContextPrecision.py +4 -5
  52. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/ContextRecall.py +3 -4
  53. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/ContextRelevancy.py +5 -4
  54. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/Faithfulness.py +6 -5
  55. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ragas/utils.py +35 -9
  56. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/ClusterPerformance.py +2 -2
  57. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +1 -1
  58. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +6 -8
  59. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/RegressionErrors.py +1 -1
  60. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/RegressionModelsPerformanceComparison.py +14 -8
  61. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  62. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +1 -1
  63. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/GINITable.py +1 -1
  64. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/JarqueBera.py +1 -1
  65. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +1 -1
  66. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/LJungBox.py +1 -1
  67. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/Lilliefors.py +1 -1
  68. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +4 -0
  69. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +9 -4
  70. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +2 -2
  71. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RunsTest.py +1 -1
  72. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/ShapiroWilk.py +1 -1
  73. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/Bias.py +14 -11
  74. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/Clarity.py +14 -11
  75. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/Conciseness.py +14 -11
  76. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/Delimitation.py +14 -11
  77. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/NegativeInstruction.py +14 -11
  78. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/Robustness.py +11 -11
  79. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/Specificity.py +14 -11
  80. validmind-2.3.1/validmind/tests/prompt_validation/ai_powered_test.py +69 -0
  81. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/composite.py +2 -1
  82. {validmind-2.2.5 → validmind-2.3.1}/validmind/utils.py +4 -49
  83. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/dataset/dataset.py +17 -3
  84. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/dataset/utils.py +2 -2
  85. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/model.py +1 -1
  86. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/metric.py +1 -8
  87. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/result_wrapper.py +27 -34
  88. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/test.py +3 -0
  89. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/threshold_test.py +1 -1
  90. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test_suite/runner.py +12 -6
  91. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test_suite/summary.py +18 -7
  92. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test_suite/test.py +13 -20
  93. validmind-2.2.5/validmind/__version__.py +0 -1
  94. validmind-2.2.5/validmind/tests/data_validation/DefaultRatesbyRiskBandPlot.py +0 -114
  95. validmind-2.2.5/validmind/tests/data_validation/PiTCreditScoresHistogram.py +0 -150
  96. validmind-2.2.5/validmind/tests/data_validation/PiTPDHistogram.py +0 -152
  97. validmind-2.2.5/validmind/tests/model_validation/statsmodels/ADFTest.py +0 -88
  98. validmind-2.2.5/validmind/tests/model_validation/statsmodels/FeatureImportanceAndSignificance.py +0 -198
  99. validmind-2.2.5/validmind/tests/model_validation/statsmodels/PDRatingClassPlot.py +0 -151
  100. validmind-2.2.5/validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +0 -146
  101. validmind-2.2.5/validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +0 -144
  102. validmind-2.2.5/validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +0 -127
  103. validmind-2.2.5/validmind/tests/model_validation/statsmodels/ResidualsVisualInspection.py +0 -130
  104. validmind-2.2.5/validmind/tests/prompt_validation/ai_powered_test.py +0 -91
  105. {validmind-2.2.5 → validmind-2.3.1}/LICENSE +0 -0
  106. {validmind-2.2.5 → validmind-2.3.1}/README.pypi.md +0 -0
  107. {validmind-2.2.5 → validmind-2.3.1}/validmind/__init__.py +0 -0
  108. {validmind-2.2.5 → validmind-2.3.1}/validmind/client_config.py +0 -0
  109. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/__init__.py +0 -0
  110. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/classification/__init__.py +0 -0
  111. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/classification/customer_churn.py +0 -0
  112. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
  113. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
  114. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/classification/taiwan_credit.py +0 -0
  115. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/cluster/digits.py +0 -0
  116. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/credit_risk/__init__.py +0 -0
  117. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  118. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/credit_risk/lending_club.py +0 -0
  119. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/llm/rag/__init__.py +0 -0
  120. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
  121. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
  122. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
  123. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
  124. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
  125. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/llm/rag/rfp.py +0 -0
  126. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/nlp/__init__.py +0 -0
  127. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
  128. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
  129. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
  130. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
  131. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
  132. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
  133. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/__init__.py +0 -0
  134. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/california_housing.py +0 -0
  135. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
  136. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
  137. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
  138. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
  139. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
  140. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
  141. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
  142. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
  143. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
  144. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
  145. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
  146. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
  147. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
  148. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
  149. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
  150. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
  151. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
  152. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/datasets/lending_club_loan_rates.csv +0 -0
  153. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/fred.py +0 -0
  154. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/lending_club.py +0 -0
  155. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
  156. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
  157. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
  158. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
  159. {validmind-2.2.5 → validmind-2.3.1}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
  160. {validmind-2.2.5 → validmind-2.3.1}/validmind/errors.py +0 -0
  161. {validmind-2.2.5 → validmind-2.3.1}/validmind/html_templates/__init__.py +0 -0
  162. {validmind-2.2.5 → validmind-2.3.1}/validmind/html_templates/content_blocks.py +0 -0
  163. {validmind-2.2.5 → validmind-2.3.1}/validmind/input_registry.py +0 -0
  164. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/__init__.py +0 -0
  165. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/huggingface.py +0 -0
  166. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/pipeline.py +0 -0
  167. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/pytorch.py +0 -0
  168. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/r_model.py +0 -0
  169. {validmind-2.2.5 → validmind-2.3.1}/validmind/models/sklearn.py +0 -0
  170. {validmind-2.2.5 → validmind-2.3.1}/validmind/template.py +0 -0
  171. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/classifier.py +0 -0
  172. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/cluster.py +0 -0
  173. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/embeddings.py +0 -0
  174. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/llm.py +0 -0
  175. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/nlp.py +0 -0
  176. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/parameters_optimization.py +0 -0
  177. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/summarization.py +0 -0
  178. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/tabular_datasets.py +0 -0
  179. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/text_data.py +0 -0
  180. {validmind-2.2.5 → validmind-2.3.1}/validmind/test_suites/time_series.py +0 -0
  181. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/__init__.py +0 -0
  182. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/ANOVAOneWayTable.py +0 -0
  183. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/AutoAR.py +0 -0
  184. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/AutoMA.py +0 -0
  185. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/AutoSeasonality.py +0 -0
  186. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/AutoStationarity.py +0 -0
  187. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -0
  188. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/BivariateHistograms.py +0 -0
  189. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
  190. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/ClassImbalance.py +0 -0
  191. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/DatasetDescription.py +0 -0
  192. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/DatasetSplit.py +0 -0
  193. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
  194. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/Duplicates.py +0 -0
  195. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
  196. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
  197. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/HighCardinality.py +0 -0
  198. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
  199. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
  200. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
  201. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/MissingValues.py +0 -0
  202. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
  203. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/MissingValuesRisk.py +0 -0
  204. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
  205. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
  206. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/Skewness.py +0 -0
  207. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/SpreadPlot.py +0 -0
  208. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
  209. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
  210. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
  211. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
  212. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
  213. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
  214. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
  215. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
  216. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
  217. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
  218. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/UniqueRows.py +0 -0
  219. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/__init__.py +0 -0
  220. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
  221. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
  222. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/data_validation/nlp/__init__.py +0 -0
  223. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/BertScore.py +0 -0
  224. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/BleuScore.py +0 -0
  225. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
  226. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ContextualRecall.py +0 -0
  227. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/MeteorScore.py +0 -0
  228. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ModelMetadata.py +0 -0
  229. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/RegardScore.py +0 -0
  230. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
  231. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/RougeScore.py +0 -0
  232. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/TokenDisparity.py +0 -0
  233. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/ToxicityScore.py +0 -0
  234. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/__init__.py +0 -0
  235. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
  236. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
  237. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
  238. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
  239. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/StabilityAnalysis.py +0 -0
  240. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
  241. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
  242. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
  243. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
  244. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
  245. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
  246. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +0 -0
  247. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
  248. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
  249. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
  250. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
  251. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
  252. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
  253. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
  254. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
  255. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
  256. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +0 -0
  257. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +0 -0
  258. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
  259. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
  260. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +0 -0
  261. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
  262. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +0 -0
  263. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
  264. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
  265. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
  266. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
  267. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +0 -0
  268. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/sklearn/__init__.py +0 -0
  269. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
  270. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/BoxPierce.py +0 -0
  271. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
  272. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
  273. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
  274. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
  275. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
  276. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
  277. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
  278. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
  279. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/__init__.py +0 -0
  280. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
  281. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/prompt_validation/__init__.py +0 -0
  282. {validmind-2.2.5 → validmind-2.3.1}/validmind/tests/test_providers.py +0 -0
  283. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/__init__.py +0 -0
  284. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/classification/sklearn/Accuracy.py +0 -0
  285. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/classification/sklearn/F1.py +0 -0
  286. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/classification/sklearn/Precision.py +0 -0
  287. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/classification/sklearn/ROC_AUC.py +0 -0
  288. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/classification/sklearn/Recall.py +0 -0
  289. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
  290. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
  291. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
  292. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
  293. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
  294. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
  295. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +0 -0
  296. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/sklearn/MeanAbsoluteError.py +0 -0
  297. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/sklearn/MeanSquaredError.py +0 -0
  298. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/sklearn/RSquaredScore.py +0 -0
  299. {validmind-2.2.5 → validmind-2.3.1}/validmind/unit_metrics/regression/sklearn/RootMeanSquaredError.py +0 -0
  300. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/__init__.py +0 -0
  301. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/dataset/__init__.py +0 -0
  302. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/figure.py +0 -0
  303. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/metric_result.py +0 -0
  304. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/output_template.py +0 -0
  305. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/result_summary.py +0 -0
  306. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test/threshold_test_result.py +0 -0
  307. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test_context.py +0 -0
  308. {validmind-2.2.5 → validmind-2.3.1}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: validmind
3
- Version: 2.2.5
3
+ Version: 2.3.1
4
4
  Summary: ValidMind Developer Framework
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -10,7 +10,7 @@ description = "ValidMind Developer Framework"
10
10
  license = "Commercial License"
11
11
  name = "validmind"
12
12
  readme = "README.pypi.md"
13
- version = "2.2.5"
13
+ version = "2.3.1"
14
14
 
15
15
  [tool.poetry.dependencies]
16
16
  python = ">=3.8.1,<3.12"
@@ -0,0 +1 @@
1
+ __version__ = "2.3.1"
@@ -2,12 +2,22 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- import concurrent.futures
6
5
  import os
6
+ from concurrent.futures import ThreadPoolExecutor
7
7
 
8
- from openai import AzureOpenAI, OpenAI
8
+ from validmind.utils import md_to_html
9
9
 
10
- SYSTEM_PROMPT = """
10
+ from ..logging import get_logger
11
+
12
+ __executor = ThreadPoolExecutor()
13
+
14
+ logger = get_logger(__name__)
15
+
16
+
17
+ AI_REVISION_NAME = "Generated by ValidMind AI"
18
+ DEFAULT_REVISION_NAME = "Default Description"
19
+
20
+ SYSTEM_PROMPT = """ # noqa
11
21
  You are an expert data scientist and MRM specialist.
12
22
  You are tasked with analyzing the results of a quantitative test run on some model or dataset.
13
23
  Your goal is to create a test description that will act as part of the model documentation.
@@ -15,12 +25,14 @@ You will provide both the developer and other consumers of the documentation wit
15
25
  The overarching theme to maintain is MRM documentation.
16
26
 
17
27
  Examine the provided statistical test results and compose a description of the results.
18
- This will act as the description and interpretation of the result in the model documentation.
19
- It will be displayed alongside the test results table and figures.
28
+ The results are either in the form of serialized tables or images of plots.
29
+ Compose a description and interpretation of the result to accompany it in MRM documentation.
30
+ It will be read by other data scientists and developers and by validators and stakeholders.
20
31
 
32
+ Use valid Markdown syntax to format the response.
21
33
  Avoid long sentences and complex vocabulary.
34
+ Avoid overly verbose explanations - the goal is to explain to a user what they are seeing in the results.
22
35
  Structure the response clearly and logically.
23
- Use valid Markdown syntax to format the response.
24
36
  Respond only with your analysis and insights, not the verbatim test results.
25
37
  Respond only with the markdown content, no explanation or context for your response is necessary.
26
38
  Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" -> "Class Imbalance".
@@ -28,9 +40,10 @@ Use the Test ID that is provided to form the Test Name e.g. "ClassImbalance" ->
28
40
  Explain the test, its purpose, its mechanism/formula etc and why it is useful.
29
41
  If relevant, provide a very brief description of the way this test is used in model/dataset evaluation and how it is interpreted.
30
42
  Highlight the key insights from the test results. The key insights should be concise and easily understood.
43
+ An insight should only be included if it is something not entirely obvious from the test results.
31
44
  End the response with any closing remarks, summary or additional useful information.
32
45
 
33
- Use the following format for the response (feel free to modify slightly if necessary):
46
+ Use the following format for the response (feel free to stray from it if necessary - this is a suggested starting point):
34
47
 
35
48
  <ResponseFormat>
36
49
  **<Test Name>** calculates the xyz <continue to explain what it does in detail>...
@@ -54,9 +67,9 @@ Test ID: `{test_name}`
54
67
  {test_description}
55
68
  </Test Docstring>
56
69
 
57
- <Test Results Summary>
70
+ <Test Results Table(s)>
58
71
  {test_summary}
59
- </Test Results Summary>
72
+ </Test Results Table(s)>
60
73
  """.strip()
61
74
 
62
75
 
@@ -70,48 +83,6 @@ Test ID: `{test_name}`
70
83
  The attached plots show the results of the test.
71
84
  """.strip()
72
85
 
73
- __client = None
74
- __model = None
75
-
76
- __executor = concurrent.futures.ThreadPoolExecutor()
77
-
78
-
79
- def __get_client_and_model():
80
- """
81
- Get the model to use for generating interpretations
82
- """
83
- global __client, __model
84
-
85
- if __client and __model:
86
- return __client, __model
87
-
88
- if "OPENAI_API_KEY" in os.environ:
89
- __client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))
90
- __model = os.environ.get("VM_OPENAI_MODEL", "gpt-4o")
91
-
92
- elif "AZURE_OPENAI_KEY" in os.environ:
93
- if "AZURE_OPENAI_ENDPOINT" not in os.environ:
94
- raise ValueError(
95
- "AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
96
- )
97
-
98
- if "AZURE_OPENAI_MODEL" not in os.environ:
99
- raise ValueError(
100
- "AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
101
- )
102
-
103
- __client = AzureOpenAI(
104
- azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT"),
105
- api_key=os.environ.get("AZURE_OPENAI_KEY"),
106
- api_version=os.environ.get("AZURE_OPENAI_VERSION", "2023-05-15"),
107
- )
108
- __model = os.environ.get("AZURE_OPENAI_MODEL")
109
-
110
- else:
111
- raise ValueError("OPENAI_API_KEY or AZURE_OPENAI_KEY must be set")
112
-
113
- return __client, __model
114
-
115
86
 
116
87
  class DescriptionFuture:
117
88
  """This will be immediately returned from generate_description so that
@@ -126,12 +97,17 @@ class DescriptionFuture:
126
97
  self._future = future
127
98
 
128
99
  def get_description(self):
129
- # This will block until the future is completed
130
- return self._future.result()
100
+ if isinstance(self._future, str):
101
+ description = self._future
102
+ else:
103
+ # This will block until the future is completed
104
+ description = self._future.result()
131
105
 
106
+ return md_to_html(description, mathml=True)
132
107
 
133
- def generate_description_async(
134
- test_name: str,
108
+
109
+ def generate_description(
110
+ test_id: str,
135
111
  test_description: str,
136
112
  test_summary: str,
137
113
  figures: list = None,
@@ -140,14 +116,29 @@ def generate_description_async(
140
116
  if not test_summary and not figures:
141
117
  raise ValueError("No summary or figures provided - cannot generate description")
142
118
 
143
- client, _ = __get_client_and_model()
119
+ # TODO: fix circular import
120
+ from validmind.ai.utils import get_client_and_model
121
+
122
+ client, model = get_client_and_model()
123
+
144
124
  # get last part of test id
145
- test_name = test_name.split(".")[-1]
125
+ test_name = test_id.split(".")[-1]
126
+ # truncate the test description to save time
127
+ test_description = (
128
+ f"{test_description[:500]}..."
129
+ if len(test_description) > 500
130
+ else test_description
131
+ )
146
132
 
147
133
  if test_summary:
134
+ logger.debug(
135
+ f"Generating description for test {test_name} with stringified summary"
136
+ )
148
137
  return (
149
138
  client.chat.completions.create(
150
- model="gpt-4o",
139
+ model=model,
140
+ temperature=0,
141
+ seed=42,
151
142
  messages=[
152
143
  {"role": "system", "content": SYSTEM_PROMPT},
153
144
  {
@@ -164,9 +155,14 @@ def generate_description_async(
164
155
  .message.content.strip()
165
156
  )
166
157
 
158
+ logger.debug(
159
+ f"Generating description for test {test_name} with {len(figures)} figures"
160
+ )
167
161
  return (
168
162
  client.chat.completions.create(
169
- model="gpt-4o",
163
+ model=model,
164
+ temperature=0,
165
+ seed=42,
170
166
  messages=[
171
167
  {"role": "system", "content": SYSTEM_PROMPT},
172
168
  {
@@ -197,18 +193,80 @@ def generate_description_async(
197
193
  )
198
194
 
199
195
 
200
- def generate_description(
201
- test_name: str,
196
+ def background_generate_description(
197
+ test_id: str,
202
198
  test_description: str,
203
199
  test_summary: str,
204
200
  figures: list = None,
205
201
  ):
206
- future = __executor.submit(
207
- generate_description_async,
208
- test_name,
209
- test_description,
210
- test_summary,
211
- figures,
212
- )
202
+ def wrapped():
203
+ try:
204
+ return generate_description(
205
+ test_id, test_description, test_summary, figures
206
+ )
207
+ except Exception as e:
208
+ logger.error(f"Failed to generate description: {e}")
209
+
210
+ return test_description
211
+
212
+ return DescriptionFuture(__executor.submit(wrapped))
213
+
214
+
215
+ def get_description_metadata(
216
+ test_id,
217
+ default_description,
218
+ summary=None,
219
+ figures=None,
220
+ prefix="metric_description",
221
+ ):
222
+ """Get Metadata Dictionary for a Test or Metric Result
223
+
224
+ Generates an LLM interpretation of the test results or uses the default
225
+ description and returns a metadata object that can be logged with the test results.
226
+
227
+ By default, the description is generated by an LLM that will interpret the test
228
+ results and provide a human-readable description. If the summary or figures are
229
+ not provided, or the `VALIDMIND_LLM_DESCRIPTIONS_ENABLED` environment variable is
230
+ set to `0` or `false` or no LLM has been configured, the default description will
231
+ be used as the test result description.
232
+
233
+ Note: Either the summary or figures must be provided to generate the description.
234
+
235
+ Args:
236
+ test_id (str): The test ID
237
+ default_description (str): The default description for the test
238
+ summary (Any): The test summary or results to interpret
239
+ figures (List[Figure]): The figures to attach to the test suite result
240
+ prefix (str): The prefix to use for the content ID (Default: "metric_description")
241
+
242
+ Returns:
243
+ dict: The metadata object to be logged with the test results
244
+ """
245
+ env_disabled = os.getenv("VALIDMIND_LLM_DESCRIPTIONS_ENABLED", "1") in [
246
+ "0",
247
+ "false",
248
+ ]
249
+
250
+ # TODO: fix circular import
251
+ from validmind.ai.utils import is_configured
252
+
253
+ if (summary or figures) and not env_disabled and is_configured():
254
+ revision_name = AI_REVISION_NAME
255
+
256
+ # get description future and set it as the description in the metadata
257
+ # this will lazily retrieved so it can run in the background in parallel
258
+ description = background_generate_description(
259
+ test_id=test_id,
260
+ test_description=default_description,
261
+ test_summary=summary,
262
+ figures=figures,
263
+ )
264
+
265
+ else:
266
+ revision_name = DEFAULT_REVISION_NAME
267
+ description = md_to_html(default_description, mathml=True)
213
268
 
214
- return DescriptionFuture(future)
269
+ return {
270
+ "content_id": f"{prefix}:{test_id}::{revision_name}",
271
+ "text": description,
272
+ }
@@ -0,0 +1,104 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import os
6
+
7
+ from openai import AzureOpenAI, Client, OpenAI
8
+
9
+ from ..api_client import get_ai_key, get_api_host
10
+ from ..logging import get_logger
11
+
12
+ logger = get_logger(__name__)
13
+
14
+
15
+ __client = None
16
+ __model = None
17
+ # can be None, True or False (ternary to represent initial state, ack and failed ack)
18
+ __ack = None
19
+
20
+
21
+ def get_client_and_model():
22
+ """Get model and client to use for generating interpretations
23
+
24
+ On first call, it will look in the environment for the API key endpoint, model etc.
25
+ and store them in a global variable to avoid loading them up again.
26
+ """
27
+ global __client, __model
28
+
29
+ if __client and __model:
30
+ return __client, __model
31
+
32
+ if "OPENAI_API_KEY" in os.environ:
33
+ __client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
34
+ __model = os.getenv("VM_OPENAI_MODEL", "gpt-4o")
35
+
36
+ logger.debug(f"Using OpenAI {__model} for generating descriptions")
37
+
38
+ elif "AZURE_OPENAI_KEY" in os.environ:
39
+ if "AZURE_OPENAI_ENDPOINT" not in os.environ:
40
+ raise ValueError(
41
+ "AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
42
+ )
43
+
44
+ if "AZURE_OPENAI_MODEL" not in os.environ:
45
+ raise ValueError(
46
+ "AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
47
+ )
48
+
49
+ __client = AzureOpenAI(
50
+ azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
51
+ api_key=os.getenv("AZURE_OPENAI_KEY"),
52
+ api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
53
+ )
54
+ __model = os.getenv("AZURE_OPENAI_MODEL")
55
+
56
+ logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")
57
+
58
+ else:
59
+ try:
60
+ response = get_ai_key()
61
+ __client = Client(
62
+ base_url=(
63
+ # TODO: improve this to be a bit more dynamic
64
+ "http://localhost:4000/genai"
65
+ if "localhost" in get_api_host()
66
+ else f"{get_api_host()}/genai"
67
+ ),
68
+ api_key=response["key"],
69
+ )
70
+ __model = "gpt-4o" # TODO: backend should tell us which model to use
71
+ logger.debug(f"Using ValidMind {__model} for generating descriptions")
72
+ except Exception as e:
73
+ logger.debug(f"Failed to get API key: {e}")
74
+ raise ValueError(
75
+ "OPENAI_API_KEY, AZURE_OPENAI_KEY must be set, or your account "
76
+ "must be setup to use ValidMind's LLM in order to use LLM features"
77
+ )
78
+
79
+ return __client, __model
80
+
81
+
82
+ def is_configured():
83
+ global __ack
84
+
85
+ if __ack:
86
+ return True
87
+
88
+ try:
89
+ client, model = get_client_and_model()
90
+ # send an empty message with max_tokens=1 to "ping" the API
91
+ response = client.chat.completions.create(
92
+ model=model,
93
+ messages=[{"role": "user", "content": ""}],
94
+ max_tokens=1,
95
+ )
96
+ logger.debug(
97
+ f"Received response from OpenAI: {response.choices[0].message.content}"
98
+ )
99
+ __ack = True
100
+ except Exception as e:
101
+ logger.debug(f"Failed to connect to OpenAI: {e}")
102
+ __ack = False
103
+
104
+ return __ack
@@ -11,9 +11,9 @@ import asyncio
11
11
  import atexit
12
12
  import json
13
13
  import os
14
- import urllib.parse
15
14
  from io import BytesIO
16
15
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
16
+ from urllib.parse import urlencode, urljoin
17
17
 
18
18
  import aiohttp
19
19
  import requests
@@ -22,19 +22,19 @@ from aiohttp import FormData
22
22
  from .client_config import client_config
23
23
  from .errors import MissingAPICredentialsError, MissingProjectIdError, raise_api_error
24
24
  from .logging import get_logger, init_sentry, send_single_error
25
- from .utils import NumpyEncoder, md_to_html, run_async
25
+ from .utils import NumpyEncoder, run_async
26
26
  from .vm_models import Figure, MetricResult, ThresholdTestResults
27
27
 
28
28
  # TODO: can't import types from vm_models because of circular dependency
29
29
 
30
30
  logger = get_logger(__name__)
31
31
 
32
- _api_key = os.environ.get("VM_API_KEY")
33
- _api_secret = os.environ.get("VM_API_SECRET")
34
- _api_host = os.environ.get("VM_API_HOST")
32
+ _api_key = os.getenv("VM_API_KEY")
33
+ _api_secret = os.getenv("VM_API_SECRET")
34
+ _api_host = os.getenv("VM_API_HOST")
35
35
 
36
- _project = os.environ.get("VM_API_PROJECT")
37
- _run_cuid = os.environ.get("VM_RUN_CUID")
36
+ _project = os.getenv("VM_API_PROJECT")
37
+ _run_cuid = os.getenv("VM_RUN_CUID")
38
38
 
39
39
  __api_session: aiohttp.ClientSession = None
40
40
 
@@ -69,6 +69,14 @@ def get_api_project() -> Optional[str]:
69
69
  return _project
70
70
 
71
71
 
72
+ def get_api_headers() -> Dict[str, str]:
73
+ return {
74
+ "X-API-KEY": _api_key,
75
+ "X-API-SECRET": _api_secret,
76
+ "X-PROJECT-CUID": _project,
77
+ }
78
+
79
+
72
80
  def init(
73
81
  project: Optional[str] = None,
74
82
  api_key: Optional[str] = None,
@@ -97,26 +105,24 @@ def init(
97
105
  # special case to detect when running a notebook with the standard init snippet
98
106
  # will override with environment variables so we don't have to keep updating
99
107
  # the notebook
100
- api_host = None
101
- api_key = None
102
- api_secret = None
103
- project = None
108
+ api_host = api_key = api_secret = project = None
104
109
 
105
- _project = project or os.environ.get("VM_API_PROJECT")
110
+ _project = project or os.getenv("VM_API_PROJECT")
106
111
 
107
112
  if _project is None:
108
113
  raise MissingProjectIdError()
109
114
 
110
- _api_key = api_key or os.environ.get("VM_API_KEY")
111
- _api_secret = api_secret or os.environ.get("VM_API_SECRET")
115
+ _api_key = api_key or os.getenv("VM_API_KEY")
116
+ _api_secret = api_secret or os.getenv("VM_API_SECRET")
112
117
 
113
118
  if _api_key is None or _api_secret is None:
114
119
  raise MissingAPICredentialsError()
115
120
 
116
- _api_host = api_host or os.environ.get(
117
- "VM_API_HOST", "http://127.0.0.1:5000/api/v1/tracking"
121
+ _api_host = api_host or os.getenv(
122
+ "VM_API_HOST", "http://127.0.0.1:5000/api/v1/tracking/"
118
123
  )
119
- _run_cuid = os.environ.get("VM_RUN_CUID", None)
124
+
125
+ _run_cuid = os.getenv("VM_RUN_CUID", None)
120
126
 
121
127
  try:
122
128
  __ping()
@@ -127,7 +133,7 @@ def init(
127
133
  raise e
128
134
 
129
135
 
130
- async def _get_session() -> aiohttp.ClientSession:
136
+ def _get_session() -> aiohttp.ClientSession:
131
137
  """Initializes the async client session"""
132
138
  global __api_session
133
139
 
@@ -147,7 +153,7 @@ async def _get_session() -> aiohttp.ClientSession:
147
153
  def __ping() -> Dict[str, Any]:
148
154
  """Validates that we can connect to the ValidMind API (does not use the async session)"""
149
155
  r = requests.get(
150
- f"{_api_host}/ping",
156
+ __get_url("ping", should_start_run=False),
151
157
  headers={
152
158
  "X-API-KEY": _api_key,
153
159
  "X-API-SECRET": _api_secret,
@@ -189,21 +195,35 @@ def reload():
189
195
  raise e
190
196
 
191
197
 
192
- async def __get_url(endpoint: str, params: Optional[Dict[str, str]] = None) -> str:
193
- if not _run_cuid:
194
- start_run()
198
+ def __get_url(
199
+ endpoint: str,
200
+ params: Optional[Dict[str, str]] = None,
201
+ should_start_run: bool = True,
202
+ ) -> str:
203
+ global _api_host
195
204
 
196
205
  params = params or {}
197
- params["run_cuid"] = _run_cuid
198
206
 
199
- return f"{_api_host}/{endpoint}?{urllib.parse.urlencode(params)}"
207
+ if not _run_cuid and should_start_run:
208
+ start_run()
209
+
210
+ if should_start_run:
211
+ params["run_cuid"] = _run_cuid
212
+
213
+ if not _api_host.endswith("/"):
214
+ _api_host += "/"
215
+
216
+ if params:
217
+ return f"{urljoin(_api_host, endpoint)}?{urlencode(params)}"
218
+
219
+ return urljoin(_api_host, endpoint)
200
220
 
201
221
 
202
222
  async def _get(
203
223
  endpoint: str, params: Optional[Dict[str, str]] = None
204
224
  ) -> Dict[str, Any]:
205
- url = await __get_url(endpoint, params)
206
- session = await _get_session()
225
+ url = __get_url(endpoint, params)
226
+ session = _get_session()
207
227
  session.headers.update({"X-RUN-CUID": _run_cuid})
208
228
 
209
229
  async with session.get(url) as r:
@@ -219,8 +239,8 @@ async def _post(
219
239
  data: Optional[Union[dict, FormData]] = None,
220
240
  files: Optional[Dict[str, Tuple[str, BytesIO, str]]] = None,
221
241
  ) -> Dict[str, Any]:
222
- url = await __get_url(endpoint, params)
223
- session = await _get_session()
242
+ url = __get_url(endpoint, params)
243
+ session = _get_session()
224
244
  session.headers.update({"X-RUN-CUID": _run_cuid})
225
245
 
226
246
  if not isinstance(data, (dict)) and files is not None:
@@ -349,7 +369,7 @@ async def log_metadata(
349
369
  """
350
370
  metadata_dict = {"content_id": content_id}
351
371
  if text is not None:
352
- metadata_dict["text"] = md_to_html(text, mathml=True)
372
+ metadata_dict["text"] = text
353
373
  if _json is not None:
354
374
  metadata_dict["json"] = _json
355
375
 
@@ -491,7 +511,7 @@ def log_test_results(
491
511
  return responses
492
512
 
493
513
 
494
- def _log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
514
+ def log_input(name: str, type: str, metadata: Dict[str, Any]) -> Dict[str, Any]:
495
515
  """Logs input information - internal use for now (don't expose via public API)
496
516
 
497
517
  Args:
@@ -539,7 +559,7 @@ def start_run() -> str:
539
559
  global _run_cuid
540
560
 
541
561
  r = requests.post(
542
- f"{_api_host}/start_run",
562
+ __get_url("start_run", should_start_run=False),
543
563
  headers={
544
564
  "X-API-KEY": _api_key,
545
565
  "X-API-SECRET": _api_secret,
@@ -555,3 +575,22 @@ def start_run() -> str:
555
575
  _run_cuid = test_run["cuid"]
556
576
 
557
577
  return test_run["cuid"]
578
+
579
+
580
+ def get_ai_key() -> str:
581
+ """Calls the api to get an api key for our LLM proxy"""
582
+ r = requests.get(
583
+ __get_url("ai/key", should_start_run=False),
584
+ headers={
585
+ "X-API-KEY": _api_key,
586
+ "X-API-SECRET": _api_secret,
587
+ "X-PROJECT-CUID": _project,
588
+ },
589
+ )
590
+
591
+ if r.status_code != 200:
592
+ # TODO: improve error handling when there's no Open AI API or AI key available
593
+ # logger.error("Could not get AI key from ValidMind API")
594
+ raise_api_error(r.text)
595
+
596
+ return r.json()
@@ -9,7 +9,7 @@ Client interface for all data and model validation functions
9
9
  import pandas as pd
10
10
  import polars as pl
11
11
 
12
- from .api_client import _log_input as log_input
12
+ from .api_client import log_input as log_input
13
13
  from .client_config import client_config
14
14
  from .errors import (
15
15
  GetTestSuiteError,
@@ -180,6 +180,7 @@ def init_model(
180
180
  attributes: dict = None,
181
181
  predict_fn: callable = None,
182
182
  __log=True,
183
+ **kwargs,
183
184
  ) -> VMModel:
184
185
  """
185
186
  Initializes a VM Model, which can then be passed to other functions
@@ -194,6 +195,7 @@ def init_model(
194
195
  this to the same key.
195
196
  attributes (dict): A dictionary of model attributes
196
197
  predict_fn (callable): A function that takes an input and returns a prediction
198
+ **kwargs: Additional arguments to pass to the model
197
199
 
198
200
  Raises:
199
201
  ValueError: If the model type is not supported
@@ -246,6 +248,7 @@ def init_model(
246
248
  input_id=input_id,
247
249
  model=model, # Trained model instance
248
250
  predict_fn=predict_fn,
251
+ **kwargs,
249
252
  )
250
253
  metadata = get_model_info(vm_model)
251
254
  else:
@@ -351,10 +354,7 @@ def get_test_suite(
351
354
  )
352
355
 
353
356
  return get_template_test_suite(
354
- client_config.documentation_template,
355
- section=section,
356
- *args,
357
- **kwargs,
357
+ client_config.documentation_template, section=section
358
358
  )
359
359
 
360
360
  return get_test_suite_by_id(test_suite_id)(*args, **kwargs)