validmind 2.8.22__tar.gz → 2.8.27__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (334)
  1. {validmind-2.8.22 → validmind-2.8.27}/PKG-INFO +4 -3
  2. {validmind-2.8.22 → validmind-2.8.27}/pyproject.toml +4 -3
  3. {validmind-2.8.22 → validmind-2.8.27}/validmind/__init__.py +3 -0
  4. validmind-2.8.27/validmind/__version__.py +1 -0
  5. validmind-2.8.27/validmind/ai/utils.py +219 -0
  6. {validmind-2.8.22 → validmind-2.8.27}/validmind/api_client.py +4 -0
  7. validmind-2.8.27/validmind/experimental/agents.py +65 -0
  8. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/MutualInformation.py +14 -2
  9. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -1
  10. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/AspectCritic.py +5 -1
  11. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -1
  12. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/ContextPrecision.py +5 -1
  13. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -1
  14. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/ContextRecall.py +5 -1
  15. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/Faithfulness.py +5 -1
  16. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/NoiseSensitivity.py +3 -1
  17. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/ResponseRelevancy.py +6 -4
  18. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/SemanticSimilarity.py +5 -1
  19. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ragas/utils.py +4 -24
  20. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +11 -1
  21. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +13 -0
  22. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/Bias.py +2 -1
  23. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/Clarity.py +2 -1
  24. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/Conciseness.py +2 -1
  25. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/Delimitation.py +2 -1
  26. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/NegativeInstruction.py +2 -1
  27. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/Robustness.py +3 -2
  28. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/Specificity.py +2 -1
  29. validmind-2.8.27/validmind/tests/prompt_validation/__init__.py +0 -0
  30. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/prompt_validation/ai_powered_test.py +18 -17
  31. validmind-2.8.27/validmind/vm_models/result/__init__.py +21 -0
  32. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/result/result.py +127 -14
  33. validmind-2.8.22/validmind/__version__.py +0 -1
  34. validmind-2.8.22/validmind/ai/utils.py +0 -130
  35. validmind-2.8.22/validmind/vm_models/result/__init__.py +0 -7
  36. {validmind-2.8.22 → validmind-2.8.27}/LICENSE +0 -0
  37. {validmind-2.8.22 → validmind-2.8.27}/README.pypi.md +0 -0
  38. {validmind-2.8.22 → validmind-2.8.27}/validmind/ai/test_descriptions.py +0 -0
  39. {validmind-2.8.22 → validmind-2.8.27}/validmind/client.py +0 -0
  40. {validmind-2.8.22 → validmind-2.8.27}/validmind/client_config.py +0 -0
  41. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/__init__.py +0 -0
  42. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/classification/__init__.py +0 -0
  43. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/classification/customer_churn.py +0 -0
  44. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/classification/datasets/bank_customer_churn.csv +0 -0
  45. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/classification/datasets/taiwan_credit.csv +0 -0
  46. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/classification/taiwan_credit.py +0 -0
  47. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/cluster/digits.py +0 -0
  48. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/credit_risk/__init__.py +0 -0
  49. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  50. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/credit_risk/datasets/lending_club_loan_data_2007_2014_clean.csv.gz +0 -0
  51. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/credit_risk/lending_club.py +0 -0
  52. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/credit_risk/lending_club_bias.py +0 -0
  53. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/llm/rag/__init__.py +0 -0
  54. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +0 -0
  55. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +0 -0
  56. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +0 -0
  57. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +0 -0
  58. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +0 -0
  59. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/llm/rag/rfp.py +0 -0
  60. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/nlp/__init__.py +0 -0
  61. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/nlp/cnn_dailymail.py +0 -0
  62. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/nlp/datasets/Covid_19.csv +0 -0
  63. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/nlp/datasets/cnn_dailymail_100_with_predictions.csv +0 -0
  64. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/nlp/datasets/cnn_dailymail_500_with_predictions.csv +0 -0
  65. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/nlp/datasets/sentiments_with_predictions.csv +0 -0
  66. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/nlp/twitter_covid_19.py +0 -0
  67. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/__init__.py +0 -0
  68. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/california_housing.py +0 -0
  69. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/CPIAUCSL.csv +0 -0
  70. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/CSUSHPISA.csv +0 -0
  71. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/DRSFRMACBS.csv +0 -0
  72. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/FEDFUNDS.csv +0 -0
  73. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/GDP.csv +0 -0
  74. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/GDPC1.csv +0 -0
  75. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/GS10.csv +0 -0
  76. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/GS3.csv +0 -0
  77. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/GS5.csv +0 -0
  78. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/MORTGAGE30US.csv +0 -0
  79. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred/UNRATE.csv +0 -0
  80. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred_loan_rates.csv +0 -0
  81. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred_loan_rates_test_1.csv +0 -0
  82. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred_loan_rates_test_2.csv +0 -0
  83. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred_loan_rates_test_3.csv +0 -0
  84. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred_loan_rates_test_4.csv +0 -0
  85. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/fred_loan_rates_test_5.csv +0 -0
  86. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/datasets/leanding_club_loan_rates.csv +0 -0
  87. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/fred.py +0 -0
  88. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/fred_timeseries.py +0 -0
  89. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/lending_club.py +0 -0
  90. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/models/fred_loan_rates_model_1.pkl +0 -0
  91. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/models/fred_loan_rates_model_2.pkl +0 -0
  92. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/models/fred_loan_rates_model_3.pkl +0 -0
  93. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/models/fred_loan_rates_model_4.pkl +0 -0
  94. {validmind-2.8.22 → validmind-2.8.27}/validmind/datasets/regression/models/fred_loan_rates_model_5.pkl +0 -0
  95. {validmind-2.8.22 → validmind-2.8.27}/validmind/errors.py +0 -0
  96. {validmind-2.8.22/validmind/html_templates → validmind-2.8.27/validmind/experimental}/__init__.py +0 -0
  97. {validmind-2.8.22/validmind/tests/data_validation → validmind-2.8.27/validmind/html_templates}/__init__.py +0 -0
  98. {validmind-2.8.22 → validmind-2.8.27}/validmind/html_templates/content_blocks.py +0 -0
  99. {validmind-2.8.22 → validmind-2.8.27}/validmind/input_registry.py +0 -0
  100. {validmind-2.8.22 → validmind-2.8.27}/validmind/logging.py +0 -0
  101. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/__init__.py +0 -0
  102. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/foundation.py +0 -0
  103. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/function.py +0 -0
  104. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/huggingface.py +0 -0
  105. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/metadata.py +0 -0
  106. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/pipeline.py +0 -0
  107. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/pytorch.py +0 -0
  108. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/r_model.py +0 -0
  109. {validmind-2.8.22 → validmind-2.8.27}/validmind/models/sklearn.py +0 -0
  110. {validmind-2.8.22 → validmind-2.8.27}/validmind/template.py +0 -0
  111. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/__init__.py +0 -0
  112. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/classifier.py +0 -0
  113. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/cluster.py +0 -0
  114. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/embeddings.py +0 -0
  115. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/llm.py +0 -0
  116. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/nlp.py +0 -0
  117. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/parameters_optimization.py +0 -0
  118. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/regression.py +0 -0
  119. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/statsmodels_timeseries.py +0 -0
  120. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/summarization.py +0 -0
  121. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/tabular_datasets.py +0 -0
  122. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/text_data.py +0 -0
  123. {validmind-2.8.22 → validmind-2.8.27}/validmind/test_suites/time_series.py +0 -0
  124. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/__init__.py +0 -0
  125. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/__types__.py +0 -0
  126. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/_store.py +0 -0
  127. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/comparison.py +0 -0
  128. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ACFandPACFPlot.py +0 -0
  129. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ADF.py +0 -0
  130. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/AutoAR.py +0 -0
  131. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/AutoMA.py +0 -0
  132. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/AutoStationarity.py +0 -0
  133. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/BivariateScatterPlots.py +0 -0
  134. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/BoxPierce.py +0 -0
  135. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ChiSquaredFeaturesTable.py +0 -0
  136. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ClassImbalance.py +0 -0
  137. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/DatasetDescription.py +0 -0
  138. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/DatasetSplit.py +0 -0
  139. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/DescriptiveStatistics.py +0 -0
  140. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/DickeyFullerGLS.py +0 -0
  141. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/Duplicates.py +0 -0
  142. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/EngleGrangerCoint.py +0 -0
  143. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +0 -0
  144. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/HighCardinality.py +0 -0
  145. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/HighPearsonCorrelation.py +0 -0
  146. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/IQROutliersBarPlot.py +0 -0
  147. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/IQROutliersTable.py +0 -0
  148. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/IsolationForestOutliers.py +0 -0
  149. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/JarqueBera.py +0 -0
  150. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/KPSS.py +0 -0
  151. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/LJungBox.py +0 -0
  152. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/LaggedCorrelationHeatmap.py +0 -0
  153. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/MissingValues.py +0 -0
  154. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/MissingValuesBarPlot.py +0 -0
  155. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/PearsonCorrelationMatrix.py +0 -0
  156. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/PhillipsPerronArch.py +0 -0
  157. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ProtectedClassesCombination.py +0 -0
  158. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ProtectedClassesDescription.py +0 -0
  159. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ProtectedClassesDisparity.py +0 -0
  160. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +0 -0
  161. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/RollingStatsPlot.py +0 -0
  162. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/RunsTest.py +0 -0
  163. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ScatterPlot.py +0 -0
  164. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ScoreBandDefaultRates.py +0 -0
  165. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/SeasonalDecompose.py +0 -0
  166. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ShapiroWilk.py +0 -0
  167. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/Skewness.py +0 -0
  168. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/SpreadPlot.py +0 -0
  169. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TabularCategoricalBarPlots.py +0 -0
  170. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TabularDateTimeHistograms.py +0 -0
  171. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TabularDescriptionTables.py +0 -0
  172. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TabularNumericalHistograms.py +0 -0
  173. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TargetRateBarPlots.py +0 -0
  174. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TimeSeriesDescription.py +0 -0
  175. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +0 -0
  176. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TimeSeriesFrequency.py +0 -0
  177. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TimeSeriesHistogram.py +0 -0
  178. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TimeSeriesLinePlot.py +0 -0
  179. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TimeSeriesMissingValues.py +0 -0
  180. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TimeSeriesOutliers.py +0 -0
  181. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/TooManyZeroValues.py +0 -0
  182. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/UniqueRows.py +0 -0
  183. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/WOEBinPlots.py +0 -0
  184. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/WOEBinTable.py +0 -0
  185. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/ZivotAndrewsArch.py +0 -0
  186. {validmind-2.8.22/validmind/tests/data_validation/nlp → validmind-2.8.27/validmind/tests/data_validation}/__init__.py +0 -0
  187. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/CommonWords.py +0 -0
  188. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/Hashtags.py +0 -0
  189. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/LanguageDetection.py +0 -0
  190. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/Mentions.py +0 -0
  191. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +0 -0
  192. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/Punctuations.py +0 -0
  193. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/Sentiment.py +0 -0
  194. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/StopWords.py +0 -0
  195. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/TextDescription.py +0 -0
  196. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/data_validation/nlp/Toxicity.py +0 -0
  197. {validmind-2.8.22/validmind/tests/model_validation → validmind-2.8.27/validmind/tests/data_validation/nlp}/__init__.py +0 -0
  198. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/decorator.py +0 -0
  199. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/load.py +0 -0
  200. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/BertScore.py +0 -0
  201. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/BleuScore.py +0 -0
  202. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ClusterSizeDistribution.py +0 -0
  203. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ContextualRecall.py +0 -0
  204. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/FeaturesAUC.py +0 -0
  205. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/MeteorScore.py +0 -0
  206. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ModelMetadata.py +0 -0
  207. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ModelPredictionResiduals.py +0 -0
  208. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/RegardScore.py +0 -0
  209. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/RegressionResidualsPlot.py +0 -0
  210. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/RougeScore.py +0 -0
  211. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +0 -0
  212. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +0 -0
  213. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +0 -0
  214. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/TokenDisparity.py +0 -0
  215. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/ToxicityScore.py +0 -0
  216. {validmind-2.8.22/validmind/tests/model_validation/sklearn → validmind-2.8.27/validmind/tests/model_validation}/__init__.py +0 -0
  217. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/ClusterDistribution.py +0 -0
  218. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +0 -0
  219. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +0 -0
  220. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +0 -0
  221. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +0 -0
  222. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +0 -0
  223. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +0 -0
  224. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +0 -0
  225. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +0 -0
  226. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +0 -0
  227. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +0 -0
  228. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +0 -0
  229. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +0 -0
  230. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +0 -0
  231. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/embeddings/utils.py +0 -0
  232. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +0 -0
  233. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +0 -0
  234. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/CalibrationCurve.py +0 -0
  235. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ClassifierPerformance.py +0 -0
  236. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ClassifierThresholdOptimization.py +0 -0
  237. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +0 -0
  238. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +0 -0
  239. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/CompletenessScore.py +0 -0
  240. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ConfusionMatrix.py +0 -0
  241. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/FeatureImportance.py +0 -0
  242. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +0 -0
  243. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/HomogeneityScore.py +0 -0
  244. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/HyperParametersTuning.py +0 -0
  245. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +0 -0
  246. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/MinimumAccuracy.py +0 -0
  247. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/MinimumF1Score.py +0 -0
  248. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +0 -0
  249. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ModelParameters.py +0 -0
  250. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +0 -0
  251. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +0 -0
  252. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +0 -0
  253. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +0 -0
  254. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ROCCurve.py +0 -0
  255. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/RegressionErrors.py +0 -0
  256. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +0 -0
  257. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/RegressionPerformance.py +0 -0
  258. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/RegressionR2Square.py +0 -0
  259. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +0 -0
  260. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +0 -0
  261. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +0 -0
  262. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +0 -0
  263. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/SilhouettePlot.py +0 -0
  264. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +0 -0
  265. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/sklearn/VMeasure.py +0 -0
  266. {validmind-2.8.22/validmind/tests/model_validation/statsmodels → validmind-2.8.27/validmind/tests/model_validation/sklearn}/__init__.py +0 -0
  267. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/AutoARIMA.py +0 -0
  268. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +0 -0
  269. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +0 -0
  270. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/GINITable.py +0 -0
  271. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +0 -0
  272. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/Lilliefors.py +0 -0
  273. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +0 -0
  274. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +0 -0
  275. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +0 -0
  276. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +0 -0
  277. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +0 -0
  278. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +0 -0
  279. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +0 -0
  280. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +0 -0
  281. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +0 -0
  282. {validmind-2.8.22/validmind/tests/prompt_validation → validmind-2.8.27/validmind/tests/model_validation/statsmodels}/__init__.py +0 -0
  283. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/model_validation/statsmodels/statsutils.py +0 -0
  284. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +0 -0
  285. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +0 -0
  286. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +0 -0
  287. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +0 -0
  288. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +0 -0
  289. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +0 -0
  290. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/FeatureDrift.py +0 -0
  291. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +0 -0
  292. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/PredictionCorrelation.py +0 -0
  293. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +0 -0
  294. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +0 -0
  295. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/ROCCurveDrift.py +0 -0
  296. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +0 -0
  297. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +0 -0
  298. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +0 -0
  299. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/output.py +0 -0
  300. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/run.py +0 -0
  301. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/test_providers.py +0 -0
  302. {validmind-2.8.22 → validmind-2.8.27}/validmind/tests/utils.py +0 -0
  303. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/__init__.py +0 -0
  304. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/classification/Accuracy.py +0 -0
  305. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/classification/F1.py +0 -0
  306. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/classification/Precision.py +0 -0
  307. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/classification/ROC_AUC.py +0 -0
  308. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/classification/Recall.py +0 -0
  309. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/AdjustedRSquaredScore.py +0 -0
  310. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/GiniCoefficient.py +0 -0
  311. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/HuberLoss.py +0 -0
  312. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +0 -0
  313. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/MeanAbsoluteError.py +0 -0
  314. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +0 -0
  315. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/MeanBiasDeviation.py +0 -0
  316. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/MeanSquaredError.py +0 -0
  317. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/QuantileLoss.py +0 -0
  318. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/RSquaredScore.py +0 -0
  319. {validmind-2.8.22 → validmind-2.8.27}/validmind/unit_metrics/regression/RootMeanSquaredError.py +0 -0
  320. {validmind-2.8.22 → validmind-2.8.27}/validmind/utils.py +0 -0
  321. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/__init__.py +0 -0
  322. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/dataset/__init__.py +0 -0
  323. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/dataset/dataset.py +0 -0
  324. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/dataset/utils.py +0 -0
  325. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/figure.py +0 -0
  326. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/input.py +0 -0
  327. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/model.py +0 -0
  328. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/result/result.jinja +0 -0
  329. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/result/utils.py +0 -0
  330. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/test_suite/__init__.py +0 -0
  331. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/test_suite/runner.py +0 -0
  332. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/test_suite/summary.py +0 -0
  333. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/test_suite/test.py +0 -0
  334. {validmind-2.8.22 → validmind-2.8.27}/validmind/vm_models/test_suite/test_suite.py +0 -0
@@ -1,11 +1,11 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: validmind
3
- Version: 2.8.22
3
+ Version: 2.8.27
4
4
  Summary: ValidMind Library
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
7
7
  Author-email: andres@validmind.ai
8
- Requires-Python: >=3.8.1,<3.12
8
+ Requires-Python: >=3.9.0,<3.12
9
9
  Classifier: License :: Other/Proprietary License
10
10
  Classifier: Programming Language :: Python :: 3
11
11
  Classifier: Programming Language :: Python :: 3.9
@@ -22,6 +22,7 @@ Requires-Dist: bert-score (>=0.3.13)
22
22
  Requires-Dist: catboost
23
23
  Requires-Dist: datasets (>=2.10.0,<3.0.0)
24
24
  Requires-Dist: evaluate
25
+ Requires-Dist: h11 (>=0.16.0)
25
26
  Requires-Dist: ipywidgets
26
27
  Requires-Dist: kaleido (>=0.2.1,!=0.2.1.post1)
27
28
  Requires-Dist: langchain-openai (>=0.1.8) ; extra == "all" or extra == "llm"
@@ -53,7 +54,7 @@ Requires-Dist: statsmodels
53
54
  Requires-Dist: tabulate (>=0.8.9,<0.9.0)
54
55
  Requires-Dist: textblob (>=0.18.0.post0,<0.19.0)
55
56
  Requires-Dist: tiktoken
56
- Requires-Dist: torch (>=1.10.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
57
+ Requires-Dist: torch (==2.7.0) ; extra == "all" or extra == "llm" or extra == "pytorch"
57
58
  Requires-Dist: tqdm
58
59
  Requires-Dist: transformers (>=4.32.0,<5.0.0) ; extra == "all" or extra == "huggingface" or extra == "llm"
59
60
  Requires-Dist: xgboost (>=1.5.2,<3)
@@ -10,7 +10,7 @@ description = "ValidMind Library"
10
10
  license = "Commercial License"
11
11
  name = "validmind"
12
12
  readme = "README.pypi.md"
13
- version = "2.8.22"
13
+ version = "2.8.27"
14
14
 
15
15
  [tool.poetry.dependencies]
16
16
  aiohttp = {extras = ["speedups"], version = "*"}
@@ -20,6 +20,7 @@ bert-score = ">=0.3.13"
20
20
  catboost = "*"
21
21
  datasets = "^2.10.0"
22
22
  evaluate = "*"
23
+ h11 = ">=0.16.0"
23
24
  ipywidgets = "*"
24
25
  kaleido = ">=0.2.1,!=0.2.1.post1"
25
26
  langchain-openai = {version = ">=0.1.8", optional = true}
@@ -37,7 +38,7 @@ plotly = "<6.0.0"
37
38
  plotly-express = "*"
38
39
  polars = "*"
39
40
  pycocoevalcap = {version = "^1.2", optional = true}
40
- python = ">=3.8.1,<3.12"
41
+ python = ">=3.9.0,<3.12"
41
42
  python-dotenv = "*"
42
43
  ragas = {version = ">=0.2.3,<=0.2.7", optional = true}
43
44
  rouge = ">=1"
@@ -52,7 +53,7 @@ statsmodels = "*"
52
53
  tabulate = "^0.8.9"
53
54
  textblob = "^0.18.0.post0"
54
55
  tiktoken = "*"
55
- torch = {version = ">=1.10.0", optional = true}
56
+ torch = {version = "2.7.0", optional = true}
56
57
  tqdm = "*"
57
58
  transformers = {version = "^4.32.0", optional = true}
58
59
  xgboost = ">=1.5.2,<3"
@@ -53,6 +53,7 @@ from .client import ( # noqa: E402
53
53
  run_documentation_tests,
54
54
  run_test_suite,
55
55
  )
56
+ from .experimental import agents as experimental_agent
56
57
  from .tests.decorator import tags, tasks, test
57
58
  from .tests.run import print_env
58
59
  from .utils import is_notebook, parse_version
@@ -126,4 +127,6 @@ __all__ = [ # noqa
126
127
  "unit_metrics",
127
128
  "test_suites",
128
129
  "log_text",
130
+ # experimental features
131
+ "experimental_agent",
129
132
  ]
@@ -0,0 +1 @@
1
+ __version__ = "2.8.27"
@@ -0,0 +1,219 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ import os
6
+ from urllib.parse import urljoin
7
+
8
+ from openai import AzureOpenAI, Client, OpenAI
9
+
10
+ from ..logging import get_logger
11
+ from ..utils import md_to_html
12
+
13
+ logger = get_logger(__name__)
14
+
15
+
16
+ __client = None
17
+ __model = None
18
+ __judge_llm = None
19
+ __judge_embeddings = None
20
+ EMBEDDINGS_MODEL = "text-embedding-3-small"
21
+
22
+ # can be None, True or False (ternary to represent initial state, ack and failed ack)
23
+ __ack = None
24
+
25
+
26
class DescriptionFuture:
    """Deferred handle for a generated test description.

    An instance is handed back right away so that tests can keep running in
    parallel while the description text is still being produced. The text is
    only resolved when `get_description` is called; if the wrapped future has
    not finished by then, that call blocks until it has.
    """

    def __init__(self, future):
        # Either an already-available description string or an object
        # exposing a blocking `.result()` method.
        self._future = future

    def get_description(self):
        """Resolve the description and return it converted by `md_to_html`."""
        pending = self._future
        # A plain string is used as-is; anything else is waited on via
        # `.result()`, which blocks until the value is available.
        text = pending if isinstance(pending, str) else pending.result()
        return md_to_html(text, mathml=True)
46
+
47
+
48
def get_client_and_model():
    """Get model and client to use for generating interpretations.

    The first successful call resolves a provider and stores the client and
    model name in module-level globals; later calls return that cached pair.

    Providers are tried in this order:

    1. ``OPENAI_API_KEY`` set: an ``OpenAI`` client, with the model taken from
       ``VM_OPENAI_MODEL`` (default ``"gpt-4o"``).
    2. ``AZURE_OPENAI_KEY`` set: an ``AzureOpenAI`` client. Requires
       ``AZURE_OPENAI_ENDPOINT`` and ``AZURE_OPENAI_MODEL``; the API version
       comes from ``AZURE_OPENAI_VERSION`` (default ``"2023-05-15"``).
    3. Otherwise: a key is requested through ``get_ai_key()`` and the client
       is pointed at the ``/genai`` path of the configured API host.

    Returns:
        tuple: ``(client, model)``.

    Raises:
        ValueError: If the Azure key is set without its endpoint or model, or
            if no provider could be configured. In the latter case the
            underlying error is attached as ``__cause__``.
    """
    global __client, __model

    if __client and __model:
        return __client, __model

    if "OPENAI_API_KEY" in os.environ:
        __client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
        __model = os.getenv("VM_OPENAI_MODEL", "gpt-4o")

        logger.debug(f"Using OpenAI {__model} for generating descriptions")

    elif "AZURE_OPENAI_KEY" in os.environ:
        if "AZURE_OPENAI_ENDPOINT" not in os.environ:
            raise ValueError(
                "AZURE_OPENAI_ENDPOINT must be set to run LLM tests with Azure"
            )

        if "AZURE_OPENAI_MODEL" not in os.environ:
            raise ValueError(
                "AZURE_OPENAI_MODEL must be set to run LLM tests with Azure"
            )

        __client = AzureOpenAI(
            azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
            api_key=os.getenv("AZURE_OPENAI_KEY"),
            api_version=os.getenv("AZURE_OPENAI_VERSION", "2023-05-15"),
        )
        __model = os.getenv("AZURE_OPENAI_MODEL")

        logger.debug(f"Using Azure OpenAI {__model} for generating descriptions")

    else:
        try:
            # TODO: fix circular import
            from ..api_client import get_ai_key, get_api_host

            response = get_ai_key()
            # Look the host up once and reuse it for both the check and the URL.
            api_host = get_api_host()
            __client = Client(
                base_url=(
                    # TODO: improve this to be a bit more dynamic
                    "http://localhost:4000/genai"
                    if "localhost" in api_host
                    else urljoin(api_host, "/genai")
                ),
                api_key=response["key"],
            )
            __model = "gpt-4o"  # TODO: backend should tell us which model to use
            logger.debug(f"Using ValidMind {__model} for generating descriptions")
        except Exception as e:
            logger.debug(f"Failed to get API key: {e}")
            # Chain the original error so the real cause stays in the traceback.
            raise ValueError(
                "OPENAI_API_KEY, AZURE_OPENAI_KEY must be set, or your account "
                "must be setup to use ValidMind's LLM in order to use LLM features"
            ) from e

    return __client, __model
110
+
111
+
112
def get_judge_config(judge_llm=None, judge_embeddings=None):
    """Resolve the judge LLM and embeddings model to use for LLM-based tests.

    When at least one argument is given, each argument that is a
    ``FunctionModel`` is unwrapped to its ``.model`` attribute. The pair is
    then validated and returned as-is, so an argument left as ``None`` stays
    ``None``.

    When neither argument is given, the module-level defaults are returned.
    They are created on first use from ``get_client_and_model()`` as a
    ``ChatOpenAI`` / ``OpenAIEmbeddings`` pair, and ``OPENAI_API_BASE`` is set
    in the environment to the client's base URL.

    Args:
        judge_llm: A langchain ``BaseChatModel``, a ``FunctionModel`` wrapping
            one, or ``None``.
        judge_embeddings: A langchain ``Embeddings`` instance, a
            ``FunctionModel`` wrapping one, or ``None``.

    Returns:
        tuple: ``(judge_llm, judge_embeddings)``.

    Raises:
        ImportError: If the optional LLM dependencies are not installed.
        ValueError: If a provided object, or the model wrapped by a provided
            ``FunctionModel``, is not langchain compatible.
    """
    try:
        from langchain_core.embeddings import Embeddings
        from langchain_core.language_models.chat_models import BaseChatModel
        from langchain_openai import ChatOpenAI, OpenAIEmbeddings

        from validmind.models.function import FunctionModel
    except ImportError as e:
        raise ImportError(
            "Please run `pip install validmind[llm]` to use LLM tests"
        ) from e

    if judge_llm is not None or judge_embeddings is not None:
        if isinstance(judge_llm, FunctionModel):
            if isinstance(judge_llm.model, BaseChatModel):
                judge_llm = judge_llm.model
            else:
                raise ValueError(
                    "The ValidMind Functional model provided does not have have a langchain compatible LLM as a model attribute."
                    "To use default ValidMind LLM, do not set judge_llm/judge_embedding parameter, "
                    "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
                )
        if isinstance(judge_embeddings, FunctionModel):
            # Validate the wrapped *embeddings* model. The previous code
            # inspected judge_llm.model here, which failed with AttributeError
            # when only embeddings were supplied and otherwise checked the
            # wrong object.
            if isinstance(judge_embeddings.model, Embeddings):
                judge_embeddings = judge_embeddings.model
            else:
                raise ValueError(
                    "The ValidMind Functional model provided does not have have a langchain compatible embeddings model as a model attribute."
                    "To use default ValidMind LLM, do not set judge_embedding parameter, "
                    "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
                )

        if (isinstance(judge_llm, BaseChatModel) or judge_llm is None) and (
            isinstance(judge_embeddings, Embeddings) or judge_embeddings is None
        ):
            return judge_llm, judge_embeddings
        else:
            raise ValueError(
                "Provided Judge LLM/Embeddings are not Langchain compatible. Ensure the judge LLM/embedding provided are an instance of "
                "Langchain BaseChatModel and LangchainEmbeddings. To use default ValidMind LLM, do not set judge_llm/judge_embedding parameter, "
                "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
            )

    # grab default values if not passed at run time
    global __judge_llm, __judge_embeddings
    if __judge_llm and __judge_embeddings:
        return __judge_llm, __judge_embeddings

    client, model = get_client_and_model()
    os.environ["OPENAI_API_BASE"] = str(client.base_url)

    __judge_llm = ChatOpenAI(api_key=client.api_key, model=model)
    __judge_embeddings = OpenAIEmbeddings(
        api_key=client.api_key, model=EMBEDDINGS_MODEL
    )

    return __judge_llm, __judge_embeddings
167
+
168
+
169
def set_judge_config(judge_llm, judge_embeddings):
    """Store the module-level default judge LLM and embeddings model.

    Two shapes of input are accepted:

    * a langchain ``BaseChatModel`` together with a langchain ``Embeddings``
      instance, which are stored directly;
    * two ``FunctionModel`` objects, whose ``.model`` attributes are stored
      (the wrapped models are not type-checked here).

    Any other combination, including one langchain object paired with one
    ``FunctionModel``, is rejected.

    Raises:
        ImportError: If the optional LLM dependencies are not installed.
        ValueError: If the arguments match neither accepted shape.
    """
    global __judge_llm, __judge_embeddings
    try:
        from langchain_core.embeddings import Embeddings
        from langchain_core.language_models.chat_models import BaseChatModel

        from validmind.models.function import FunctionModel
    except ImportError:
        raise ImportError("Please run `pip install validmind[llm]` to use LLM tests")

    # Case 1: both arguments are already langchain objects.
    if isinstance(judge_llm, BaseChatModel) and isinstance(
        judge_embeddings, Embeddings
    ):
        __judge_llm = judge_llm
        __judge_embeddings = judge_embeddings
        return

    # Case 2: both arguments are FunctionModel wrappers; keep the wrapped models.
    if isinstance(judge_llm, FunctionModel) and isinstance(
        judge_embeddings, FunctionModel
    ):
        __judge_llm = judge_llm.model
        __judge_embeddings = judge_embeddings.model
        return

    raise ValueError(
        "Provided Judge LLM/Embeddings are not Langchain compatible. Ensure the judge LLM/embedding provided are an instance of "
        "Langchain BaseChatModel and LangchainEmbeddings. To use default ValidMind LLM, do not set judge_llm/judge_embedding parameter, "
        "ensure that you are connected to the ValidMind API and confirm ValidMind AI is enabled for your account."
    )
195
+
196
+
197
def is_configured():
    """Report whether a chat-completion request to the configured LLM succeeds.

    A positive result is remembered in a module-level flag, so later calls
    return ``True`` without contacting the API. A failed attempt stores
    ``False``, which does not short-circuit, so the next call tries again.

    Returns:
        bool: ``True`` if the ping succeeded (now or earlier), else ``False``.
    """
    global __ack

    if __ack:
        return True

    try:
        client, model = get_client_and_model()
        # "Ping" the API with an empty user message capped at one token.
        ping = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": ""}],
            max_tokens=1,
        )
        logger.debug(
            f"Received response from OpenAI: {ping.choices[0].message.content}"
        )
    except Exception as e:
        # Best-effort probe: any failure just means "not configured".
        logger.debug(f"Failed to connect to OpenAI: {e}")
        __ack = False
    else:
        __ack = True

    return __ack
@@ -448,6 +448,7 @@ async def alog_metric(
448
448
  params: Optional[Dict[str, Any]] = None,
449
449
  recorded_at: Optional[str] = None,
450
450
  thresholds: Optional[Dict[str, Any]] = None,
451
+ passed: Optional[bool] = None,
451
452
  ):
452
453
  """See log_metric for details."""
453
454
  if not key or not isinstance(key, str):
@@ -476,6 +477,7 @@ async def alog_metric(
476
477
  "params": params or {},
477
478
  "recorded_at": recorded_at,
478
479
  "thresholds": thresholds or {},
480
+ "passed": passed if passed is not None else None,
479
481
  },
480
482
  cls=NumpyEncoder,
481
483
  allow_nan=False,
@@ -493,6 +495,7 @@ def log_metric(
493
495
  params: Optional[Dict[str, Any]] = None,
494
496
  recorded_at: Optional[str] = None,
495
497
  thresholds: Optional[Dict[str, Any]] = None,
498
+ passed: Optional[bool] = None,
496
499
  ):
497
500
  """Logs a unit metric.
498
501
 
@@ -518,6 +521,7 @@ def log_metric(
518
521
  params=params,
519
522
  recorded_at=recorded_at,
520
523
  thresholds=thresholds,
524
+ passed=passed,
521
525
  )
522
526
 
523
527
 
@@ -0,0 +1,65 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ """
6
+ Agent interface for all text generation tasks
7
+ """
8
+
9
+ import requests
10
+
11
+ from validmind.api_client import _get_api_headers, _get_url, raise_api_error
12
+ from validmind.utils import is_html, md_to_html
13
+ from validmind.vm_models.result import TextGenerationResult
14
+
15
+
16
def run_task(
    task: str,
    input: dict,
    show: bool = True,
) -> TextGenerationResult:
    """
    Run a text generation task through the ValidMind API.

    Args:
        task (str): Name of the generation task. Accepted values are
            'code_explainer' and 'qualitative_text_generation'.
        input (dict): Payload sent as the JSON body of the request.
        show (bool): Whether to display the generated result. Defaults to True.

    Returns:
        TextGenerationResult: Result object whose description holds the
            generated text as HTML.

    Raises:
        ValueError: If an unsupported task is provided
    """
    # Reject unknown tasks up front, before any request is made.
    if task not in ("code_explainer", "qualitative_text_generation"):
        raise ValueError(f"Unsupported task: {task}")

    response = requests.post(
        url=_get_url(f"ai/generate/{task}"),
        headers=_get_api_headers(),
        json=input,
    )

    # Any non-200 response is handed to the shared API error handler.
    if response.status_code != 200:
        raise_api_error(response.text)

    generated_text = response.json()["content"]

    # Content that is not already HTML is converted from markdown.
    if not is_html(generated_text):
        generated_text = md_to_html(generated_text, mathml=True)

    # Wrap the generated text in a result object.
    result = TextGenerationResult(
        result_type=f"{task}",
        description=generated_text,
        title=f"Text Generation: {task}",
        doc=f"Generated {task}",
    )

    if show:
        result.show()

    return result
@@ -68,8 +68,20 @@ def MutualInformation(
68
68
  if task not in ["classification", "regression"]:
69
69
  raise ValueError("task must be either 'classification' or 'regression'")
70
70
 
71
- X = dataset.x
72
- y = dataset.y
71
+ # Check if numeric features exist
72
+ if not dataset.feature_columns_numeric:
73
+ raise ValueError(
74
+ "No numeric features found in dataset. Mutual Information test requires numeric features."
75
+ )
76
+
77
+ # Check if target column exists
78
+ if not dataset.target_column:
79
+ raise ValueError(
80
+ "Target column is required for Mutual Information calculation but was not provided."
81
+ )
82
+
83
+ X = dataset._df[dataset.feature_columns_numeric]
84
+ y = dataset._df[dataset.target_column]
73
85
 
74
86
  # Select appropriate MI function based on task type
75
87
  if task == "classification":
@@ -34,6 +34,8 @@ def AnswerCorrectness(
34
34
  user_input_column="user_input",
35
35
  response_column="response",
36
36
  reference_column="reference",
37
+ judge_llm=None,
38
+ judge_embeddings=None,
37
39
  ):
38
40
  """
39
41
  Evaluates the correctness of answers in a dataset with respect to the provided ground
@@ -118,7 +120,9 @@ def AnswerCorrectness(
118
120
  df = get_renamed_columns(dataset._df, required_columns)
119
121
 
120
122
  result_df = evaluate(
121
- Dataset.from_pandas(df), metrics=[answer_correctness()], **get_ragas_config()
123
+ Dataset.from_pandas(df),
124
+ metrics=[answer_correctness()],
125
+ **get_ragas_config(judge_llm, judge_embeddings)
122
126
  ).to_pandas()
123
127
 
124
128
  score_column = "answer_correctness"
@@ -51,6 +51,8 @@ def AspectCritic(
51
51
  "maliciousness",
52
52
  ],
53
53
  additional_aspects: list = None,
54
+ judge_llm=None,
55
+ judge_embeddings=None,
54
56
  ):
55
57
  """
56
58
  Evaluates generations against the following aspects: harmfulness, maliciousness,
@@ -158,7 +160,9 @@ def AspectCritic(
158
160
  all_aspects = [built_in_aspects[aspect] for aspect in aspects] + custom_aspects
159
161
 
160
162
  result_df = evaluate(
161
- Dataset.from_pandas(df), metrics=all_aspects, **get_ragas_config()
163
+ Dataset.from_pandas(df),
164
+ metrics=all_aspects,
165
+ **get_ragas_config(judge_llm, judge_embeddings)
162
166
  ).to_pandas()
163
167
 
164
168
  # reverse the score for aspects where lower is better
@@ -33,6 +33,8 @@ def ContextEntityRecall(
33
33
  dataset,
34
34
  retrieved_contexts_column: str = "retrieved_contexts",
35
35
  reference_column: str = "reference",
36
+ judge_llm=None,
37
+ judge_embeddings=None,
36
38
  ):
37
39
  """
38
40
  Evaluates the context entity recall for dataset entries and visualizes the results.
@@ -113,7 +115,9 @@ def ContextEntityRecall(
113
115
  df = get_renamed_columns(dataset._df, required_columns)
114
116
 
115
117
  result_df = evaluate(
116
- Dataset.from_pandas(df), metrics=[context_entity_recall()], **get_ragas_config()
118
+ Dataset.from_pandas(df),
119
+ metrics=[context_entity_recall()],
120
+ **get_ragas_config(judge_llm, judge_embeddings)
117
121
  ).to_pandas()
118
122
 
119
123
  score_column = "context_entity_recall"
@@ -34,6 +34,8 @@ def ContextPrecision(
34
34
  user_input_column: str = "user_input",
35
35
  retrieved_contexts_column: str = "retrieved_contexts",
36
36
  reference_column: str = "reference",
37
+ judge_llm=None,
38
+ judge_embeddings=None,
37
39
  ): # noqa: B950
38
40
  """
39
41
  Context Precision is a metric that evaluates whether all of the ground-truth
@@ -109,7 +111,9 @@ def ContextPrecision(
109
111
  df = get_renamed_columns(dataset._df, required_columns)
110
112
 
111
113
  result_df = evaluate(
112
- Dataset.from_pandas(df), metrics=[context_precision()], **get_ragas_config()
114
+ Dataset.from_pandas(df),
115
+ metrics=[context_precision()],
116
+ **get_ragas_config(judge_llm, judge_embeddings)
113
117
  ).to_pandas()
114
118
 
115
119
  score_column = "llm_context_precision_with_reference"
@@ -34,6 +34,8 @@ def ContextPrecisionWithoutReference(
34
34
  user_input_column: str = "user_input",
35
35
  retrieved_contexts_column: str = "retrieved_contexts",
36
36
  response_column: str = "response",
37
+ judge_llm=None,
38
+ judge_embeddings=None,
37
39
  ): # noqa: B950
38
40
  """
39
41
  Context Precision Without Reference is a metric used to evaluate the relevance of
@@ -104,7 +106,9 @@ def ContextPrecisionWithoutReference(
104
106
  df = get_renamed_columns(dataset._df, required_columns)
105
107
 
106
108
  result_df = evaluate(
107
- Dataset.from_pandas(df), metrics=[context_precision()], **get_ragas_config()
109
+ Dataset.from_pandas(df),
110
+ metrics=[context_precision()],
111
+ **get_ragas_config(judge_llm, judge_embeddings)
108
112
  ).to_pandas()
109
113
 
110
114
  score_column = "llm_context_precision_without_reference"
@@ -34,6 +34,8 @@ def ContextRecall(
34
34
  user_input_column: str = "user_input",
35
35
  retrieved_contexts_column: str = "retrieved_contexts",
36
36
  reference_column: str = "reference",
37
+ judge_llm=None,
38
+ judge_embeddings=None,
37
39
  ):
38
40
  """
39
41
  Context recall measures the extent to which the retrieved context aligns with the
@@ -109,7 +111,9 @@ def ContextRecall(
109
111
  df = get_renamed_columns(dataset._df, required_columns)
110
112
 
111
113
  result_df = evaluate(
112
- Dataset.from_pandas(df), metrics=[context_recall()], **get_ragas_config()
114
+ Dataset.from_pandas(df),
115
+ metrics=[context_recall()],
116
+ **get_ragas_config(judge_llm, judge_embeddings)
113
117
  ).to_pandas()
114
118
 
115
119
  score_column = "context_recall"
@@ -34,6 +34,8 @@ def Faithfulness(
34
34
  user_input_column="user_input",
35
35
  response_column="response",
36
36
  retrieved_contexts_column="retrieved_contexts",
37
+ judge_llm=None,
38
+ judge_embeddings=None,
37
39
  ): # noqa
38
40
  """
39
41
  Evaluates the faithfulness of the generated answers with respect to retrieved contexts.
@@ -114,7 +116,9 @@ def Faithfulness(
114
116
  df = get_renamed_columns(dataset._df, required_columns)
115
117
 
116
118
  result_df = evaluate(
117
- Dataset.from_pandas(df), metrics=[faithfulness()], **get_ragas_config()
119
+ Dataset.from_pandas(df),
120
+ metrics=[faithfulness()],
121
+ **get_ragas_config(judge_llm, judge_embeddings)
118
122
  ).to_pandas()
119
123
 
120
124
  score_column = "faithfulness"
@@ -38,6 +38,8 @@ def NoiseSensitivity(
38
38
  reference_column="reference",
39
39
  focus="relevant",
40
40
  user_input_column="user_input",
41
+ judge_llm=None,
42
+ judge_embeddings=None,
41
43
  ):
42
44
  """
43
45
  Assesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it
@@ -149,7 +151,7 @@ def NoiseSensitivity(
149
151
  result_df = evaluate(
150
152
  Dataset.from_pandas(df),
151
153
  metrics=[noise_sensitivity(focus=focus)],
152
- **get_ragas_config(),
154
+ **get_ragas_config(judge_llm, judge_embeddings),
153
155
  ).to_pandas()
154
156
 
155
157
  score_column = f"noise_sensitivity_{focus}"
@@ -34,6 +34,8 @@ def ResponseRelevancy(
34
34
  user_input_column="user_input",
35
35
  retrieved_contexts_column=None,
36
36
  response_column="response",
37
+ judge_llm=None,
38
+ judge_embeddings=None,
37
39
  ):
38
40
  """
39
41
  Assesses how pertinent the generated answer is to the given prompt.
@@ -44,8 +46,8 @@ def ResponseRelevancy(
44
46
  relevancy. This metric is computed using the `user_input`, the `retrieved_contexts`
45
47
  and the `response`.
46
48
 
47
- The Response Relevancy is defined as the mean cosine similartiy of the original
48
- `user_input` to a number of artifical questions, which are generated (reverse-engineered)
49
+ The Response Relevancy is defined as the mean cosine similarity of the original
50
+ `user_input` to a number of artificial questions, which are generated (reverse-engineered)
49
51
  based on the `response`:
50
52
 
51
53
  $$
@@ -62,7 +64,7 @@ def ResponseRelevancy(
62
64
 
63
65
  **Note**: *This is a reference-free metric, meaning that it does not require a
64
66
  `ground_truth` answer to compare against. A similar metric that does evaluate the
65
- correctness of a generated answser with respect to a `ground_truth` answer is
67
+ correctness of a generated answers with respect to a `ground_truth` answer is
66
68
  `validmind.model_validation.ragas.AnswerCorrectness`.*
67
69
 
68
70
  ### Configuring Columns
@@ -128,7 +130,7 @@ def ResponseRelevancy(
128
130
  result_df = evaluate(
129
131
  Dataset.from_pandas(df),
130
132
  metrics=metrics,
131
- **get_ragas_config(),
133
+ **get_ragas_config(judge_llm, judge_embeddings),
132
134
  ).to_pandas()
133
135
 
134
136
  score_column = "answer_relevancy"
@@ -33,6 +33,8 @@ def SemanticSimilarity(
33
33
  dataset,
34
34
  response_column="response",
35
35
  reference_column="reference",
36
+ judge_llm=None,
37
+ judge_embeddings=None,
36
38
  ):
37
39
  """
38
40
  Calculates the semantic similarity between generated responses and ground truths
@@ -107,7 +109,9 @@ def SemanticSimilarity(
107
109
  df = get_renamed_columns(dataset._df, required_columns)
108
110
 
109
111
  result_df = evaluate(
110
- Dataset.from_pandas(df), metrics=[semantic_similarity()], **get_ragas_config()
112
+ Dataset.from_pandas(df),
113
+ metrics=[semantic_similarity()],
114
+ **get_ragas_config(judge_llm, judge_embeddings)
111
115
  ).to_pandas()
112
116
 
113
117
  score_column = "semantic_similarity"