validmind-2.8.28-py3-none-any.whl → validmind-2.9.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/utils.py +4 -24
  3. validmind/api_client.py +6 -17
  4. validmind/logging.py +48 -0
  5. validmind/models/function.py +11 -3
  6. validmind/tests/__init__.py +2 -0
  7. validmind/tests/__types__.py +18 -0
  8. validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
  9. validmind/tests/data_validation/ADF.py +3 -1
  10. validmind/tests/data_validation/AutoAR.py +3 -1
  11. validmind/tests/data_validation/AutoMA.py +5 -1
  12. validmind/tests/data_validation/AutoStationarity.py +5 -1
  13. validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
  14. validmind/tests/data_validation/BoxPierce.py +4 -1
  15. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  16. validmind/tests/data_validation/ClassImbalance.py +1 -1
  17. validmind/tests/data_validation/DatasetDescription.py +4 -1
  18. validmind/tests/data_validation/DatasetSplit.py +3 -2
  19. validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
  20. validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
  21. validmind/tests/data_validation/Duplicates.py +3 -1
  22. validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
  23. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  24. validmind/tests/data_validation/HighCardinality.py +3 -1
  25. validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
  26. validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
  27. validmind/tests/data_validation/IQROutliersTable.py +6 -1
  28. validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
  29. validmind/tests/data_validation/JarqueBera.py +3 -1
  30. validmind/tests/data_validation/KPSS.py +3 -1
  31. validmind/tests/data_validation/LJungBox.py +3 -1
  32. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
  33. validmind/tests/data_validation/MissingValues.py +5 -1
  34. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  35. validmind/tests/data_validation/MutualInformation.py +4 -1
  36. validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
  37. validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
  38. validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
  39. validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
  40. validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
  41. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
  42. validmind/tests/data_validation/RollingStatsPlot.py +5 -1
  43. validmind/tests/data_validation/RunsTest.py +1 -1
  44. validmind/tests/data_validation/ScatterPlot.py +2 -1
  45. validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
  46. validmind/tests/data_validation/SeasonalDecompose.py +6 -1
  47. validmind/tests/data_validation/ShapiroWilk.py +4 -1
  48. validmind/tests/data_validation/Skewness.py +3 -1
  49. validmind/tests/data_validation/SpreadPlot.py +3 -1
  50. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
  51. validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
  52. validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
  53. validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
  54. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  55. validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
  56. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
  57. validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
  58. validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
  59. validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
  60. validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
  61. validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
  62. validmind/tests/data_validation/TooManyZeroValues.py +6 -1
  63. validmind/tests/data_validation/UniqueRows.py +5 -1
  64. validmind/tests/data_validation/WOEBinPlots.py +4 -1
  65. validmind/tests/data_validation/WOEBinTable.py +5 -1
  66. validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
  67. validmind/tests/data_validation/nlp/CommonWords.py +2 -1
  68. validmind/tests/data_validation/nlp/Hashtags.py +2 -1
  69. validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
  70. validmind/tests/data_validation/nlp/Mentions.py +3 -1
  71. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
  72. validmind/tests/data_validation/nlp/Punctuations.py +2 -1
  73. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  74. validmind/tests/data_validation/nlp/StopWords.py +2 -1
  75. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  76. validmind/tests/data_validation/nlp/Toxicity.py +3 -1
  77. validmind/tests/load.py +91 -17
  78. validmind/tests/model_validation/BertScore.py +6 -3
  79. validmind/tests/model_validation/BleuScore.py +6 -1
  80. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
  81. validmind/tests/model_validation/ContextualRecall.py +6 -1
  82. validmind/tests/model_validation/FeaturesAUC.py +5 -1
  83. validmind/tests/model_validation/MeteorScore.py +6 -1
  84. validmind/tests/model_validation/ModelMetadata.py +2 -1
  85. validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
  86. validmind/tests/model_validation/RegardScore.py +7 -1
  87. validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
  88. validmind/tests/model_validation/RougeScore.py +8 -1
  89. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
  90. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
  91. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
  92. validmind/tests/model_validation/TokenDisparity.py +6 -1
  93. validmind/tests/model_validation/ToxicityScore.py +6 -1
  94. validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
  95. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
  96. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
  97. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
  98. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
  99. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
  100. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
  101. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
  102. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
  103. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
  104. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  105. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
  106. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  107. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
  108. validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
  109. validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
  110. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
  111. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
  112. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
  113. validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
  114. validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
  115. validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
  116. validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
  117. validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
  118. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
  119. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
  120. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
  121. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
  122. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  123. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
  124. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
  125. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
  126. validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
  127. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
  128. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
  129. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
  130. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
  131. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
  132. validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
  133. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
  134. validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
  135. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
  136. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
  137. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
  138. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
  139. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
  140. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  141. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
  142. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
  143. validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
  144. validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
  145. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
  146. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
  147. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
  148. validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
  149. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  150. validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
  151. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
  152. validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
  153. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
  154. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  155. validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
  156. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
  157. validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
  158. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  159. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
  160. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
  161. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
  162. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
  163. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
  164. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
  165. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  166. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
  167. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
  168. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
  169. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
  170. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
  171. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
  172. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
  173. validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
  174. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
  175. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
  176. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
  177. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
  178. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
  179. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
  180. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
  181. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
  182. validmind/tests/output.py +9 -2
  183. validmind/tests/plots/BoxPlot.py +260 -0
  184. validmind/tests/plots/CorrelationHeatmap.py +235 -0
  185. validmind/tests/plots/HistogramPlot.py +233 -0
  186. validmind/tests/plots/ViolinPlot.py +125 -0
  187. validmind/tests/plots/__init__.py +0 -0
  188. validmind/tests/prompt_validation/Bias.py +5 -1
  189. validmind/tests/prompt_validation/Clarity.py +5 -1
  190. validmind/tests/prompt_validation/Conciseness.py +5 -1
  191. validmind/tests/prompt_validation/Delimitation.py +5 -1
  192. validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
  193. validmind/tests/prompt_validation/Robustness.py +5 -1
  194. validmind/tests/prompt_validation/Specificity.py +5 -1
  195. validmind/tests/stats/CorrelationAnalysis.py +251 -0
  196. validmind/tests/stats/DescriptiveStats.py +197 -0
  197. validmind/tests/stats/NormalityTests.py +147 -0
  198. validmind/tests/stats/OutlierDetection.py +173 -0
  199. validmind/tests/stats/__init__.py +0 -0
  200. validmind/unit_metrics/classification/Accuracy.py +2 -1
  201. validmind/unit_metrics/classification/F1.py +2 -1
  202. validmind/unit_metrics/classification/Precision.py +2 -1
  203. validmind/unit_metrics/classification/ROC_AUC.py +2 -1
  204. validmind/unit_metrics/classification/Recall.py +2 -1
  205. validmind/unit_metrics/classification/individual/AbsoluteError.py +42 -0
  206. validmind/unit_metrics/classification/individual/BrierScore.py +56 -0
  207. validmind/unit_metrics/classification/individual/CalibrationError.py +77 -0
  208. validmind/unit_metrics/classification/individual/ClassBalance.py +65 -0
  209. validmind/unit_metrics/classification/individual/Confidence.py +52 -0
  210. validmind/unit_metrics/classification/individual/Correctness.py +41 -0
  211. validmind/unit_metrics/classification/individual/LogLoss.py +61 -0
  212. validmind/unit_metrics/classification/individual/OutlierScore.py +86 -0
  213. validmind/unit_metrics/classification/individual/ProbabilityError.py +54 -0
  214. validmind/unit_metrics/classification/individual/Uncertainty.py +60 -0
  215. validmind/unit_metrics/classification/individual/__init__.py +0 -0
  216. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
  217. validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
  218. validmind/unit_metrics/regression/HuberLoss.py +2 -1
  219. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
  220. validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
  221. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
  222. validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
  223. validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
  224. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  225. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  226. validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
  227. validmind/vm_models/dataset/dataset.py +291 -38
  228. validmind/vm_models/result/result.py +26 -4
  229. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/METADATA +2 -2
  230. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/RECORD +233 -212
  231. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/LICENSE +0 -0
  232. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/WHEEL +0 -0
  233. {validmind-2.8.28.dist-info → validmind-2.9.1.dist-info}/entry_points.txt +0 -0
validmind/tests/model_validation/ragas/ContextRecall.py
@@ -3,12 +3,15 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  import warnings
+ from typing import Dict, Tuple

  import plotly.express as px
+ import plotly.graph_objects as go
  from datasets import Dataset

  from validmind import RawData, tags, tasks
  from validmind.errors import MissingDependencyError
+ from validmind.vm_models import VMDataset

  from .utils import get_ragas_config, get_renamed_columns

@@ -30,13 +33,13 @@ except ImportError as e:
  @tags("ragas", "llm", "retrieval_performance")
  @tasks("text_qa", "text_generation", "text_summarization", "text_classification")
  def ContextRecall(
- dataset,
+ dataset: VMDataset,
  user_input_column: str = "user_input",
  retrieved_contexts_column: str = "retrieved_contexts",
  reference_column: str = "reference",
  judge_llm=None,
  judge_embeddings=None,
- ):
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
  """
  Context recall measures the extent to which the retrieved context aligns with the
  annotated answer, treated as the ground truth. It is computed based on the `ground
@@ -109,6 +112,7 @@ def ContextRecall(
  }

  df = get_renamed_columns(dataset._df, required_columns)
+ df = df[required_columns.keys()]

  result_df = evaluate(
  Dataset.from_pandas(df),

validmind/tests/model_validation/ragas/Faithfulness.py
@@ -3,12 +3,15 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  import warnings
+ from typing import Dict, Tuple

  import plotly.express as px
+ import plotly.graph_objects as go
  from datasets import Dataset

  from validmind import RawData, tags, tasks
  from validmind.errors import MissingDependencyError
+ from validmind.vm_models import VMDataset

  from .utils import get_ragas_config, get_renamed_columns

@@ -30,13 +33,13 @@ except ImportError as e:
  @tags("ragas", "llm", "rag_performance")
  @tasks("text_qa", "text_generation", "text_summarization")
  def Faithfulness(
- dataset,
- user_input_column="user_input",
- response_column="response",
- retrieved_contexts_column="retrieved_contexts",
+ dataset: VMDataset,
+ user_input_column: str = "user_input",
+ response_column: str = "response",
+ retrieved_contexts_column: str = "retrieved_contexts",
  judge_llm=None,
  judge_embeddings=None,
- ): # noqa
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
  """
  Evaluates the faithfulness of the generated answers with respect to retrieved contexts.

@@ -115,6 +118,7 @@ def Faithfulness(

  df = get_renamed_columns(dataset._df, required_columns)

+ df = df[required_columns.keys()]
  result_df = evaluate(
  Dataset.from_pandas(df),
  metrics=[faithfulness()],

validmind/tests/model_validation/ragas/NoiseSensitivity.py
@@ -3,12 +3,15 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  import warnings
+ from typing import Dict, Tuple

  import plotly.express as px
+ import plotly.graph_objects as go
  from datasets import Dataset

  from validmind import RawData, tags, tasks
  from validmind.errors import MissingDependencyError
+ from validmind.vm_models import VMDataset

  from .utils import get_ragas_config, get_renamed_columns

@@ -32,15 +35,15 @@ VALID_FOCUS_VALUES = ["relevant", "irrelevant"]
  @tags("ragas", "llm", "rag_performance")
  @tasks("text_qa", "text_generation", "text_summarization")
  def NoiseSensitivity(
- dataset,
- response_column="response",
- retrieved_contexts_column="retrieved_contexts",
- reference_column="reference",
- focus="relevant",
- user_input_column="user_input",
+ dataset: VMDataset,
+ response_column: str = "response",
+ retrieved_contexts_column: str = "retrieved_contexts",
+ reference_column: str = "reference",
+ focus: str = "relevant",
+ user_input_column: str = "user_input",
  judge_llm=None,
  judge_embeddings=None,
- ):
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
  """
  Assesses the sensitivity of a Large Language Model (LLM) to noise in retrieved context by measuring how often it
  generates incorrect responses.

validmind/tests/model_validation/ragas/ResponseRelevancy.py
@@ -3,12 +3,15 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  import warnings
+ from typing import Dict, Tuple

  import plotly.express as px
+ import plotly.graph_objects as go
  from datasets import Dataset

  from validmind import RawData, tags, tasks
  from validmind.errors import MissingDependencyError
+ from validmind.vm_models import VMDataset

  from .utils import get_ragas_config, get_renamed_columns

@@ -30,13 +33,13 @@ except ImportError as e:
  @tags("ragas", "llm", "rag_performance")
  @tasks("text_qa", "text_generation", "text_summarization")
  def ResponseRelevancy(
- dataset,
- user_input_column="user_input",
- retrieved_contexts_column=None,
- response_column="response",
+ dataset: VMDataset,
+ user_input_column: str = "user_input",
+ retrieved_contexts_column: str = None,
+ response_column: str = "response",
  judge_llm=None,
  judge_embeddings=None,
- ):
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
  """
  Assesses how pertinent the generated answer is to the given prompt.

@@ -124,6 +127,7 @@ def ResponseRelevancy(
  required_columns["retrieved_contexts"] = retrieved_contexts_column

  df = get_renamed_columns(dataset._df, required_columns)
+ df = df[required_columns.keys()]

  metrics = [response_relevancy()]

@@ -134,7 +138,6 @@ def ResponseRelevancy(
  ).to_pandas()

  score_column = "answer_relevancy"
-
  fig_histogram = px.histogram(
  x=result_df[score_column].to_list(), nbins=10, title="Response Relevancy"
  )

validmind/tests/model_validation/ragas/SemanticSimilarity.py
@@ -3,12 +3,15 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

  import warnings
+ from typing import Dict, Tuple

  import plotly.express as px
+ import plotly.graph_objects as go
  from datasets import Dataset

  from validmind import RawData, tags, tasks
  from validmind.errors import MissingDependencyError
+ from validmind.vm_models import VMDataset

  from .utils import get_ragas_config, get_renamed_columns

@@ -30,12 +33,12 @@ except ImportError as e:
  @tags("ragas", "llm")
  @tasks("text_qa", "text_generation", "text_summarization")
  def SemanticSimilarity(
- dataset,
- response_column="response",
- reference_column="reference",
+ dataset: VMDataset,
+ response_column: str = "response",
+ reference_column: str = "reference",
  judge_llm=None,
  judge_embeddings=None,
- ):
+ ) -> Tuple[Dict[str, list], go.Figure, go.Figure, RawData]:
  """
  Calculates the semantic similarity between generated responses and ground truths

validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  from sklearn.metrics import adjusted_mutual_info_score

  from validmind import RawData, tags, tasks
@@ -10,7 +12,9 @@ from validmind.vm_models import VMDataset, VMModel

  @tags("sklearn", "model_performance", "clustering")
  @tasks("clustering")
- def AdjustedMutualInformation(model: VMModel, dataset: VMDataset):
+ def AdjustedMutualInformation(
+ model: VMModel, dataset: VMDataset
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Evaluates clustering model performance by measuring mutual information between true and predicted labels, adjusting
  for chance.

validmind/tests/model_validation/sklearn/AdjustedRandIndex.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  from sklearn.metrics import adjusted_rand_score

  from validmind import RawData, tags, tasks
@@ -10,7 +12,9 @@ from validmind.vm_models import VMDataset, VMModel

  @tags("sklearn", "model_performance", "clustering")
  @tasks("clustering")
- def AdjustedRandIndex(model: VMModel, dataset: VMDataset):
+ def AdjustedRandIndex(
+ model: VMModel, dataset: VMDataset
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Measures the similarity between two data clusters using the Adjusted Rand Index (ARI) metric in clustering machine
  learning models.

validmind/tests/model_validation/sklearn/CalibrationCurve.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Tuple
+
  import plotly.graph_objects as go
  from sklearn.calibration import calibration_curve

@@ -12,7 +14,9 @@ from validmind.vm_models.result import RawData

  @tags("sklearn", "model_performance", "classification")
  @tasks("classification")
- def CalibrationCurve(model: VMModel, dataset: VMDataset, n_bins: int = 10):
+ def CalibrationCurve(
+ model: VMModel, dataset: VMDataset, n_bins: int = 10
+ ) -> Tuple[go.Figure, RawData]:
  """
  Evaluates the calibration of probability estimates by comparing predicted probabilities against observed
  frequencies.

validmind/tests/model_validation/sklearn/ClassifierPerformance.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List
+
  import numpy as np
  from sklearn.metrics import classification_report, roc_auc_score
  from sklearn.preprocessing import LabelBinarizer
@@ -20,7 +22,9 @@ def multiclass_roc_auc_score(y_test, y_pred, average="macro"):
  "sklearn", "binary_classification", "multiclass_classification", "model_performance"
  )
  @tasks("classification", "text_classification")
- def ClassifierPerformance(dataset: VMDataset, model: VMModel, average: str = "macro"):
+ def ClassifierPerformance(
+ dataset: VMDataset, model: VMModel, average: str = "macro"
+ ) -> Dict[str, List[Dict[str, float]]]:
  """
  Evaluates performance of binary or multiclass classification models using precision, recall, F1-Score, accuracy,
  and ROC AUC scores.

validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  import numpy as np
  from sklearn.metrics.pairwise import cosine_similarity

@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel

  @tags("sklearn", "model_performance", "clustering")
  @tasks("clustering")
- def ClusterCosineSimilarity(model: VMModel, dataset: VMDataset):
+ def ClusterCosineSimilarity(
+ model: VMModel, dataset: VMDataset
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Measures the intra-cluster similarity of a clustering model using cosine similarity.

validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  from sklearn.metrics import (
  adjusted_mutual_info_score,
  adjusted_rand_score,
@@ -69,7 +71,9 @@ identify members of the same class (precision) and the ability to capture all me

  @tags("sklearn", "model_performance", "clustering")
  @tasks("clustering")
- def ClusterPerformanceMetrics(model: VMModel, dataset: VMDataset):
+ def ClusterPerformanceMetrics(
+ model: VMModel, dataset: VMDataset
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Evaluates the performance of clustering machine learning models using multiple established metrics.

validmind/tests/model_validation/sklearn/CompletenessScore.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  from sklearn.metrics import completeness_score

  from validmind import RawData, tags, tasks
@@ -10,7 +12,9 @@ from validmind.vm_models import VMDataset, VMModel

  @tags("sklearn", "model_performance", "clustering")
  @tasks("clustering")
- def CompletenessScore(model: VMModel, dataset: VMDataset):
+ def CompletenessScore(
+ model: VMModel, dataset: VMDataset
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Evaluates a clustering model's capacity to categorize instances from a single class into the same cluster.

validmind/tests/model_validation/sklearn/ConfusionMatrix.py
@@ -3,8 +3,11 @@
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial


+ from typing import Tuple
+
  import numpy as np
  import plotly.figure_factory as ff
+ import plotly.graph_objects as go
  from sklearn.metrics import confusion_matrix

  from validmind import RawData, tags, tasks
@@ -23,7 +26,7 @@ def ConfusionMatrix(
  dataset: VMDataset,
  model: VMModel,
  threshold: float = 0.5,
- ):
+ ) -> Tuple[go.Figure, RawData]:
  """
  Evaluates and visually represents the classification ML model's predictive performance using a Confusion Matrix
  heatmap.

validmind/tests/model_validation/sklearn/FeatureImportance.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Tuple
+
  import pandas as pd
  from sklearn.inspection import permutation_importance

@@ -11,7 +13,9 @@ from validmind.vm_models import VMDataset, VMModel

  @tags("model_explainability", "sklearn")
  @tasks("regression", "time_series_forecasting")
- def FeatureImportance(dataset: VMDataset, model: VMModel, num_features: int = 3):
+ def FeatureImportance(
+ dataset: VMDataset, model: VMModel, num_features: int = 3
+ ) -> Tuple[pd.DataFrame, RawData]:
  """
  Compute feature importance scores for a given model and generate a summary table
  with the top important features.

validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  from sklearn import metrics

  from validmind import RawData, tags, tasks
@@ -10,7 +12,9 @@ from validmind.vm_models import VMDataset, VMModel

  @tags("sklearn", "model_performance")
  @tasks("clustering")
- def FowlkesMallowsScore(dataset: VMDataset, model: VMModel):
+ def FowlkesMallowsScore(
+ dataset: VMDataset, model: VMModel
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Evaluates the similarity between predicted and actual cluster assignments in a model using the Fowlkes-Mallows
  score.

validmind/tests/model_validation/sklearn/HomogeneityScore.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  from sklearn import metrics

  from validmind import RawData, tags, tasks
@@ -10,7 +12,9 @@ from validmind.vm_models import VMDataset, VMModel

  @tags("sklearn", "model_performance")
  @tasks("clustering")
- def HomogeneityScore(dataset: VMDataset, model: VMModel):
+ def HomogeneityScore(
+ dataset: VMDataset, model: VMModel
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Assesses clustering homogeneity by comparing true and predicted labels, scoring from 0 (heterogeneous) to 1
  (homogeneous).

validmind/tests/model_validation/sklearn/HyperParametersTuning.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from typing import Dict, List, Union
+ from typing import Dict, List, Tuple, Union

  from sklearn.metrics import make_scorer, recall_score
  from sklearn.model_selection import GridSearchCV
@@ -11,8 +11,6 @@ from validmind import RawData, tags, tasks
  from validmind.vm_models import VMDataset, VMModel


- @tags("sklearn", "model_performance")
- @tasks("classification", "clustering")
  def custom_recall(y_true, y_pred_proba, threshold=0.5):
  y_pred = (y_pred_proba >= threshold).astype(int)
  return recall_score(y_true, y_pred)
@@ -65,7 +63,7 @@ def HyperParametersTuning(
  scoring: Union[str, List, Dict] = None,
  thresholds: Union[float, List[float]] = None,
  fit_params: dict = None,
- ):
+ ) -> Tuple[List[Dict[str, float]], RawData]:
  """
  Performs exhaustive grid search over specified parameter ranges to find optimal model configurations
  across different metrics and decision thresholds.

validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from typing import List, Union
+ from typing import List, Optional, Tuple

  import numpy as np
  import plotly.graph_objects as go
@@ -19,8 +19,8 @@ from validmind.vm_models import VMDataset, VMModel
  @tags("sklearn", "model_performance", "kmeans")
  @tasks("clustering")
  def KMeansClustersOptimization(
- model: VMModel, dataset: VMDataset, n_clusters: Union[List[int], None] = None
- ):
+ model: VMModel, dataset: VMDataset, n_clusters: Optional[List[int]] = None
+ ) -> Tuple[go.Figure, RawData]:
  """
  Optimizes the number of clusters in K-means models using Elbow and Silhouette methods.

validmind/tests/model_validation/sklearn/MinimumAccuracy.py
@@ -1,6 +1,8 @@
  # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
+ from typing import Dict, List, Tuple
+
  from sklearn.metrics import accuracy_score

  from validmind import RawData
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset, VMModel
  "sklearn", "binary_classification", "multiclass_classification", "model_performance"
  )
  @tasks("classification", "text_classification")
- def MinimumAccuracy(dataset: VMDataset, model: VMModel, min_threshold: float = 0.7):
+ def MinimumAccuracy(
+ dataset: VMDataset, model: VMModel, min_threshold: float = 0.7
+ ) -> Tuple[List[Dict[str, float]], bool, RawData]:
  """
  Checks if the model's prediction accuracy meets or surpasses a specified threshold.

validmind/tests/model_validation/sklearn/MinimumF1Score.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  import numpy as np
  from sklearn.metrics import f1_score

@@ -14,7 +16,9 @@ from validmind.vm_models import VMDataset, VMModel
  "sklearn", "binary_classification", "multiclass_classification", "model_performance"
  )
  @tasks("classification", "text_classification")
- def MinimumF1Score(dataset: VMDataset, model: VMModel, min_threshold: float = 0.5):
+ def MinimumF1Score(
+ dataset: VMDataset, model: VMModel, min_threshold: float = 0.5
+ ) -> Tuple[List[Dict[str, float]], bool, RawData]:
  """
  Assesses if the model's F1 score on the validation set meets a predefined minimum threshold, ensuring balanced
  performance between precision and recall.

validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List, Tuple
+
  import numpy as np
  from sklearn.metrics import roc_auc_score
  from sklearn.preprocessing import LabelBinarizer
@@ -14,7 +16,9 @@ from validmind.vm_models import VMDataset, VMModel
  "sklearn", "binary_classification", "multiclass_classification", "model_performance"
  )
  @tasks("classification", "text_classification")
- def MinimumROCAUCScore(dataset: VMDataset, model: VMModel, min_threshold: float = 0.5):
+ def MinimumROCAUCScore(
+ dataset: VMDataset, model: VMModel, min_threshold: float = 0.5
+ ) -> Tuple[List[Dict[str, float]], bool, RawData]:
  """
  Validates model by checking if the ROC AUC score meets or surpasses a specified threshold.

validmind/tests/model_validation/sklearn/ModelParameters.py
@@ -2,14 +2,19 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import List, Optional
+
  import pandas as pd

  from validmind import tags, tasks
+ from validmind.vm_models import VMModel


  @tags("model_training", "metadata")
  @tasks("classification", "regression")
- def ModelParameters(model, model_params=None):
+ def ModelParameters(
+ model: VMModel, model_params: Optional[List[str]] = None
+ ) -> pd.DataFrame:
  """
  Extracts and displays model parameters in a structured format for transparency and reproducibility.

validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py
@@ -2,6 +2,8 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

+ from typing import Dict, List
+
  import numpy as np
  from sklearn.metrics import classification_report

@@ -19,7 +21,9 @@ from .ClassifierPerformance import multiclass_roc_auc_score
  "model_comparison",
  )
  @tasks("classification", "text_classification")
- def ModelsPerformanceComparison(dataset: VMDataset, models: list[VMModel]):
+ def ModelsPerformanceComparison(
+ dataset: VMDataset, models: list[VMModel]
+ ) -> Dict[str, List[Dict[str, float]]]:
  """
  Evaluates and compares the performance of multiple Machine Learning models using various metrics like accuracy,
  precision, recall, and F1 score.

validmind/tests/model_validation/sklearn/OverfitDiagnosis.py
@@ -2,11 +2,12 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from typing import List
+ from typing import Dict, List, Tuple

  import matplotlib.pyplot as plt
  import numpy as np
  import pandas as pd
+ import plotly.graph_objects as go
  import seaborn as sns
  from sklearn import metrics

@@ -178,7 +179,7 @@ def OverfitDiagnosis(
  datasets: List[VMDataset],
  metric: str = None,
  cut_off_threshold: float = DEFAULT_THRESHOLD,
- ):
+ ) -> Tuple[Dict[str, List[Dict[str, float]]], go.Figure, RawData]:
  """
  Assesses potential overfitting in a model's predictions, identifying regions where performance between training and
  testing sets deviates significantly.

validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from typing import Union
+ from typing import Optional, Tuple

  import plotly.graph_objects as go
  from sklearn.inspection import permutation_importance
@@ -26,9 +26,9 @@ logger = get_logger(__name__)
  def PermutationFeatureImportance(
  model: VMModel,
  dataset: VMDataset,
- fontsize: Union[int, None] = None,
- figure_height: Union[int, None] = None,
- ):
+ fontsize: Optional[int] = None,
+ figure_height: Optional[int] = None,
+ ) -> Tuple[go.Figure, RawData]:
  """
  Assesses the significance of each feature in a model by evaluating the impact on model performance when feature
  values are randomly rearranged.

validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py
@@ -2,7 +2,7 @@
  # See the LICENSE file in the root of this repository for details.
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial

- from typing import List
+ from typing import Dict, List, Tuple

  import numpy as np
  import pandas as pd
@@ -82,7 +82,7 @@ def calculate_psi(score_initial, score_new, num_bins=10, mode="fixed"):
  @tasks("classification", "text_classification")
  def PopulationStabilityIndex(
  datasets: List[VMDataset], model: VMModel, num_bins: int = 10, mode: str = "fixed"
- ):
+ ) -> Tuple[Dict[str, List[Dict[str, float]]], go.Figure, RawData]:
  """
  Assesses the Population Stability Index (PSI) to quantify the stability of an ML model's predictions across
  different datasets.