validmind 2.8.27__py3-none-any.whl → 2.8.29__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. validmind/__version__.py +1 -1
  2. validmind/ai/utils.py +1 -1
  3. validmind/models/function.py +11 -3
  4. validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
  5. validmind/tests/data_validation/ADF.py +3 -1
  6. validmind/tests/data_validation/AutoAR.py +3 -1
  7. validmind/tests/data_validation/AutoMA.py +5 -1
  8. validmind/tests/data_validation/AutoStationarity.py +5 -1
  9. validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
  10. validmind/tests/data_validation/BoxPierce.py +4 -1
  11. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  12. validmind/tests/data_validation/ClassImbalance.py +1 -1
  13. validmind/tests/data_validation/DatasetDescription.py +4 -1
  14. validmind/tests/data_validation/DatasetSplit.py +3 -2
  15. validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
  16. validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
  17. validmind/tests/data_validation/Duplicates.py +3 -1
  18. validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
  19. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  20. validmind/tests/data_validation/HighCardinality.py +3 -1
  21. validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
  22. validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
  23. validmind/tests/data_validation/IQROutliersTable.py +6 -1
  24. validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
  25. validmind/tests/data_validation/JarqueBera.py +3 -1
  26. validmind/tests/data_validation/KPSS.py +3 -1
  27. validmind/tests/data_validation/LJungBox.py +3 -1
  28. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
  29. validmind/tests/data_validation/MissingValues.py +5 -1
  30. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  31. validmind/tests/data_validation/MutualInformation.py +4 -1
  32. validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
  33. validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
  34. validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
  35. validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
  36. validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
  37. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
  38. validmind/tests/data_validation/RollingStatsPlot.py +5 -1
  39. validmind/tests/data_validation/RunsTest.py +1 -1
  40. validmind/tests/data_validation/ScatterPlot.py +2 -1
  41. validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
  42. validmind/tests/data_validation/SeasonalDecompose.py +6 -1
  43. validmind/tests/data_validation/ShapiroWilk.py +4 -1
  44. validmind/tests/data_validation/Skewness.py +3 -1
  45. validmind/tests/data_validation/SpreadPlot.py +3 -1
  46. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
  47. validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
  48. validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
  49. validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
  50. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  51. validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
  52. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
  53. validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
  54. validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
  55. validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
  56. validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
  57. validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
  58. validmind/tests/data_validation/TooManyZeroValues.py +6 -1
  59. validmind/tests/data_validation/UniqueRows.py +5 -1
  60. validmind/tests/data_validation/WOEBinPlots.py +4 -1
  61. validmind/tests/data_validation/WOEBinTable.py +5 -1
  62. validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
  63. validmind/tests/data_validation/nlp/CommonWords.py +2 -1
  64. validmind/tests/data_validation/nlp/Hashtags.py +2 -1
  65. validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
  66. validmind/tests/data_validation/nlp/Mentions.py +3 -1
  67. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
  68. validmind/tests/data_validation/nlp/Punctuations.py +2 -1
  69. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  70. validmind/tests/data_validation/nlp/StopWords.py +2 -1
  71. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  72. validmind/tests/data_validation/nlp/Toxicity.py +3 -1
  73. validmind/tests/load.py +91 -17
  74. validmind/tests/model_validation/BertScore.py +6 -3
  75. validmind/tests/model_validation/BleuScore.py +6 -1
  76. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
  77. validmind/tests/model_validation/ContextualRecall.py +6 -1
  78. validmind/tests/model_validation/FeaturesAUC.py +5 -1
  79. validmind/tests/model_validation/MeteorScore.py +6 -1
  80. validmind/tests/model_validation/ModelMetadata.py +2 -1
  81. validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
  82. validmind/tests/model_validation/RegardScore.py +7 -1
  83. validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
  84. validmind/tests/model_validation/RougeScore.py +8 -1
  85. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
  86. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
  87. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
  88. validmind/tests/model_validation/TokenDisparity.py +6 -1
  89. validmind/tests/model_validation/ToxicityScore.py +6 -1
  90. validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
  91. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
  92. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
  93. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
  94. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
  95. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
  97. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
  98. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
  105. validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
  106. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
  107. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
  108. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
  109. validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
  110. validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
  111. validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
  112. validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
  113. validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
  114. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
  115. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
  116. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
  117. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
  118. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  119. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
  120. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
  121. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
  122. validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
  123. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
  124. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
  125. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
  126. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
  127. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
  128. validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
  129. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
  130. validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
  131. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
  132. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
  133. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
  134. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
  135. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
  136. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  137. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
  138. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
  139. validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
  140. validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
  141. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
  142. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
  143. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
  144. validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
  145. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  146. validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
  147. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
  148. validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
  149. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
  150. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  151. validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
  153. validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
  154. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  155. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
  156. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
  157. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
  159. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
  160. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
  161. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  162. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
  163. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
  164. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
  165. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
  166. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
  167. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
  168. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
  169. validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
  170. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
  171. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
  172. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
  173. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
  174. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
  175. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
  176. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
  177. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
  178. validmind/tests/prompt_validation/Bias.py +5 -1
  179. validmind/tests/prompt_validation/Clarity.py +5 -1
  180. validmind/tests/prompt_validation/Conciseness.py +5 -1
  181. validmind/tests/prompt_validation/Delimitation.py +5 -1
  182. validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
  183. validmind/tests/prompt_validation/Robustness.py +5 -1
  184. validmind/tests/prompt_validation/Specificity.py +5 -1
  185. validmind/unit_metrics/classification/Accuracy.py +2 -1
  186. validmind/unit_metrics/classification/F1.py +2 -1
  187. validmind/unit_metrics/classification/Precision.py +2 -1
  188. validmind/unit_metrics/classification/ROC_AUC.py +2 -1
  189. validmind/unit_metrics/classification/Recall.py +2 -1
  190. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
  191. validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
  192. validmind/unit_metrics/regression/HuberLoss.py +2 -1
  193. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
  194. validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
  195. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
  196. validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
  197. validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
  198. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  199. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  200. validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
  201. validmind/vm_models/dataset/dataset.py +145 -38
  202. validmind/vm_models/result/result.py +14 -12
  203. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
  204. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/RECORD +207 -207
  205. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
  206. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
  207. {validmind-2.8.27.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
@@ -2,15 +2,20 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import List, Tuple
6
+
5
7
  import plotly.graph_objects as go
6
8
  from plotly.subplots import make_subplots
7
9
 
8
10
  from validmind import tags, tasks
11
+ from validmind.vm_models import VMDataset, VMModel
9
12
 
10
13
 
11
14
  @tags("visualization")
12
15
  @tasks("monitoring")
13
- def PredictionQuantilesAcrossFeatures(datasets, model):
16
+ def PredictionQuantilesAcrossFeatures(
17
+ datasets: List[VMDataset], model: VMModel
18
+ ) -> Tuple[go.Figure, ...]:
14
19
  """
15
20
  Assesses differences in model prediction distributions across individual features between reference
16
21
  and monitoring datasets through quantile analysis.
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import plotly.graph_objects as go
@@ -20,7 +20,9 @@ from validmind.vm_models import VMDataset, VMModel
20
20
  "visualization",
21
21
  )
22
22
  @tasks("classification", "text_classification")
23
- def ROCCurveDrift(datasets: List[VMDataset], model: VMModel):
23
+ def ROCCurveDrift(
24
+ datasets: List[VMDataset], model: VMModel
25
+ ) -> Tuple[go.Figure, go.Figure, RawData]:
24
26
  """
25
27
  Compares ROC curves between reference and monitoring datasets.
26
28
 
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
@@ -19,7 +19,7 @@ def ScoreBandsDrift(
19
19
  score_column: str = "score",
20
20
  score_bands: list = None,
21
21
  drift_threshold: float = 20.0,
22
- ):
22
+ ) -> Tuple[Dict[str, pd.DataFrame], bool, RawData]:
23
23
  """
24
24
  Analyzes drift in population distribution and default rates across score bands.
25
25
 
@@ -2,7 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
- from typing import List
5
+ from typing import Dict, List, Tuple
6
6
 
7
7
  import numpy as np
8
8
  import pandas as pd
@@ -21,7 +21,7 @@ def ScorecardHistogramDrift(
21
21
  score_column: str = "score",
22
22
  title: str = "Scorecard Histogram Drift",
23
23
  drift_pct_threshold: float = 20.0,
24
- ):
24
+ ) -> Tuple[go.Figure, Dict[str, pd.DataFrame], bool]:
25
25
  """
26
26
  Compares score distributions between reference and monitoring datasets for each class.
27
27
 
@@ -2,17 +2,24 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, List, Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.figure_factory as ff
7
9
  import plotly.graph_objects as go
8
10
  from scipy.stats import kurtosis, skew
9
11
 
10
12
  from validmind import RawData, tags, tasks
13
+ from validmind.vm_models import VMDataset, VMModel
11
14
 
12
15
 
13
16
  @tags("visualization")
14
17
  @tasks("monitoring")
15
- def TargetPredictionDistributionPlot(datasets, model, drift_pct_threshold=20):
18
+ def TargetPredictionDistributionPlot(
19
+ datasets: List[VMDataset],
20
+ model: VMModel,
21
+ drift_pct_threshold: float = 20,
22
+ ) -> Tuple[Dict[str, pd.DataFrame], go.Figure, bool, RawData]:
16
23
  """
17
24
  Assesses differences in prediction distributions between a reference dataset and a monitoring dataset to identify
18
25
  potential data drift.
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import RawData, tags, tasks
6
8
  from validmind.errors import MissingRequiredTestInputError
7
9
 
@@ -45,7 +47,9 @@ Prompt:
45
47
 
46
48
  @tags("llm", "few_shot")
47
49
  @tasks("text_classification", "text_summarization")
48
- def Bias(model, min_threshold=7, judge_llm=None):
50
+ def Bias(
51
+ model, min_threshold=7, judge_llm=None
52
+ ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
49
53
  """
50
54
  Assesses potential bias in a Large Language Model by analyzing the distribution and order of exemplars in the
51
55
  prompt.
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import RawData, tags, tasks
6
8
  from validmind.errors import MissingRequiredTestInputError
7
9
 
@@ -46,7 +48,9 @@ Prompt:
46
48
 
47
49
  @tags("llm", "zero_shot", "few_shot")
48
50
  @tasks("text_classification", "text_summarization")
49
- def Clarity(model, min_threshold=7, judge_llm=None):
51
+ def Clarity(
52
+ model, min_threshold=7, judge_llm=None
53
+ ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
50
54
  """
51
55
  Evaluates and scores the clarity of prompts in a Large Language Model based on specified guidelines.
52
56
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import RawData, tags, tasks
6
8
  from validmind.errors import MissingRequiredTestInputError
7
9
 
@@ -54,7 +56,9 @@ Prompt:
54
56
 
55
57
  @tags("llm", "zero_shot", "few_shot")
56
58
  @tasks("text_classification", "text_summarization")
57
- def Conciseness(model, min_threshold=7, judge_llm=None):
59
+ def Conciseness(
60
+ model, min_threshold=7, judge_llm=None
61
+ ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
58
62
  """
59
63
  Analyzes and grades the conciseness of prompts provided to a Large Language Model.
60
64
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import RawData, tags, tasks
6
8
  from validmind.errors import MissingRequiredTestInputError
7
9
 
@@ -39,7 +41,9 @@ Prompt:
39
41
 
40
42
  @tags("llm", "zero_shot", "few_shot")
41
43
  @tasks("text_classification", "text_summarization")
42
- def Delimitation(model, min_threshold=7, judge_llm=None):
44
+ def Delimitation(
45
+ model, min_threshold=7, judge_llm=None
46
+ ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
43
47
  """
44
48
  Evaluates the proper use of delimiters in prompts provided to Large Language Models.
45
49
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import RawData, tags, tasks
6
8
  from validmind.errors import MissingRequiredTestInputError
7
9
 
@@ -52,7 +54,9 @@ Prompt:
52
54
 
53
55
  @tags("llm", "zero_shot", "few_shot")
54
56
  @tasks("text_classification", "text_summarization")
55
- def NegativeInstruction(model, min_threshold=7, judge_llm=None):
57
+ def NegativeInstruction(
58
+ model, min_threshold=7, judge_llm=None
59
+ ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
56
60
  """
57
61
  Evaluates and grades the use of affirmative, proactive language over negative instructions in LLM prompts.
58
62
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  import pandas as pd
6
8
 
7
9
  from validmind import RawData, tags, tasks
@@ -56,7 +58,9 @@ Input:
56
58
 
57
59
  @tags("llm", "zero_shot", "few_shot")
58
60
  @tasks("text_classification", "text_summarization")
59
- def Robustness(model, dataset, num_tests=10, judge_llm=None):
61
+ def Robustness(
62
+ model, dataset, num_tests=10, judge_llm=None
63
+ ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
60
64
  """
61
65
  Assesses the robustness of prompts provided to a Large Language Model under varying conditions and contexts. This test
62
66
  specifically measures the model's ability to generate correct classifications with the given prompt even when the
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import RawData, tags, tasks
6
8
  from validmind.errors import MissingRequiredTestInputError
7
9
 
@@ -52,7 +54,9 @@ Prompt:
52
54
 
53
55
  @tags("llm", "zero_shot", "few_shot")
54
56
  @tasks("text_classification", "text_summarization")
55
- def Specificity(model, min_threshold=7, judge_llm=None):
57
+ def Specificity(
58
+ model, min_threshold=7, judge_llm=None
59
+ ) -> Tuple[List[Dict[str, Any]], bool, RawData]:
56
60
  """
57
61
  Evaluates and scores the specificity of prompts provided to a Large Language Model (LLM), based on clarity, detail,
58
62
  and relevance.
@@ -5,10 +5,11 @@
5
5
  from sklearn.metrics import accuracy_score
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tasks("classification")
11
12
  @tags("classification")
12
- def Accuracy(dataset, model):
13
+ def Accuracy(dataset: VMDataset, model: VMModel) -> float:
13
14
  """Calculates the accuracy of a model"""
14
15
  return accuracy_score(dataset.y, dataset.y_pred(model))
@@ -5,10 +5,11 @@
5
5
  from sklearn.metrics import f1_score
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tasks("classification")
11
12
  @tags("classification")
12
- def F1(model, dataset, **kwargs):
13
+ def F1(model: VMModel, dataset: VMDataset, **kwargs) -> float:
13
14
  """Calculates the F1 score for a classification model."""
14
15
  return f1_score(dataset.y, dataset.y_pred(model), **kwargs)
@@ -5,10 +5,11 @@
5
5
  from sklearn.metrics import precision_score
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tasks("classification")
11
12
  @tags("classification")
12
- def Precision(model, dataset, **kwargs):
13
+ def Precision(model: VMModel, dataset: VMDataset, **kwargs) -> float:
13
14
  """Calculates the precision for a classification model."""
14
15
  return precision_score(dataset.y, dataset.y_pred(model), **kwargs)
@@ -7,11 +7,12 @@ from sklearn.metrics import roc_auc_score
7
7
  from sklearn.preprocessing import LabelBinarizer
8
8
 
9
9
  from validmind import tags, tasks
10
+ from validmind.vm_models import VMDataset, VMModel
10
11
 
11
12
 
12
13
  @tasks("classification")
13
14
  @tags("classification")
14
- def ROC_AUC(model, dataset, **kwargs):
15
+ def ROC_AUC(model: VMModel, dataset: VMDataset, **kwargs) -> float:
15
16
  """Calculates the ROC AUC for a classification model."""
16
17
  y_true = dataset.y
17
18
 
@@ -5,10 +5,11 @@
5
5
  from sklearn.metrics import recall_score
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tasks("classification")
11
12
  @tags("classification")
12
- def Recall(model, dataset, **kwargs):
13
+ def Recall(model: VMModel, dataset: VMDataset, **kwargs) -> float:
13
14
  """Calculates the recall for a classification model."""
14
15
  return recall_score(dataset.y, dataset.y_pred(model), **kwargs)
@@ -5,11 +5,12 @@
5
5
  from sklearn.metrics import r2_score as _r2_score
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def AdjustedRSquaredScore(model, dataset):
13
+ def AdjustedRSquaredScore(model: VMModel, dataset: VMDataset) -> float:
13
14
  """Calculates the adjusted R-squared score for a regression model."""
14
15
  r2_score = _r2_score(
15
16
  dataset.y,
@@ -5,11 +5,12 @@
5
5
  import numpy as np
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def GiniCoefficient(dataset, model):
13
+ def GiniCoefficient(dataset: VMDataset, model: VMModel) -> float:
13
14
  """Calculates the Gini coefficient for a regression model."""
14
15
  y_true = dataset.y
15
16
  y_pred = dataset.y_pred(model)
@@ -5,11 +5,12 @@
5
5
  import numpy as np
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def HuberLoss(model, dataset):
13
+ def HuberLoss(model: VMModel, dataset: VMDataset) -> float:
13
14
  """Calculates the Huber loss for a regression model."""
14
15
  y_true = dataset.y
15
16
  y_pred = dataset.y_pred(model)
@@ -5,11 +5,12 @@
5
5
  import numpy as np
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def KolmogorovSmirnovStatistic(dataset, model):
13
+ def KolmogorovSmirnovStatistic(dataset: VMDataset, model: VMModel) -> float:
13
14
  """Calculates the Kolmogorov-Smirnov statistic for a regression model."""
14
15
  y_true = dataset.y.flatten()
15
16
  y_pred = dataset.y_pred(model)
@@ -5,10 +5,11 @@
5
5
  from sklearn.metrics import mean_absolute_error as _mean_absolute_error
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def MeanAbsoluteError(model, dataset, **kwargs):
13
+ def MeanAbsoluteError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
13
14
  """Calculates the mean absolute error for a regression model."""
14
15
  return _mean_absolute_error(dataset.y, dataset.y_pred(model), **kwargs)
@@ -5,11 +5,12 @@
5
5
  import numpy as np
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def MeanAbsolutePercentageError(model, dataset):
13
+ def MeanAbsolutePercentageError(model: VMModel, dataset: VMDataset) -> float:
13
14
  """Calculates the mean absolute percentage error for a regression model."""
14
15
  y_true = dataset.y
15
16
  y_pred = dataset.y_pred(model)
@@ -5,10 +5,11 @@
5
5
  import numpy as np
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def MeanBiasDeviation(model, dataset):
13
+ def MeanBiasDeviation(model: VMModel, dataset: VMDataset) -> float:
13
14
  """Calculates the mean bias deviation for a regression model."""
14
15
  return np.mean(dataset.y - dataset.y_pred(model))
@@ -5,10 +5,11 @@
5
5
  from sklearn.metrics import mean_squared_error
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def MeanSquaredError(model, dataset, **kwargs):
13
+ def MeanSquaredError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
13
14
  """Calculates the mean squared error for a regression model."""
14
15
  return mean_squared_error(dataset.y, dataset.y_pred(model), **kwargs)
@@ -9,7 +9,7 @@ from validmind import tags, tasks
9
9
 
10
10
  @tags("regression")
11
11
  @tasks("regression")
12
- def QuantileLoss(model, dataset, quantile=0.5):
12
+ def QuantileLoss(model, dataset, quantile=0.5) -> float:
13
13
  """Calculates the quantile loss for a regression model."""
14
14
  error = dataset.y - dataset.y_pred(model)
15
15
 
@@ -5,10 +5,11 @@
5
5
  from sklearn.metrics import r2_score
6
6
 
7
7
  from validmind import tags, tasks
8
+ from validmind.vm_models import VMDataset, VMModel
8
9
 
9
10
 
10
11
  @tags("regression")
11
12
  @tasks("regression")
12
- def RSquaredScore(model, dataset):
13
+ def RSquaredScore(model: VMModel, dataset: VMDataset) -> float:
13
14
  """Calculates the R-squared score for a regression model."""
14
15
  return r2_score(dataset.y, dataset.y_pred(model))
@@ -6,11 +6,12 @@ import numpy as np
6
6
  from sklearn.metrics import mean_squared_error
7
7
 
8
8
  from validmind import tags, tasks
9
+ from validmind.vm_models import VMDataset, VMModel
9
10
 
10
11
 
11
12
  @tags("regression")
12
13
  @tasks("regression")
13
- def RootMeanSquaredError(model, dataset, **kwargs):
14
+ def RootMeanSquaredError(model: VMModel, dataset: VMDataset, **kwargs) -> float:
14
15
  """Calculates the root mean squared error for a regression model."""
15
16
  return np.sqrt(
16
17
  mean_squared_error(