validmind-2.8.28-py3-none-any.whl → validmind-2.8.29-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (204)
  1. validmind/models/function.py +11 -3
  2. validmind/tests/data_validation/ACFandPACFPlot.py +3 -1
  3. validmind/tests/data_validation/ADF.py +3 -1
  4. validmind/tests/data_validation/AutoAR.py +3 -1
  5. validmind/tests/data_validation/AutoMA.py +5 -1
  6. validmind/tests/data_validation/AutoStationarity.py +5 -1
  7. validmind/tests/data_validation/BivariateScatterPlots.py +3 -1
  8. validmind/tests/data_validation/BoxPierce.py +4 -1
  9. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  10. validmind/tests/data_validation/ClassImbalance.py +1 -1
  11. validmind/tests/data_validation/DatasetDescription.py +4 -1
  12. validmind/tests/data_validation/DatasetSplit.py +3 -2
  13. validmind/tests/data_validation/DescriptiveStatistics.py +3 -1
  14. validmind/tests/data_validation/DickeyFullerGLS.py +3 -1
  15. validmind/tests/data_validation/Duplicates.py +3 -1
  16. validmind/tests/data_validation/EngleGrangerCoint.py +6 -1
  17. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +3 -1
  18. validmind/tests/data_validation/HighCardinality.py +3 -1
  19. validmind/tests/data_validation/HighPearsonCorrelation.py +4 -1
  20. validmind/tests/data_validation/IQROutliersBarPlot.py +4 -1
  21. validmind/tests/data_validation/IQROutliersTable.py +6 -1
  22. validmind/tests/data_validation/IsolationForestOutliers.py +3 -1
  23. validmind/tests/data_validation/JarqueBera.py +3 -1
  24. validmind/tests/data_validation/KPSS.py +3 -1
  25. validmind/tests/data_validation/LJungBox.py +3 -1
  26. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +6 -1
  27. validmind/tests/data_validation/MissingValues.py +5 -1
  28. validmind/tests/data_validation/MissingValuesBarPlot.py +3 -1
  29. validmind/tests/data_validation/MutualInformation.py +4 -1
  30. validmind/tests/data_validation/PearsonCorrelationMatrix.py +3 -1
  31. validmind/tests/data_validation/PhillipsPerronArch.py +3 -1
  32. validmind/tests/data_validation/ProtectedClassesCombination.py +5 -1
  33. validmind/tests/data_validation/ProtectedClassesDescription.py +5 -1
  34. validmind/tests/data_validation/ProtectedClassesDisparity.py +5 -3
  35. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +9 -2
  36. validmind/tests/data_validation/RollingStatsPlot.py +5 -1
  37. validmind/tests/data_validation/RunsTest.py +1 -1
  38. validmind/tests/data_validation/ScatterPlot.py +2 -1
  39. validmind/tests/data_validation/ScoreBandDefaultRates.py +3 -1
  40. validmind/tests/data_validation/SeasonalDecompose.py +6 -1
  41. validmind/tests/data_validation/ShapiroWilk.py +4 -1
  42. validmind/tests/data_validation/Skewness.py +3 -1
  43. validmind/tests/data_validation/SpreadPlot.py +3 -1
  44. validmind/tests/data_validation/TabularCategoricalBarPlots.py +4 -1
  45. validmind/tests/data_validation/TabularDateTimeHistograms.py +3 -1
  46. validmind/tests/data_validation/TabularDescriptionTables.py +4 -1
  47. validmind/tests/data_validation/TabularNumericalHistograms.py +3 -1
  48. validmind/tests/data_validation/TargetRateBarPlots.py +4 -1
  49. validmind/tests/data_validation/TimeSeriesDescription.py +1 -1
  50. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +1 -1
  51. validmind/tests/data_validation/TimeSeriesFrequency.py +5 -1
  52. validmind/tests/data_validation/TimeSeriesHistogram.py +4 -1
  53. validmind/tests/data_validation/TimeSeriesLinePlot.py +3 -1
  54. validmind/tests/data_validation/TimeSeriesMissingValues.py +6 -1
  55. validmind/tests/data_validation/TimeSeriesOutliers.py +5 -1
  56. validmind/tests/data_validation/TooManyZeroValues.py +6 -1
  57. validmind/tests/data_validation/UniqueRows.py +5 -1
  58. validmind/tests/data_validation/WOEBinPlots.py +4 -1
  59. validmind/tests/data_validation/WOEBinTable.py +5 -1
  60. validmind/tests/data_validation/ZivotAndrewsArch.py +3 -1
  61. validmind/tests/data_validation/nlp/CommonWords.py +2 -1
  62. validmind/tests/data_validation/nlp/Hashtags.py +2 -1
  63. validmind/tests/data_validation/nlp/LanguageDetection.py +4 -1
  64. validmind/tests/data_validation/nlp/Mentions.py +3 -1
  65. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +6 -1
  66. validmind/tests/data_validation/nlp/Punctuations.py +2 -1
  67. validmind/tests/data_validation/nlp/Sentiment.py +3 -1
  68. validmind/tests/data_validation/nlp/StopWords.py +2 -1
  69. validmind/tests/data_validation/nlp/TextDescription.py +3 -1
  70. validmind/tests/data_validation/nlp/Toxicity.py +3 -1
  71. validmind/tests/load.py +91 -17
  72. validmind/tests/model_validation/BertScore.py +6 -3
  73. validmind/tests/model_validation/BleuScore.py +6 -1
  74. validmind/tests/model_validation/ClusterSizeDistribution.py +5 -1
  75. validmind/tests/model_validation/ContextualRecall.py +6 -1
  76. validmind/tests/model_validation/FeaturesAUC.py +5 -1
  77. validmind/tests/model_validation/MeteorScore.py +6 -1
  78. validmind/tests/model_validation/ModelMetadata.py +2 -1
  79. validmind/tests/model_validation/ModelPredictionResiduals.py +10 -2
  80. validmind/tests/model_validation/RegardScore.py +7 -1
  81. validmind/tests/model_validation/RegressionResidualsPlot.py +5 -1
  82. validmind/tests/model_validation/RougeScore.py +8 -1
  83. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +8 -1
  84. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +7 -1
  85. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +6 -1
  86. validmind/tests/model_validation/TokenDisparity.py +6 -1
  87. validmind/tests/model_validation/ToxicityScore.py +6 -1
  88. validmind/tests/model_validation/embeddings/ClusterDistribution.py +6 -1
  89. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +6 -1
  90. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +6 -1
  91. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +7 -3
  92. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +6 -1
  93. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +4 -3
  94. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +6 -1
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +7 -3
  96. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +6 -1
  97. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +5 -2
  98. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +5 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +4 -1
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +5 -1
  101. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +9 -6
  102. validmind/tests/model_validation/ragas/AnswerCorrectness.py +8 -5
  103. validmind/tests/model_validation/ragas/AspectCritic.py +11 -8
  104. validmind/tests/model_validation/ragas/ContextEntityRecall.py +5 -2
  105. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -2
  106. validmind/tests/model_validation/ragas/ContextPrecisionWithoutReference.py +5 -2
  107. validmind/tests/model_validation/ragas/ContextRecall.py +6 -2
  108. validmind/tests/model_validation/ragas/Faithfulness.py +9 -5
  109. validmind/tests/model_validation/ragas/NoiseSensitivity.py +10 -7
  110. validmind/tests/model_validation/ragas/ResponseRelevancy.py +9 -6
  111. validmind/tests/model_validation/ragas/SemanticSimilarity.py +7 -4
  112. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +5 -1
  113. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +5 -1
  114. validmind/tests/model_validation/sklearn/CalibrationCurve.py +5 -1
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +5 -1
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +5 -1
  117. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +5 -1
  118. validmind/tests/model_validation/sklearn/CompletenessScore.py +5 -1
  119. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +4 -1
  120. validmind/tests/model_validation/sklearn/FeatureImportance.py +5 -1
  121. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +5 -1
  122. validmind/tests/model_validation/sklearn/HomogeneityScore.py +5 -1
  123. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +2 -4
  124. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +3 -3
  125. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +5 -1
  126. validmind/tests/model_validation/sklearn/MinimumF1Score.py +5 -1
  127. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +5 -1
  128. validmind/tests/model_validation/sklearn/ModelParameters.py +6 -1
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +5 -1
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -2
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +4 -4
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +2 -2
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +5 -1
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +3 -1
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +6 -1
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +6 -1
  137. validmind/tests/model_validation/sklearn/RegressionPerformance.py +5 -1
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +6 -1
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +6 -1
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -2
  141. validmind/tests/model_validation/sklearn/ScoreProbabilityAlignment.py +3 -1
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +6 -1
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +2 -2
  144. validmind/tests/model_validation/sklearn/VMeasure.py +5 -1
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +6 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +3 -1
  147. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +6 -1
  148. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +6 -1
  149. validmind/tests/model_validation/statsmodels/GINITable.py +4 -1
  150. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +5 -1
  151. validmind/tests/model_validation/statsmodels/Lilliefors.py +3 -1
  152. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +6 -2
  153. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +4 -1
  154. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +7 -2
  155. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +5 -4
  156. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +4 -1
  157. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +3 -2
  158. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +5 -1
  159. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +3 -1
  160. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +6 -1
  161. validmind/tests/ongoing_monitoring/CalibrationCurveDrift.py +2 -2
  162. validmind/tests/ongoing_monitoring/ClassDiscriminationDrift.py +2 -2
  163. validmind/tests/ongoing_monitoring/ClassImbalanceDrift.py +2 -2
  164. validmind/tests/ongoing_monitoring/ClassificationAccuracyDrift.py +2 -2
  165. validmind/tests/ongoing_monitoring/ConfusionMatrixDrift.py +2 -2
  166. validmind/tests/ongoing_monitoring/CumulativePredictionProbabilitiesDrift.py +2 -2
  167. validmind/tests/ongoing_monitoring/FeatureDrift.py +5 -2
  168. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +6 -1
  169. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +8 -1
  170. validmind/tests/ongoing_monitoring/PredictionProbabilitiesHistogramDrift.py +2 -2
  171. validmind/tests/ongoing_monitoring/PredictionQuantilesAcrossFeatures.py +6 -1
  172. validmind/tests/ongoing_monitoring/ROCCurveDrift.py +4 -2
  173. validmind/tests/ongoing_monitoring/ScoreBandsDrift.py +2 -2
  174. validmind/tests/ongoing_monitoring/ScorecardHistogramDrift.py +2 -2
  175. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +8 -1
  176. validmind/tests/prompt_validation/Bias.py +5 -1
  177. validmind/tests/prompt_validation/Clarity.py +5 -1
  178. validmind/tests/prompt_validation/Conciseness.py +5 -1
  179. validmind/tests/prompt_validation/Delimitation.py +5 -1
  180. validmind/tests/prompt_validation/NegativeInstruction.py +5 -1
  181. validmind/tests/prompt_validation/Robustness.py +5 -1
  182. validmind/tests/prompt_validation/Specificity.py +5 -1
  183. validmind/unit_metrics/classification/Accuracy.py +2 -1
  184. validmind/unit_metrics/classification/F1.py +2 -1
  185. validmind/unit_metrics/classification/Precision.py +2 -1
  186. validmind/unit_metrics/classification/ROC_AUC.py +2 -1
  187. validmind/unit_metrics/classification/Recall.py +2 -1
  188. validmind/unit_metrics/regression/AdjustedRSquaredScore.py +2 -1
  189. validmind/unit_metrics/regression/GiniCoefficient.py +2 -1
  190. validmind/unit_metrics/regression/HuberLoss.py +2 -1
  191. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +2 -1
  192. validmind/unit_metrics/regression/MeanAbsoluteError.py +2 -1
  193. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +2 -1
  194. validmind/unit_metrics/regression/MeanBiasDeviation.py +2 -1
  195. validmind/unit_metrics/regression/MeanSquaredError.py +2 -1
  196. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  197. validmind/unit_metrics/regression/RSquaredScore.py +2 -1
  198. validmind/unit_metrics/regression/RootMeanSquaredError.py +2 -1
  199. validmind/vm_models/dataset/dataset.py +145 -38
  200. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/METADATA +1 -1
  201. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/RECORD +204 -204
  202. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/LICENSE +0 -0
  203. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/WHEEL +0 -0
  204. {validmind-2.8.28.dist-info → validmind-2.8.29.dist-info}/entry_points.txt +0 -0
@@ -4,12 +4,14 @@
4
4
 
5
5
  import io
6
6
  import sys
7
+ from typing import Any, List, Tuple
7
8
 
8
9
  import pandas as pd
9
10
 
10
11
  from validmind import RawData, tags, tasks
11
12
  from validmind.errors import MissingDependencyError
12
13
  from validmind.logging import get_logger
14
+ from validmind.vm_models import VMDataset, VMModel
13
15
 
14
16
  try:
15
17
  import aequitas.plot as ap
@@ -28,12 +30,12 @@ logger = get_logger(__name__)
28
30
  @tags("bias_and_fairness")
29
31
  @tasks("classification", "regression")
30
32
  def ProtectedClassesDisparity(
31
- dataset,
32
- model,
33
+ dataset: VMDataset,
34
+ model: VMModel,
33
35
  protected_classes=None,
34
36
  disparity_tolerance=1.25,
35
37
  metrics=["fnr", "fpr", "tpr"],
36
- ):
38
+ ) -> Tuple[pd.DataFrame, List[bytes], Any, RawData]:
37
39
  """
38
40
  Investigates disparities in model performance across different protected class segments.
39
41
 
@@ -4,13 +4,16 @@
4
4
 
5
5
  import json
6
6
  import sys
7
+ from typing import Any, Dict, Tuple
7
8
 
9
+ import matplotlib.figure
8
10
  import matplotlib.pyplot as plt
9
11
  import pandas as pd
10
12
 
11
13
  from validmind import RawData, tags, tasks
12
14
  from validmind.errors import MissingDependencyError
13
15
  from validmind.logging import get_logger
16
+ from validmind.vm_models import VMDataset
14
17
 
15
18
  try:
16
19
  from fairlearn.metrics import (
@@ -35,8 +38,12 @@ logger = get_logger(__name__)
35
38
  @tags("bias_and_fairness")
36
39
  @tasks("classification", "regression")
37
40
  def ProtectedClassesThresholdOptimizer(
38
- dataset, pipeline=None, protected_classes=None, X_train=None, y_train=None
39
- ):
41
+ dataset: VMDataset,
42
+ pipeline=None,
43
+ protected_classes=None,
44
+ X_train=None,
45
+ y_train=None,
46
+ ) -> Tuple[Dict[str, Any], matplotlib.figure.Figure, RawData]:
40
47
  """
41
48
  Obtains a classifier by applying group-specific thresholds to the provided estimator.
42
49
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import matplotlib.pyplot as plt
6
8
  import pandas as pd
7
9
 
@@ -42,7 +44,9 @@ def plot_rolling_statistics(df, col, window_size):
42
44
 
43
45
  @tags("time_series_data", "visualization", "stationarity")
44
46
  @tasks("regression")
45
- def RollingStatsPlot(dataset: VMDataset, window_size: int = 12):
47
+ def RollingStatsPlot(
48
+ dataset: VMDataset, window_size: int = 12
49
+ ) -> Tuple[plt.Figure, RawData]:
46
50
  """
47
51
  Evaluates the stationarity of time series data by plotting its rolling mean and standard deviation over a specified
48
52
  window.
@@ -10,7 +10,7 @@ from validmind import tags, tasks
10
10
 
11
11
  @tasks("classification", "regression")
12
12
  @tags("tabular_data", "statistical_test", "statsmodels")
13
- def RunsTest(dataset):
13
+ def RunsTest(dataset) -> pd.DataFrame:
14
14
  """
15
15
  Executes Runs Test on ML model to detect non-random patterns in output data sequence.
16
16
 
@@ -2,6 +2,7 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ import matplotlib.figure
5
6
  import matplotlib.pyplot as plt
6
7
  import seaborn as sns
7
8
 
@@ -10,7 +11,7 @@ from validmind import tags, tasks
10
11
 
11
12
  @tags("tabular_data", "visualization")
12
13
  @tasks("classification", "regression")
13
- def ScatterPlot(dataset):
14
+ def ScatterPlot(dataset) -> matplotlib.figure.Figure:
14
15
  """
15
16
  Assesses visual relationships, patterns, and outliers among features in a dataset through scatter plot matrices.
16
17
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import numpy as np
6
8
  import pandas as pd
7
9
 
@@ -16,7 +18,7 @@ def ScoreBandDefaultRates(
16
18
  model: VMModel,
17
19
  score_column: str = "score",
18
20
  score_bands: list = None,
19
- ):
21
+ ) -> Tuple[pd.DataFrame, RawData]:
20
22
  """
21
23
  Analyzes default rates and population distribution across credit score bands.
22
24
 
@@ -2,6 +2,9 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+
6
+ from typing import Tuple
7
+
5
8
  import numpy as np
6
9
  import pandas as pd
7
10
  import plotly.graph_objects as go
@@ -19,7 +22,9 @@ logger = get_logger(__name__)
19
22
 
20
23
  @tags("time_series_data", "seasonality", "statsmodels")
21
24
  @tasks("regression")
22
- def SeasonalDecompose(dataset: VMDataset, seasonal_model: str = "additive"):
25
+ def SeasonalDecompose(
26
+ dataset: VMDataset, seasonal_model: str = "additive"
27
+ ) -> Tuple[go.Figure, RawData]:
23
28
  """
24
29
  Assesses patterns and seasonality in a time series dataset by decomposing its features into foundational components.
25
30
 
@@ -2,6 +2,9 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+
6
+ from typing import Tuple
7
+
5
8
  import pandas as pd
6
9
  from scipy import stats
7
10
 
@@ -10,7 +13,7 @@ from validmind import RawData, tags, tasks
10
13
 
11
14
  @tasks("classification", "regression")
12
15
  @tags("tabular_data", "data_distribution", "statistical_test")
13
- def ShapiroWilk(dataset):
16
+ def ShapiroWilk(dataset) -> Tuple[pd.DataFrame, RawData]:
14
17
  """
15
18
  Evaluates feature-wise normality of training data using the Shapiro-Wilk test.
16
19
 
@@ -2,13 +2,15 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import tags, tasks
6
8
  from validmind.utils import infer_datatypes
7
9
 
8
10
 
9
11
  @tags("data_quality", "tabular_data")
10
12
  @tasks("classification", "regression")
11
- def Skewness(dataset, max_threshold=1):
13
+ def Skewness(dataset, max_threshold=1) -> Tuple[Dict[str, List[Dict[str, Any]]], bool]:
12
14
  """
13
15
  Evaluates the skewness of numerical data in a dataset to check against a defined threshold, aiming to ensure data
14
16
  quality and optimize model performance.
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import matplotlib.pyplot as plt
6
8
  import pandas as pd
7
9
  import seaborn as sns
@@ -13,7 +15,7 @@ from validmind.vm_models import VMDataset
13
15
 
14
16
  @tags("time_series_data", "visualization")
15
17
  @tasks("regression")
16
- def SpreadPlot(dataset: VMDataset):
18
+ def SpreadPlot(dataset: VMDataset) -> Tuple[plt.Figure, RawData]:
17
19
  """
18
20
  Assesses potential correlations between pairs of time series variables through visualization to enhance
19
21
  understanding of their relationships.
@@ -2,6 +2,9 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+
6
+ from typing import Tuple
7
+
5
8
  import plotly.graph_objs as go
6
9
 
7
10
  from validmind import RawData, tags, tasks
@@ -11,7 +14,7 @@ from validmind.vm_models import VMDataset
11
14
 
12
15
  @tags("tabular_data", "visualization")
13
16
  @tasks("classification", "regression")
14
- def TabularCategoricalBarPlots(dataset: VMDataset):
17
+ def TabularCategoricalBarPlots(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
15
18
  """
16
19
  Generates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition.
17
20
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.graph_objects as go
7
9
 
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
12
14
 
13
15
  @tags("time_series_data", "visualization")
14
16
  @tasks("classification", "regression")
15
- def TabularDateTimeHistograms(dataset: VMDataset):
17
+ def TabularDateTimeHistograms(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
16
18
  """
17
19
  Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime
18
20
  data.
@@ -2,6 +2,9 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+
6
+ from typing import Tuple
7
+
5
8
  import pandas as pd
6
9
 
7
10
  from validmind import tags, tasks
@@ -9,7 +12,7 @@ from validmind import tags, tasks
9
12
 
10
13
  @tags("tabular_data")
11
14
  @tasks("classification", "regression")
12
- def TabularDescriptionTables(dataset):
15
+ def TabularDescriptionTables(dataset) -> Tuple[pd.DataFrame]:
13
16
  """
14
17
  Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.
15
18
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import plotly.graph_objs as go
6
8
 
7
9
  from validmind import tags, tasks
@@ -10,7 +12,7 @@ from validmind.vm_models import VMDataset
10
12
 
11
13
  @tags("tabular_data", "visualization")
12
14
  @tasks("classification", "regression")
13
- def TabularNumericalHistograms(dataset: VMDataset):
15
+ def TabularNumericalHistograms(dataset: VMDataset) -> Tuple[go.Figure]:
14
16
  """
15
17
  Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and
16
18
  detect potential issues.
@@ -2,6 +2,9 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+
6
+ from typing import Tuple
7
+
5
8
  import numpy as np
6
9
  import plotly.graph_objs as go
7
10
  from plotly.subplots import make_subplots
@@ -13,7 +16,7 @@ from validmind.vm_models import VMDataset
13
16
 
14
17
  @tags("tabular_data", "visualization", "categorical_data")
15
18
  @tasks("classification")
16
- def TargetRateBarPlots(dataset: VMDataset):
19
+ def TargetRateBarPlots(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
17
20
  """
18
21
  Generates bar plots visualizing the default rates of categorical features for a classification machine learning
19
22
  model.
@@ -9,7 +9,7 @@ from validmind import tags, tasks
9
9
 
10
10
  @tags("time_series_data", "analysis")
11
11
  @tasks("regression")
12
- def TimeSeriesDescription(dataset):
12
+ def TimeSeriesDescription(dataset) -> pd.DataFrame:
13
13
  """
14
14
  Generates a detailed analysis for the provided time series dataset, summarizing key statistics to identify trends,
15
15
  patterns, and data quality issues.
@@ -10,7 +10,7 @@ from validmind import tags, tasks
10
10
 
11
11
  @tags("time_series_data", "analysis")
12
12
  @tasks("regression")
13
- def TimeSeriesDescriptiveStatistics(dataset):
13
+ def TimeSeriesDescriptiveStatistics(dataset) -> pd.DataFrame:
14
14
  """
15
15
  Evaluates the descriptive statistics of a time series dataset to identify trends, patterns, and data quality issues.
16
16
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.graph_objects as go
7
9
 
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset
12
14
 
13
15
  @tags("time_series_data")
14
16
  @tasks("regression")
15
- def TimeSeriesFrequency(dataset: VMDataset):
17
+ def TimeSeriesFrequency(
18
+ dataset: VMDataset,
19
+ ) -> Tuple[List[Dict[str, Any]], go.Figure, bool, RawData]:
16
20
  """
17
21
  Evaluates consistency of time series data frequency and generates a frequency plot.
18
22
 
@@ -2,8 +2,11 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.express as px
9
+ import plotly.graph_objects as go
7
10
 
8
11
  from validmind import tags, tasks
9
12
  from validmind.logging import get_logger
@@ -13,7 +16,7 @@ logger = get_logger(__name__)
13
16
 
14
17
  @tags("data_validation", "visualization", "time_series_data")
15
18
  @tasks("regression", "time_series_forecasting")
16
- def TimeSeriesHistogram(dataset, nbins=30):
19
+ def TimeSeriesHistogram(dataset, nbins=30) -> Tuple[go.Figure]:
17
20
  """
18
21
  Visualizes distribution of time-series data using histograms and Kernel Density Estimation (KDE) lines.
19
22
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.graph_objects as go
7
9
 
@@ -12,7 +14,7 @@ from validmind.vm_models import VMDataset
12
14
 
13
15
  @tags("time_series_data", "visualization")
14
16
  @tasks("regression")
15
- def TimeSeriesLinePlot(dataset: VMDataset):
17
+ def TimeSeriesLinePlot(dataset: VMDataset) -> Tuple[go.Figure]:
16
18
  """
17
19
  Generates and analyses time-series data through line plots revealing trends, patterns, anomalies over time.
18
20
 
@@ -2,9 +2,12 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.express as px
7
9
  import plotly.figure_factory as ff
10
+ import plotly.graph_objects as go
8
11
 
9
12
  from validmind import RawData, tags, tasks
10
13
  from validmind.errors import SkipTestError
@@ -13,7 +16,9 @@ from validmind.vm_models import VMDataset
13
16
 
14
17
  @tags("time_series_data")
15
18
  @tasks("regression")
16
- def TimeSeriesMissingValues(dataset: VMDataset, min_threshold: int = 1):
19
+ def TimeSeriesMissingValues(
20
+ dataset: VMDataset, min_threshold: int = 1
21
+ ) -> Tuple[List[Dict[str, Any]], go.Figure, go.Figure, bool, RawData]:
17
22
  """
18
23
  Validates time-series data quality by confirming the count of missing values is below a certain threshold.
19
24
 
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import List, Tuple
6
+
5
7
  import pandas as pd
6
8
  import plotly.graph_objects as go
7
9
 
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset
12
14
 
13
15
  @tags("time_series_data")
14
16
  @tasks("regression")
15
- def TimeSeriesOutliers(dataset: VMDataset, zscore_threshold: int = 3):
17
+ def TimeSeriesOutliers(
18
+ dataset: VMDataset, zscore_threshold: int = 3
19
+ ) -> Tuple[pd.DataFrame, List[go.Figure], bool, RawData]:
16
20
  """
17
21
  Identifies and visualizes outliers in time-series data using the z-score method.
18
22
 
@@ -2,13 +2,18 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+
6
+ from typing import Any, Dict, List, Tuple
7
+
5
8
  from validmind.tests import tags, tasks
6
9
  from validmind.vm_models import VMDataset
7
10
 
8
11
 
9
12
  @tags("tabular_data")
10
13
  @tasks("regression", "classification")
11
- def TooManyZeroValues(dataset: VMDataset, max_percent_threshold: float = 0.03):
14
+ def TooManyZeroValues(
15
+ dataset: VMDataset, max_percent_threshold: float = 0.03
16
+ ) -> Tuple[List[Dict[str, Any]], bool]:
12
17
  """
13
18
  Identifies numerical columns in a dataset that contain an excessive number of zero values, defined by a threshold
14
19
  percentage.
@@ -2,13 +2,17 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, List, Tuple
6
+
5
7
  from validmind import tags, tasks
6
8
  from validmind.vm_models import VMDataset
7
9
 
8
10
 
9
11
  @tags("tabular_data")
10
12
  @tasks("regression", "classification")
11
- def UniqueRows(dataset: VMDataset, min_percent_threshold: float = 1):
13
+ def UniqueRows(
14
+ dataset: VMDataset, min_percent_threshold: float = 1
15
+ ) -> Tuple[List[Dict[str, Any]], bool]:
12
16
  """
13
17
  Verifies the diversity of the dataset by ensuring that the count of unique rows exceeds a prescribed threshold.
14
18
 
@@ -2,6 +2,9 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+
6
+ from typing import Tuple
7
+
5
8
  import numpy as np
6
9
  import pandas as pd
7
10
  import plotly.express as px
@@ -24,7 +27,7 @@ def WOEBinPlots(
24
27
  breaks_adj: list = None,
25
28
  fig_height: int = 600,
26
29
  fig_width: int = 500,
27
- ):
30
+ ) -> Tuple[go.Figure, RawData]:
28
31
  """
29
32
  Generates visualizations of Weight of Evidence (WoE) and Information Value (IV) for understanding predictive power
30
33
  of categorical variables in a data set.
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Dict, Tuple
6
+
5
7
  import pandas as pd
6
8
  import scorecardpy as sc
7
9
 
@@ -12,7 +14,9 @@ from validmind.vm_models import VMDataset
12
14
 
13
15
  @tags("tabular_data", "categorical_data")
14
16
  @tasks("classification")
15
- def WOEBinTable(dataset: VMDataset, breaks_adj: list = None):
17
+ def WOEBinTable(
18
+ dataset: VMDataset, breaks_adj: list = None
19
+ ) -> Tuple[Dict[str, pd.DataFrame], RawData]:
16
20
  """
17
21
  Assesses the Weight of Evidence (WoE) and Information Value (IV) of each feature to evaluate its predictive power
18
22
  in a binary classification model.
@@ -2,6 +2,8 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Any, Dict, Tuple
6
+
5
7
  import pandas as pd
6
8
  from arch.unitroot import ZivotAndrews
7
9
  from numpy.linalg import LinAlgError
@@ -16,7 +18,7 @@ logger = get_logger(__name__)
16
18
 
17
19
  @tags("time_series_data", "stationarity", "unit_root_test")
18
20
  @tasks("regression")
19
- def ZivotAndrewsArch(dataset: VMDataset):
21
+ def ZivotAndrewsArch(dataset: VMDataset) -> Tuple[Dict[str, Any], RawData]:
20
22
  """
21
23
  Evaluates the order of integration and stationarity of time series data using the Zivot-Andrews unit root test.
22
24
 
@@ -3,6 +3,7 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  from collections import Counter
6
+ from typing import Tuple
6
7
 
7
8
  import nltk
8
9
  import plotly.graph_objects as go
@@ -14,7 +15,7 @@ from validmind.vm_models import VMDataset
14
15
 
15
16
  @tags("nlp", "text_data", "visualization", "frequency_analysis")
16
17
  @tasks("text_classification", "text_summarization")
17
- def CommonWords(dataset: VMDataset):
18
+ def CommonWords(dataset: VMDataset) -> Tuple[go.Figure, RawData]:
18
19
  """
19
20
  Assesses the most frequent non-stopwords in a text column for identifying prevalent language patterns.
20
21
 
@@ -3,6 +3,7 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import re
6
+ from typing import Tuple
6
7
 
7
8
  import plotly.graph_objects as go
8
9
 
@@ -13,7 +14,7 @@ from validmind.vm_models import VMDataset
13
14
 
14
15
  @tags("nlp", "text_data", "visualization", "frequency_analysis")
15
16
  @tasks("text_classification", "text_summarization")
16
- def Hashtags(dataset: VMDataset, top_hashtags: int = 25):
17
+ def Hashtags(dataset: VMDataset, top_hashtags: int = 25) -> Tuple[go.Figure, RawData]:
17
18
  """
18
19
  Assesses hashtag frequency in a text column, highlighting usage trends and potential dataset bias or spam.
19
20
 
@@ -2,7 +2,10 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ from typing import Tuple
6
+
5
7
  import plotly.express as px
8
+ import plotly.graph_objects as go
6
9
  from langdetect import LangDetectException, detect
7
10
 
8
11
  from validmind import RawData, tags, tasks
@@ -10,7 +13,7 @@ from validmind import RawData, tags, tasks
10
13
 
11
14
  @tags("nlp", "text_data", "visualization")
12
15
  @tasks("text_classification", "text_summarization")
13
- def LanguageDetection(dataset):
16
+ def LanguageDetection(dataset) -> Tuple[go.Figure, RawData]:
14
17
  """
15
18
  Assesses the diversity of languages in a textual dataset by detecting and visualizing the distribution of languages.
16
19
 
@@ -3,9 +3,11 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
  import re
6
+ from typing import Tuple
6
7
 
7
8
  import pandas as pd
8
9
  import plotly.express as px
10
+ import plotly.graph_objects as go
9
11
 
10
12
  from validmind import RawData, tags, tasks
11
13
  from validmind.errors import SkipTestError
@@ -14,7 +16,7 @@ from validmind.vm_models import VMDataset
14
16
 
15
17
  @tags("nlp", "text_data", "visualization", "frequency_analysis")
16
18
  @tasks("text_classification", "text_summarization")
17
- def Mentions(dataset: VMDataset, top_mentions: int = 25):
19
+ def Mentions(dataset: VMDataset, top_mentions: int = 25) -> Tuple[go.Figure, RawData]:
18
20
  """
19
21
  Calculates and visualizes frequencies of '@' prefixed mentions in a text-based dataset for NLP model analysis.
20
22
 
@@ -3,8 +3,11 @@
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
5
 
6
+ from typing import Dict, Tuple
7
+
6
8
  import pandas as pd
7
9
  import plotly.express as px
10
+ import plotly.graph_objects as go
8
11
  from textblob import TextBlob
9
12
 
10
13
  from validmind import RawData, tags, tasks
@@ -12,7 +15,9 @@ from validmind import RawData, tags, tasks
12
15
 
13
16
  @tags("nlp", "text_data", "data_validation")
14
17
  @tasks("nlp")
15
- def PolarityAndSubjectivity(dataset, threshold_subjectivity=0.5, threshold_polarity=0):
18
+ def PolarityAndSubjectivity(
19
+ dataset, threshold_subjectivity=0.5, threshold_polarity=0
20
+ ) -> Tuple[go.Figure, Dict[str, pd.DataFrame], RawData]:
16
21
  """
17
22
  Analyzes the polarity and subjectivity of text data within a given dataset to visualize the sentiment distribution.
18
23
 
@@ -8,6 +8,7 @@ Metrics functions for any Pandas-compatible datasets
8
8
 
9
9
  import string
10
10
  from collections import defaultdict
11
+ from typing import Tuple
11
12
 
12
13
  import plotly.graph_objects as go
13
14
 
@@ -16,7 +17,7 @@ from validmind import RawData, tags, tasks
16
17
 
17
18
  @tags("nlp", "text_data", "visualization", "frequency_analysis")
18
19
  @tasks("text_classification", "text_summarization", "nlp")
19
- def Punctuations(dataset, count_mode="token"):
20
+ def Punctuations(dataset, count_mode="token") -> Tuple[go.Figure, RawData]:
20
21
  """
21
22
  Analyzes and visualizes the frequency distribution of punctuation usage in a given text dataset.
22
23