validmind 2.5.6__py3-none-any.whl → 2.5.15__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +26 -7
  3. validmind/api_client.py +89 -43
  4. validmind/client.py +2 -2
  5. validmind/client_config.py +11 -14
  6. validmind/datasets/regression/fred_timeseries.py +67 -138
  7. validmind/template.py +1 -0
  8. validmind/test_suites/__init__.py +0 -2
  9. validmind/test_suites/statsmodels_timeseries.py +1 -1
  10. validmind/test_suites/summarization.py +0 -1
  11. validmind/test_suites/time_series.py +0 -43
  12. validmind/tests/__types__.py +3 -13
  13. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  14. validmind/tests/data_validation/ADF.py +31 -24
  15. validmind/tests/data_validation/AutoAR.py +9 -9
  16. validmind/tests/data_validation/AutoMA.py +23 -16
  17. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  18. validmind/tests/data_validation/AutoStationarity.py +21 -16
  19. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  20. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +82 -124
  21. validmind/tests/data_validation/ClassImbalance.py +15 -12
  22. validmind/tests/data_validation/DFGLSArch.py +19 -13
  23. validmind/tests/data_validation/DatasetDescription.py +17 -11
  24. validmind/tests/data_validation/DatasetSplit.py +7 -5
  25. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  26. validmind/tests/data_validation/Duplicates.py +33 -25
  27. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  28. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  29. validmind/tests/data_validation/HighCardinality.py +19 -12
  30. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  31. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  32. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  33. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  34. validmind/tests/data_validation/KPSS.py +34 -29
  35. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  36. validmind/tests/data_validation/MissingValues.py +32 -27
  37. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  38. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  39. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  40. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  41. validmind/tests/data_validation/ScatterPlot.py +63 -78
  42. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  43. validmind/tests/data_validation/Skewness.py +35 -37
  44. validmind/tests/data_validation/SpreadPlot.py +35 -35
  45. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  46. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  47. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  48. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  49. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  50. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  51. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  52. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  53. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  54. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  55. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  56. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  57. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  58. validmind/tests/data_validation/UniqueRows.py +11 -6
  59. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  60. validmind/tests/data_validation/WOEBinTable.py +35 -30
  61. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  62. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  63. validmind/tests/data_validation/nlp/Hashtags.py +27 -20
  64. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  65. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  66. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  67. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  68. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  69. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  70. validmind/tests/data_validation/nlp/TextDescription.py +36 -35
  71. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  72. validmind/tests/decorator.py +81 -42
  73. validmind/tests/model_validation/BertScore.py +36 -27
  74. validmind/tests/model_validation/BleuScore.py +25 -19
  75. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  76. validmind/tests/model_validation/ContextualRecall.py +35 -13
  77. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  78. validmind/tests/model_validation/MeteorScore.py +46 -33
  79. validmind/tests/model_validation/ModelMetadata.py +32 -64
  80. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  81. validmind/tests/model_validation/RegardScore.py +30 -14
  82. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  83. validmind/tests/model_validation/RougeScore.py +36 -30
  84. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  85. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  86. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  87. validmind/tests/model_validation/TokenDisparity.py +31 -23
  88. validmind/tests/model_validation/ToxicityScore.py +26 -17
  89. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  90. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  91. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  92. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  93. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  94. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  95. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  96. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  97. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  98. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  99. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  100. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  101. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  102. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  103. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  104. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  105. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  106. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  107. validmind/tests/model_validation/ragas/AspectCritique.py +7 -0
  108. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  109. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  110. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  111. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  112. validmind/tests/model_validation/ragas/utils.py +6 -0
  113. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  114. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  115. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  116. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  117. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  118. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  119. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  120. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  121. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  122. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  123. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  124. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  125. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  126. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  127. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  128. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  129. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  130. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  131. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +31 -25
  132. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  133. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  134. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  135. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  136. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  137. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  138. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -93
  139. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  140. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +113 -73
  141. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -5
  142. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  143. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  144. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  145. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  146. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  147. validmind/tests/model_validation/statsmodels/BoxPierce.py +14 -10
  148. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  149. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +19 -12
  150. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  151. validmind/tests/model_validation/statsmodels/JarqueBera.py +27 -22
  152. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  153. validmind/tests/model_validation/statsmodels/LJungBox.py +32 -28
  154. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  155. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +87 -119
  156. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  157. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  158. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  159. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  160. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  161. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  162. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  163. validmind/tests/model_validation/statsmodels/RunsTest.py +32 -28
  164. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  165. validmind/tests/model_validation/statsmodels/ShapiroWilk.py +15 -8
  166. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  167. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  168. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  169. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  170. validmind/tests/prompt_validation/Bias.py +14 -11
  171. validmind/tests/prompt_validation/Clarity.py +16 -14
  172. validmind/tests/prompt_validation/Conciseness.py +7 -5
  173. validmind/tests/prompt_validation/Delimitation.py +23 -22
  174. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  175. validmind/tests/prompt_validation/Robustness.py +12 -10
  176. validmind/tests/prompt_validation/Specificity.py +13 -11
  177. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  178. validmind/tests/run.py +68 -23
  179. validmind/unit_metrics/__init__.py +81 -144
  180. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  181. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  182. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  183. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  184. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  185. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  186. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  187. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  188. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  189. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  190. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  191. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  192. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  193. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  194. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  195. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  196. validmind/vm_models/dataset/dataset.py +2 -0
  197. validmind/vm_models/figure.py +5 -0
  198. validmind/vm_models/test/result_wrapper.py +93 -132
  199. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/METADATA +1 -1
  200. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/RECORD +203 -210
  201. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  202. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  203. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  204. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  205. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  206. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  207. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  208. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  209. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  210. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/LICENSE +0 -0
  211. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/WHEEL +0 -0
  212. {validmind-2.5.6.dist-info → validmind-2.5.15.dist-info}/entry_points.txt +0 -0
@@ -10,17 +10,21 @@ from validmind.vm_models import Figure, Metric
10
10
 
11
11
  class TabularCategoricalBarPlots(Metric):
12
12
  """
13
- Generates and visualizes bar plots for each category in categorical features to evaluate dataset's composition.
13
+ Generates and visualizes bar plots for each category in categorical features to evaluate the dataset's composition.
14
14
 
15
- **Purpose**: The purpose of this metric is to visually analyze categorical data using bar plots. It is intended to
16
- evaluate the dataset's composition by displaying the counts of each category in each categorical feature.
15
+ ### Purpose
17
16
 
18
- **Test Mechanism**: The provided dataset is first checked to determine if it contains any categorical variables. If
19
- no categorical columns are found, the tool raises a ValueError. For each categorical variable in the dataset, a
20
- separate bar plot is generated. The number of occurrences for each category is calculated and displayed on the
21
- plot. If a dataset contains multiple categorical columns, multiple bar plots are produced.
17
+ The purpose of this metric is to visually analyze categorical data using bar plots. It is intended to evaluate the
18
+ dataset's composition by displaying the counts of each category in each categorical feature.
22
19
 
23
- **Signs of High Risk**:
20
+ ### Test Mechanism
21
+
22
+ The provided dataset is first checked to determine if it contains any categorical variables. If no categorical
23
+ columns are found, the tool raises a ValueError. For each categorical variable in the dataset, a separate bar plot
24
+ is generated. The number of occurrences for each category is calculated and displayed on the plot. If a dataset
25
+ contains multiple categorical columns, multiple bar plots are produced.
26
+
27
+ ### Signs of High Risk
24
28
 
25
29
  - High risk could occur if the categorical variables exhibit an extreme imbalance, with categories having very few
26
30
  instances possibly being underrepresented in the model, which could affect the model's performance and its ability
@@ -28,17 +32,19 @@ class TabularCategoricalBarPlots(Metric):
28
32
  - Another sign of risk is if there are too many categories in a single variable, which could lead to overfitting
29
33
  and make the model complex.
30
34
 
31
- **Strengths**: This metric provides a visual and intuitively understandable representation of categorical data,
32
- which aids in the analysis of variable distributions. By presenting model inputs in this way, we can easily
33
- identify imbalances or rare categories that could affect the model's performance.
35
+ ### Strengths
36
+
37
+ - Provides a visual and intuitively understandable representation of categorical data.
38
+ - Aids in the analysis of variable distributions.
39
+ - Helps in easily identifying imbalances or rare categories that could affect the model's performance.
34
40
 
35
- **Limitations**:
41
+ ### Limitations
36
42
 
37
- - This method only works with categorical data, meaning it won't apply to numerical variables.
38
- - In addition, the method does not provide any informative value when there are too many categories, as the bar
39
- chart could become cluttered and hard to interpret.
40
- - It offers no insights into the model's performance or precision, but rather provides a descriptive analysis of
41
- the input.
43
+ - This method only works with categorical data and won't apply to numerical variables.
44
+ - It does not provide informative value when there are too many categories, as the bar chart could become cluttered
45
+ and hard to interpret.
46
+ - Offers no insights into the model's performance or precision, but rather provides a descriptive analysis of the
47
+ input.
42
48
  """
43
49
 
44
50
  name = "tabular_categorical_bar_plots"
@@ -10,26 +10,33 @@ from validmind.vm_models import Figure, Metric
10
10
 
11
11
  class TabularDateTimeHistograms(Metric):
12
12
  """
13
- Generates histograms to provide graphical insight into the distribution of time intervals in model's datetime data.
13
+ Generates histograms to provide graphical insight into the distribution of time intervals in a model's datetime
14
+ data.
14
15
 
15
- **Purpose**: The `TabularDateTimeHistograms` metric is designed to provide graphical insight into the distribution
16
- of time intervals in a machine learning model's datetime data. By plotting histograms of differences between
17
- consecutive date entries in all datetime variables, it enables an examination of the underlying pattern of time
18
- series data and identification of anomalies.
16
+ ### Purpose
19
17
 
20
- **Test Mechanism**: This test operates by first identifying all datetime columns and extracting them from the
21
- dataset. For each datetime column, it next computes the differences (in days) between consecutive dates, excluding
22
- zero values, and visualizes these differences in a histogram. The seaborn library's histplot function is used to
23
- generate histograms, which are labeled appropriately and provide a graphical representation of the frequency of
24
- different day intervals in the dataset.
18
+ The `TabularDateTimeHistograms` metric is designed to provide graphical insight into the distribution of time
19
+ intervals in a machine learning model's datetime data. By plotting histograms of differences between consecutive
20
+ date entries in all datetime variables, it enables an examination of the underlying pattern of time series data and
21
+ identification of anomalies.
22
+
23
+ ### Test Mechanism
24
+
25
+ This test operates by first identifying all datetime columns and extracting them from the dataset. For each
26
+ datetime column, it next computes the differences (in days) between consecutive dates, excluding zero values, and
27
+ visualizes these differences in a histogram. The Plotly library's histogram function is used to generate
28
+ histograms, which are labeled appropriately and provide a graphical representation of the frequency of different
29
+ day intervals in the dataset.
30
+
31
+ ### Signs of High Risk
25
32
 
26
- **Signs of High Risk**:
27
33
  - If no datetime columns are detected in the dataset, this would lead to a ValueError. Hence, the absence of
28
34
  datetime columns signifies a high risk.
29
35
  - A severely skewed or irregular distribution depicted in the histogram may indicate possible complications with
30
36
  the data, such as faulty timestamps or abnormalities.
31
37
 
32
- **Strengths**:
38
+ ### Strengths
39
+
33
40
  - The metric offers a visual overview of time interval frequencies within the dataset, supporting the recognition
34
41
  of inherent patterns.
35
42
  - Histogram plots can aid in the detection of potential outliers and data anomalies, contributing to an assessment
@@ -37,7 +44,8 @@ class TabularDateTimeHistograms(Metric):
37
44
  - The metric is versatile, compatible with a range of task types, including classification and regression, and can
38
45
  work with multiple datetime variables if present.
39
46
 
40
- **Limitations**:
47
+ ### Limitations
48
+
41
49
  - A major weakness of this metric is its dependence on the visual examination of data, as it does not provide a
42
50
  measurable evaluation of the model.
43
51
  - The metric might overlook complex or multi-dimensional trends in the data.
@@ -13,14 +13,17 @@ def TabularDescriptionTables(dataset):
13
13
  """
14
14
  Summarizes key descriptive statistics for numerical, categorical, and datetime variables in a dataset.
15
15
 
16
- **Purpose**: The main purpose of this metric is to gather and present the descriptive statistics of numerical,
17
- categorical, and datetime variables present in a dataset. The attributes it measures include the count, mean,
18
- minimum and maximum values, percentage of missing values, data types of fields, and unique values for categorical
19
- fields, among others.
16
+ ### Purpose
20
17
 
21
- **Test Mechanism**: The test first segregates the variables in the dataset according to their data types
22
- (numerical, categorical, or datetime). Then, it compiles summary statistics for each type of variable. The
23
- specifics of these statistics vary depending on the type of variable:
18
+ The main purpose of this metric is to gather and present the descriptive statistics of numerical, categorical, and
19
+ datetime variables present in a dataset. The attributes it measures include the count, mean, minimum and maximum
20
+ values, percentage of missing values, data types of fields, and unique values for categorical fields, among others.
21
+
22
+ ### Test Mechanism
23
+
24
+ The test first segregates the variables in the dataset according to their data types (numerical, categorical, or
25
+ datetime). Then, it compiles summary statistics for each type of variable. The specifics of these statistics vary
26
+ depending on the type of variable:
24
27
 
25
28
  - For numerical variables, the metric extracts descriptors like count, mean, minimum and maximum values, count of
26
29
  missing values, and data types.
@@ -29,14 +32,16 @@ def TabularDescriptionTables(dataset):
29
32
  - For datetime variables, it counts the number of unique values, identifies the earliest and latest dates, counts
30
33
  missing values, and identifies data types.
31
34
 
32
- **Signs of High Risk**:
35
+ ### Signs of High Risk
36
+
33
37
  - Masses of missing values in the descriptive statistics results could hint at high risk or failure, indicating
34
38
  potential data collection, integrity, and quality issues.
35
39
  - Detection of inappropriate distributions for numerical variables, like having negative values for variables that
36
40
  are always supposed to be positive.
37
41
  - Identifying inappropriate data types, like a continuous variable being encoded as a categorical type.
38
42
 
39
- **Strengths**:
43
+ ### Strengths
44
+
40
45
  - Provides a comprehensive overview of the dataset.
41
46
  - Gives a snapshot into the essence of the numerical, categorical, and datetime fields.
42
47
  - Identifies potential data quality issues such as missing values or inconsistencies crucial for building credible
@@ -44,7 +49,8 @@ def TabularDescriptionTables(dataset):
44
49
  - The metadata, including the data type and missing value information, are vital for anyone including data
45
50
  scientists dealing with the dataset before the modeling process.
46
51
 
47
- **Limitations**:
52
+ ### Limitations
53
+
48
54
  - It does not perform any deeper statistical analysis or tests on the data.
49
55
  - It does not handle issues such as outliers, or relationships between variables.
50
56
  - It offers no insights into potential correlations or possible interactions between variables.
@@ -57,15 +63,44 @@ def TabularDescriptionTables(dataset):
57
63
  categorical_fields = get_categorical_columns(dataset)
58
64
  datetime_fields = get_datetime_columns(dataset)
59
65
 
60
- summary_stats_numerical = get_summary_statistics_numerical(
61
- dataset, numerical_fields
66
+ summary_stats_numerical = (
67
+ get_summary_statistics_numerical(dataset, numerical_fields)
68
+ if numerical_fields
69
+ else pd.DataFrame()
70
+ )
71
+ summary_stats_categorical = (
72
+ get_summary_statistics_categorical(dataset, categorical_fields)
73
+ if categorical_fields
74
+ else pd.DataFrame()
75
+ )
76
+ summary_stats_datetime = (
77
+ get_summary_statistics_datetime(dataset, datetime_fields)
78
+ if datetime_fields
79
+ else pd.DataFrame()
80
+ )
81
+
82
+ # Replace empty DataFrames with None
83
+ summary_stats_numerical = (
84
+ summary_stats_numerical if not summary_stats_numerical.empty else None
62
85
  )
63
- summary_stats_categorical = get_summary_statistics_categorical(
64
- dataset, categorical_fields
86
+ summary_stats_categorical = (
87
+ summary_stats_categorical if not summary_stats_categorical.empty else None
88
+ )
89
+ summary_stats_datetime = (
90
+ summary_stats_datetime if not summary_stats_datetime.empty else None
65
91
  )
66
- summary_stats_datetime = get_summary_statistics_datetime(dataset, datetime_fields)
67
92
 
68
- return (summary_stats_numerical, summary_stats_categorical, summary_stats_datetime)
93
+ # Return a tuple with only non-None values (tables with data)
94
+ return tuple(
95
+ filter(
96
+ lambda x: x is not None,
97
+ (
98
+ summary_stats_numerical,
99
+ summary_stats_categorical,
100
+ summary_stats_datetime,
101
+ ),
102
+ )
103
+ )
69
104
 
70
105
 
71
106
  def get_summary_statistics_numerical(dataset, numerical_fields):
@@ -13,39 +13,42 @@ class TabularNumericalHistograms(Metric):
13
13
  Generates histograms for each numerical feature in a dataset to provide visual insights into data distribution and
14
14
  detect potential issues.
15
15
 
16
- **Purpose**: The purpose of this test is to provide visual analysis of numerical data through the generation of
17
- histograms for each numerical feature in the dataset. Histograms aid in the exploratory analysis of data, offering
18
- insight into the distribution of the data, skewness, presence of outliers, and central tendencies. It helps in
19
- understanding if the inputs to the model are normally distributed which is a common assumption in many machine
20
- learning algorithms.
16
+ ### Purpose
21
17
 
22
- **Test Mechanism**: This test scans the provided dataset and extracts all the numerical columns. For each numerical
23
- column, it constructs a histogram using plotly, with 50 bins. The deployment of histograms offers a robust visual
24
- aid, ensuring unruffled identification and understanding of numerical data distribution patterns.
18
+ The purpose of this test is to provide visual analysis of numerical data through the generation of histograms for
19
+ each numerical feature in the dataset. Histograms aid in the exploratory analysis of data, offering insight into
20
+ the distribution of the data, skewness, presence of outliers, and central tendencies. It helps in understanding if
21
+ the inputs to the model are normally distributed, which is a common assumption in many machine learning algorithms.
25
22
 
26
- **Signs of High Risk**:
23
+ ### Test Mechanism
24
+
25
+ This test scans the provided dataset and extracts all the numerical columns. For each numerical column, it
26
+ constructs a histogram using plotly, with 50 bins. The deployment of histograms offers a robust visual aid,
27
+ enabling straightforward identification and understanding of numerical data distribution patterns.
28
+
29
+ ### Signs of High Risk
27
30
 
28
31
  - A high degree of skewness
29
32
  - Unexpected data distributions
30
33
  - Existence of extreme outliers in the histograms
34
+
31
35
  These may indicate issues with the data that the model is receiving. If data for a numerical feature is expected to
32
- follow a certain distribution (like normal distribution) but does not, it could lead to sub-par performance by the
33
- model. As such these instances should be treated as high-risk indicators.
36
+ follow a certain distribution (like a normal distribution) but does not, it could lead to sub-par performance by
37
+ the model. As such, these instances should be treated as high-risk indicators.
34
38
 
35
- **Strengths**:
39
+ ### Strengths
36
40
 
37
- - This test provides a simple, easy-to-interpret visualization of how data for each numerical attribute is
38
- distributed.
39
- - It can help detect skewed values and outliers, that could potentially harm the AI model's performance.
40
- - It can be applied to large datasets and multiple numerical variables conveniently.
41
+ - Provides a simple, easy-to-interpret visualization of how data for each numerical attribute is distributed.
42
+ - Helps detect skewed values and outliers that could potentially harm the AI model's performance.
43
+ - Can be applied to large datasets and multiple numerical variables conveniently.
41
44
 
42
- **Limitations**:
45
+ ### Limitations
43
46
 
44
- - This test only works with numerical data, thus ignoring non-numerical or categorical data.
45
- - It does not analyze relationships between different features, only the individual feature distributions.
46
- - It is a univariate analysis, and may miss patterns or anomalies that only appear when considering multiple
47
- variables together.
48
- - It does not provide any insight into how these features affect the output of the model; it is purely an input
47
+ - Only works with numerical data, thus ignoring non-numerical or categorical data.
48
+ - Does not analyze relationships between different features, only the individual feature distributions.
49
+ - Is a univariate analysis and may miss patterns or anomalies that only appear when considering multiple variables
50
+ together.
51
+ - Does not provide any insight into how these features affect the output of the model; it is purely an input
49
52
  analysis tool.
50
53
  """
51
54
 
@@ -13,29 +13,36 @@ class TargetRateBarPlots(Metric):
13
13
  Generates bar plots visualizing the default rates of categorical features for a classification machine learning
14
14
  model.
15
15
 
16
- **Purpose**: This test, implemented as a metric, is designed to provide an intuitive, graphical summary of the
17
- decision-making patterns exhibited by a categorical classification machine learning model. The model's performance
18
- is evaluated using bar plots depicting the ratio of target rates—meaning the proportion of positive classes—for
19
- different categorical inputs. This allows for an easy, at-a-glance understanding of the model's accuracy.
20
-
21
- **Test Mechanism**: The test involves creating a pair of bar plots for each categorical feature in the dataset. The
22
- first plot depicts the frequency of each category in the dataset, with each category visually distinguished by its
23
- unique color. The second plot shows the mean target rate of each category (sourced from the "default_column").
24
- Plotly, a Python library, is used to generate these plots, with distinct plots created for each feature. If no
25
- specific columns are selected, the test will generate plots for each categorical column in the dataset.
26
-
27
- **Signs of High Risk**:
16
+ ### Purpose
17
+
18
+ This test, implemented as a metric, is designed to provide an intuitive, graphical summary of the decision-making
19
+ patterns exhibited by a categorical classification machine learning model. The model's performance is evaluated
20
+ using bar plots depicting the ratio of target rates—meaning the proportion of positive classes—for different
21
+ categorical inputs. This allows for an easy, at-a-glance understanding of the model's accuracy.
22
+
23
+ ### Test Mechanism
24
+
25
+ The test involves creating a pair of bar plots for each categorical feature in the dataset. The first plot depicts
26
+ the frequency of each category in the dataset, with each category visually distinguished by its unique color. The
27
+ second plot shows the mean target rate of each category (sourced from the "default_column"). Plotly, a Python
28
+ library, is used to generate these plots, with distinct plots created for each feature. If no specific columns are
29
+ selected, the test will generate plots for each categorical column in the dataset.
30
+
31
+ ### Signs of High Risk
32
+
28
33
  - Inconsistent or non-binary values in the "default_column" could complicate or render impossible the calculation
29
34
  of average target rates.
30
35
  - Particularly low or high target rates for a specific category might suggest that the model is misclassifying
31
36
  instances of that category.
32
37
 
33
- **Strengths**:
38
+ ### Strengths
39
+
34
40
  - This test offers a visually interpretable breakdown of the model's decisions, providing an easy way to spot
35
41
  irregularities, inconsistencies, or patterns.
36
42
  - Its flexibility allows for the inspection of one or multiple columns, as needed.
37
43
 
38
- **Limitations**:
44
+ ### Limitations
45
+
39
46
  - The test is less useful when dealing with numeric or continuous data, as it's designed specifically for
40
47
  categorical features.
41
48
  - If the model in question is dealing with a multi-class problem rather than binary classification, the test's
@@ -11,31 +11,38 @@ from validmind import tags, tasks
11
11
  @tasks("regression")
12
12
  def TimeSeriesDescription(dataset):
13
13
  """
14
- Generates a detailed analysis for the provided time series dataset.
14
+ Generates a detailed analysis for the provided time series dataset, summarizing key statistics to identify trends,
15
+ patterns, and data quality issues.
15
16
 
16
- **Purpose**: The purpose of the TimeSeriesDescription function is to analyze an individual time series
17
- by providing a summary of key statistics. This helps in understanding trends, patterns, and data quality issues
18
- within the time series.
17
+ ### Purpose
19
18
 
20
- **Test Mechanism**: The function extracts the time series data and provides a summary of key statistics.
21
- The dataset is expected to have a datetime index. The function checks this and raises an error if the index is
22
- not in datetime format. For each variable (column) in the dataset, appropriate statistics including start date,
23
- end date, frequency, number of missing values, count, min, and max values are calculated.
19
+ The TimeSeriesDescription function aims to analyze an individual time series by providing a summary of key
20
+ statistics. This helps in understanding trends, patterns, and data quality issues within the time series.
21
+
22
+ ### Test Mechanism
23
+
24
+ The function extracts the time series data and provides a summary of key statistics. The dataset is expected to
25
+ have a datetime index. The function checks this and raises an error if the index is not in datetime format. For
26
+ each variable (column) in the dataset, appropriate statistics including start date, end date, frequency, number of
27
+ missing values, count, min, and max values are calculated.
28
+
29
+ ### Signs of High Risk
24
30
 
25
- **Signs of High Risk**:
26
31
  - If the index of the dataset is not in datetime format, it could lead to errors in time-series analysis.
27
32
  - Inconsistent or missing data within the dataset might affect the analysis of trends and patterns.
28
33
 
29
- **Strengths**:
30
- - This function provides a comprehensive summary of key statistics for each variable, helping to identify data quality
31
- issues such as missing values.
32
- - The function helps in understanding the distribution and range of the data by including min and max values.
34
+ ### Strengths
35
+
36
+ - Provides a comprehensive summary of key statistics for each variable, helping to identify data quality issues
37
+ such as missing values.
38
+ - Helps in understanding the distribution and range of the data by including min and max values.
39
+
40
+ ### Limitations
33
41
 
34
- **Limitations**:
35
- - This function assumes that the dataset is provided as a DataFrameDataset object with a .df attribute to access
36
- the pandas DataFrame.
37
- - It only analyzes datasets with a datetime index and will raise an error for other types of indices.
38
- - The function does not handle large datasets efficiently, and performance may degrade with very large datasets.
42
+ - Assumes that the dataset is provided as a DataFrameDataset object with a .df attribute to access the pandas
43
+ DataFrame.
44
+ - Only analyzes datasets with a datetime index and will raise an error for other types of indices.
45
+ - Does not handle large datasets efficiently; performance may degrade with very large datasets.
39
46
  """
40
47
 
41
48
  summary = []
@@ -12,30 +12,36 @@ from validmind import tags, tasks
12
12
  @tasks("regression")
13
13
  def TimeSeriesDescriptiveStatistics(dataset):
14
14
  """
15
- Generates a detailed table of descriptive statistics for the provided time series dataset.
15
+ Evaluates the descriptive statistics of a time series dataset to identify trends, patterns, and data quality issues.
16
16
 
17
- **Purpose**: The purpose of the TimeSeriesDescriptiveStatistics function is to analyze an individual time series
18
- by providing a summary of key descriptive statistics. This helps in understanding trends, patterns, and data quality issues
19
- within the time series.
17
+ ### Purpose
20
18
 
21
- **Test Mechanism**: The function extracts the time series data and provides a summary of key descriptive statistics.
22
- The dataset is expected to have a datetime index. The function checks this and raises an error if the index is
23
- not in datetime format. For each variable (column) in the dataset, appropriate statistics including start date,
24
- end date, min, mean, max, skewness, kurtosis, and count are calculated.
19
+ The purpose of the TimeSeriesDescriptiveStatistics function is to analyze an individual time series by providing a
20
+ summary of key descriptive statistics. This analysis helps in understanding trends, patterns, and data quality
21
+ issues within the time series dataset.
22
+
23
+ ### Test Mechanism
24
+
25
+ The function extracts the time series data and provides a summary of key descriptive statistics. The dataset is
26
+ expected to have a datetime index, and the function will check this and raise an error if the index is not in a
27
+ datetime format. For each variable (column) in the dataset, appropriate statistics, including start date, end date,
28
+ min, mean, max, skewness, kurtosis, and count, are calculated.
29
+
30
+ ### Signs of High Risk
25
31
 
26
- **Signs of High Risk**:
27
32
  - If the index of the dataset is not in datetime format, it could lead to errors in time-series analysis.
28
33
  - Inconsistent or missing data within the dataset might affect the analysis of trends and patterns.
29
34
 
30
- **Strengths**:
31
- - This function provides a comprehensive summary of key descriptive statistics for each variable, helping to identify data quality
32
- issues and understand the distribution of the data.
35
+ ### Strengths
36
+
37
+ - Provides a comprehensive summary of key descriptive statistics for each variable.
38
+ - Helps identify data quality issues and understand the distribution of the data.
39
+
40
+ ### Limitations
33
41
 
34
- **Limitations**:
35
- - This function assumes that the dataset is provided as a DataFrameDataset object with a .df attribute to access
36
- the pandas DataFrame.
37
- - It only analyzes datasets with a datetime index and will raise an error for other types of indices.
38
- - The function does not handle large datasets efficiently, and performance may degrade with very large datasets.
42
+ - Assumes the dataset is provided as a DataFrameDataset object with a .df attribute to access the pandas DataFrame.
43
+ - Only analyzes datasets with a datetime index and will raise an error for other types of indices.
44
+ - Does not handle large datasets efficiently, and performance may degrade with very large datasets.
39
45
  """
40
46
 
41
47
  summary = []
@@ -22,34 +22,41 @@ class TimeSeriesFrequency(ThresholdTest):
22
22
  """
23
23
  Evaluates consistency of time series data frequency and generates a frequency plot.
24
24
 
25
- **Purpose**: The purpose of the TimeSeriesFrequency test is to evaluate the consistency in the frequency of data
26
- points in a time-series dataset. This test inspects the intervals or duration between each data point to determine
27
- if a fixed pattern (such as daily, weekly, or monthly) exists. The identification of such patterns is crucial to
28
- time-series analysis as any irregularities could lead to erroneous results and hinder the model's capacity for
29
- identifying trends and patterns.
30
-
31
- **Test Mechanism**: Initially, the test checks if the dataframe index is in datetime format. Subsequently, it
32
- utilizes pandas' `infer_freq` method to identify the frequency of each data series within the dataframe. The
33
- `infer_freq` method attempts to establish the frequency of a time series and returns both the frequency string and
34
- a dictionary relating these strings to their respective labels. The test compares the frequencies of all datasets.
35
- If they share a common frequency, the test passes, but it fails if they do not. Additionally, Plotly is used to
36
- create a frequency plot, offering a visual depiction of the time differences between consecutive entries in the
37
- dataframe index.
38
-
39
- **Signs of High Risk**:
25
+ ### Purpose
26
+
27
+ The purpose of the TimeSeriesFrequency test is to evaluate the consistency in the frequency of data points in a
28
+ time-series dataset. This test inspects the intervals or duration between each data point to determine if a fixed
29
+ pattern (such as daily, weekly, or monthly) exists. The identification of such patterns is crucial to time-series
30
+ analysis as any irregularities could lead to erroneous results and hinder the model's capacity for identifying
31
+ trends and patterns.
32
+
33
+ ### Test Mechanism
34
+
35
+ Initially, the test checks if the dataframe index is in datetime format. Subsequently, it utilizes pandas'
36
+ `infer_freq` method to identify the frequency of each data series within the dataframe. The `infer_freq` method
37
+ attempts to establish the frequency of a time series and returns both the frequency string and a dictionary
38
+ relating these strings to their respective labels. The test compares the frequencies of all data series. If they share
39
+ a common frequency, the test passes, but it fails if they do not. Additionally, Plotly is used to create a
40
+ frequency plot, offering a visual depiction of the time differences between consecutive entries in the dataframe
41
+ index.
42
+
43
+ ### Signs of High Risk
44
+
40
45
  - The test fails, indicating multiple unique frequencies within the dataset. This failure could suggest irregular
41
46
  intervals between observations, potentially interrupting pattern recognition or trend analysis.
42
47
  - The presence of missing or null frequencies could be an indication of inconsistencies in data or gaps within the
43
48
  data collection process.
44
49
 
45
- **Strengths**:
50
+ ### Strengths
51
+
46
52
  - This test uses a systematic approach to checking the consistency of data frequency within a time-series dataset.
47
53
  - It increases the model's reliability by asserting the consistency of observations over time, an essential factor
48
54
  in time-series analysis.
49
55
  - The test generates a visual plot, providing an intuitive representation of the dataset's frequency distribution,
50
56
  which caters to visual learners and aids in interpretation and explanation.
51
57
 
52
- **Limitations**:
58
+ ### Limitations
59
+
53
60
  - This test is only applicable to time-series datasets and hence not suitable for other types of datasets.
54
61
  - The `infer_freq` method might not always correctly infer frequency when faced with missing or irregular data
55
62
  points.
@@ -13,39 +13,40 @@ def TimeSeriesHistogram(dataset, nbins=30):
13
13
  """
14
14
  Visualizes distribution of time-series data using histograms and Kernel Density Estimation (KDE) lines.
15
15
 
16
- **Purpose**: The purpose of this metric is to perform a histogram analysis on time-series data. It primarily
17
- assesses the distribution of values within a dataset over a period of time, typically used for regression tasks.
18
- The types of data that this metric can be applicable to are diverse, ranging from internet traffic and stock prices
19
- to weather data. This analysis provides valuable insights into the probability distribution, skewness, and peakness
20
- (kurtosis) underlying the data.
21
-
22
- **Test Mechanism**: This test operates on a specific column within the dataset that is required to have a datetime
23
- type index. It goes through each column in the given dataset, creating a histogram with Plotly's histplot
24
- function. In cases where the dataset includes more than one time-series (i.e., more than one column with a datetime
25
- type index), a distinct histogram is plotted for each series. Additionally, a kernel density estimate (KDE) line is
26
- drawn for each histogram, providing a visualization of the data's underlying probability distribution. The x and
27
- y-axis labels are purposely hidden to concentrate solely on the data distribution.
28
-
29
- **Signs of High Risk**:
16
+ ### Purpose
17
+
18
+ The TimeSeriesHistogram test aims to perform a histogram analysis on time-series data to assess the distribution of
19
+ values within a dataset over time. This test is useful for regression tasks and can be applied to various types of
20
+ data, such as internet traffic, stock prices, and weather data, providing insights into the probability
21
+ distribution, skewness, and kurtosis of the dataset.
22
+
23
+ ### Test Mechanism
24
+
25
+ This test operates on a dataset that must have a datetime type index. For each column in
26
+ the dataset, a histogram is created using Plotly's histogram function. If the dataset includes more than one
27
+ time-series, a distinct histogram is plotted for each series. Additionally, a Kernel Density Estimate (KDE) line is
28
+ drawn for each histogram, visualizing the data's underlying probability distribution. The x and y-axis labels are
29
+ hidden to focus solely on the data distribution.
30
+
31
+ ### Signs of High Risk
32
+
30
33
  - The dataset lacks a column with a datetime type index.
31
34
  - The specified columns do not exist within the dataset.
32
- - The data distribution within the histogram demonstrates high degrees of skewness or kurtosis, which could bias
33
- the model.
34
- - Outliers that differ significantly from the primary data distribution are present.
35
-
36
- **Strengths**:
37
- - It serves as a visual diagnostic tool, offering an ideal starting point for understanding the overall behavior
38
- and distribution trends within the dataset.
39
- - It is effective for both single and multiple time-series data analysis.
40
- - The Kernel Density Estimation (KDE) line provides a smooth estimate of the overall trend in data distribution.
41
-
42
- **Limitations**:
43
- - The metric only presents a high-level view of data distribution and does not offer specific numeric measures such
44
- as skewness or kurtosis.
45
- - The histogram does not display precise data values; due to the data grouping into bins, some detail is inevitably
46
- lost, marking a trade-off between precision and general overview.
47
- - The histogram cannot handle non-numeric data columns.
48
- - The histogram's shape may be sensitive to the number of bins used.
35
+ - High skewness or kurtosis in the data distribution, indicating potential bias.
36
+ - Presence of significant outliers in the data distribution.
37
+
38
+ ### Strengths
39
+
40
+ - Serves as a visual diagnostic tool for understanding data behavior and distribution trends.
41
+ - Effective for analyzing both single and multiple time-series data.
42
+ - KDE line provides a smooth estimate of the overall trend in data distribution.
43
+
44
+ ### Limitations
45
+
46
+ - Provides a high-level view without specific numeric measures such as skewness or kurtosis.
47
+ - The histogram loses some detail due to binning of data values.
48
+ - Cannot handle non-numeric data columns.
49
+ - Histogram shape may be sensitive to the number of bins used.
49
50
  """
50
51
 
51
52
  df = dataset.df
@@ -62,7 +63,7 @@ def TimeSeriesHistogram(dataset, nbins=30):
62
63
  )
63
64
  fig.update_layout(
64
65
  title={
65
- "text": f"Histogram for {col}",
66
+ "text": f"{col}",
66
67
  "y": 0.9,
67
68
  "x": 0.5,
68
69
  "xanchor": "center",