validmind 2.5.8__py3-none-any.whl → 2.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (233)
  1. validmind/__version__.py +1 -1
  2. validmind/ai/test_descriptions.py +80 -119
  3. validmind/ai/test_result_description/config.yaml +29 -0
  4. validmind/ai/test_result_description/context.py +73 -0
  5. validmind/ai/test_result_description/image_processing.py +124 -0
  6. validmind/ai/test_result_description/system.jinja +39 -0
  7. validmind/ai/test_result_description/user.jinja +25 -0
  8. validmind/api_client.py +89 -43
  9. validmind/client.py +2 -2
  10. validmind/client_config.py +11 -14
  11. validmind/datasets/credit_risk/__init__.py +1 -0
  12. validmind/datasets/credit_risk/datasets/lending_club_biased.csv.gz +0 -0
  13. validmind/datasets/credit_risk/lending_club_bias.py +142 -0
  14. validmind/datasets/regression/fred_timeseries.py +67 -138
  15. validmind/template.py +1 -0
  16. validmind/test_suites/__init__.py +0 -2
  17. validmind/test_suites/statsmodels_timeseries.py +1 -1
  18. validmind/test_suites/summarization.py +0 -1
  19. validmind/test_suites/time_series.py +0 -43
  20. validmind/tests/__types__.py +14 -15
  21. validmind/tests/data_validation/ACFandPACFPlot.py +15 -13
  22. validmind/tests/data_validation/ADF.py +31 -24
  23. validmind/tests/data_validation/AutoAR.py +9 -9
  24. validmind/tests/data_validation/AutoMA.py +23 -16
  25. validmind/tests/data_validation/AutoSeasonality.py +18 -16
  26. validmind/tests/data_validation/AutoStationarity.py +21 -16
  27. validmind/tests/data_validation/BivariateScatterPlots.py +67 -96
  28. validmind/tests/{model_validation/statsmodels → data_validation}/BoxPierce.py +34 -34
  29. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +85 -124
  30. validmind/tests/data_validation/ClassImbalance.py +15 -12
  31. validmind/tests/data_validation/DFGLSArch.py +19 -13
  32. validmind/tests/data_validation/DatasetDescription.py +17 -11
  33. validmind/tests/data_validation/DatasetSplit.py +7 -5
  34. validmind/tests/data_validation/DescriptiveStatistics.py +28 -21
  35. validmind/tests/data_validation/Duplicates.py +33 -25
  36. validmind/tests/data_validation/EngleGrangerCoint.py +35 -33
  37. validmind/tests/data_validation/FeatureTargetCorrelationPlot.py +59 -71
  38. validmind/tests/data_validation/HighCardinality.py +19 -12
  39. validmind/tests/data_validation/HighPearsonCorrelation.py +27 -22
  40. validmind/tests/data_validation/IQROutliersBarPlot.py +13 -10
  41. validmind/tests/data_validation/IQROutliersTable.py +40 -36
  42. validmind/tests/data_validation/IsolationForestOutliers.py +21 -14
  43. validmind/tests/data_validation/JarqueBera.py +70 -0
  44. validmind/tests/data_validation/KPSS.py +34 -29
  45. validmind/tests/data_validation/LJungBox.py +66 -0
  46. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +22 -15
  47. validmind/tests/data_validation/MissingValues.py +32 -27
  48. validmind/tests/data_validation/MissingValuesBarPlot.py +25 -21
  49. validmind/tests/data_validation/PearsonCorrelationMatrix.py +71 -84
  50. validmind/tests/data_validation/PhillipsPerronArch.py +37 -30
  51. validmind/tests/data_validation/ProtectedClassesCombination.py +197 -0
  52. validmind/tests/data_validation/ProtectedClassesDescription.py +130 -0
  53. validmind/tests/data_validation/ProtectedClassesDisparity.py +133 -0
  54. validmind/tests/data_validation/ProtectedClassesThresholdOptimizer.py +172 -0
  55. validmind/tests/data_validation/RollingStatsPlot.py +31 -23
  56. validmind/tests/data_validation/RunsTest.py +72 -0
  57. validmind/tests/data_validation/ScatterPlot.py +63 -78
  58. validmind/tests/data_validation/SeasonalDecompose.py +38 -34
  59. validmind/tests/{model_validation/statsmodels → data_validation}/ShapiroWilk.py +35 -30
  60. validmind/tests/data_validation/Skewness.py +35 -37
  61. validmind/tests/data_validation/SpreadPlot.py +35 -35
  62. validmind/tests/data_validation/TabularCategoricalBarPlots.py +23 -17
  63. validmind/tests/data_validation/TabularDateTimeHistograms.py +21 -13
  64. validmind/tests/data_validation/TabularDescriptionTables.py +51 -16
  65. validmind/tests/data_validation/TabularNumericalHistograms.py +25 -22
  66. validmind/tests/data_validation/TargetRateBarPlots.py +21 -14
  67. validmind/tests/data_validation/TimeSeriesDescription.py +25 -18
  68. validmind/tests/data_validation/TimeSeriesDescriptiveStatistics.py +23 -17
  69. validmind/tests/data_validation/TimeSeriesFrequency.py +24 -17
  70. validmind/tests/data_validation/TimeSeriesHistogram.py +33 -32
  71. validmind/tests/data_validation/TimeSeriesLinePlot.py +17 -10
  72. validmind/tests/data_validation/TimeSeriesMissingValues.py +15 -10
  73. validmind/tests/data_validation/TimeSeriesOutliers.py +37 -33
  74. validmind/tests/data_validation/TooManyZeroValues.py +16 -11
  75. validmind/tests/data_validation/UniqueRows.py +11 -6
  76. validmind/tests/data_validation/WOEBinPlots.py +23 -16
  77. validmind/tests/data_validation/WOEBinTable.py +35 -30
  78. validmind/tests/data_validation/ZivotAndrewsArch.py +34 -28
  79. validmind/tests/data_validation/nlp/CommonWords.py +21 -14
  80. validmind/tests/data_validation/nlp/Hashtags.py +42 -40
  81. validmind/tests/data_validation/nlp/LanguageDetection.py +33 -14
  82. validmind/tests/data_validation/nlp/Mentions.py +21 -15
  83. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +32 -9
  84. validmind/tests/data_validation/nlp/Punctuations.py +24 -20
  85. validmind/tests/data_validation/nlp/Sentiment.py +27 -8
  86. validmind/tests/data_validation/nlp/StopWords.py +26 -19
  87. validmind/tests/data_validation/nlp/TextDescription.py +39 -36
  88. validmind/tests/data_validation/nlp/Toxicity.py +32 -9
  89. validmind/tests/decorator.py +81 -42
  90. validmind/tests/model_validation/BertScore.py +36 -27
  91. validmind/tests/model_validation/BleuScore.py +25 -19
  92. validmind/tests/model_validation/ClusterSizeDistribution.py +38 -34
  93. validmind/tests/model_validation/ContextualRecall.py +38 -13
  94. validmind/tests/model_validation/FeaturesAUC.py +32 -13
  95. validmind/tests/model_validation/MeteorScore.py +46 -33
  96. validmind/tests/model_validation/ModelMetadata.py +32 -64
  97. validmind/tests/model_validation/ModelPredictionResiduals.py +75 -73
  98. validmind/tests/model_validation/RegardScore.py +30 -14
  99. validmind/tests/model_validation/RegressionResidualsPlot.py +10 -5
  100. validmind/tests/model_validation/RougeScore.py +36 -30
  101. validmind/tests/model_validation/TimeSeriesPredictionWithCI.py +30 -14
  102. validmind/tests/model_validation/TimeSeriesPredictionsPlot.py +27 -30
  103. validmind/tests/model_validation/TimeSeriesR2SquareBySegments.py +68 -63
  104. validmind/tests/model_validation/TokenDisparity.py +31 -23
  105. validmind/tests/model_validation/ToxicityScore.py +26 -17
  106. validmind/tests/model_validation/embeddings/ClusterDistribution.py +24 -20
  107. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +30 -27
  108. validmind/tests/model_validation/embeddings/CosineSimilarityDistribution.py +7 -5
  109. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +32 -23
  110. validmind/tests/model_validation/embeddings/DescriptiveAnalytics.py +7 -5
  111. validmind/tests/model_validation/embeddings/EmbeddingsVisualization2D.py +15 -11
  112. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +29 -29
  113. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +34 -25
  114. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +38 -26
  115. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +40 -1
  116. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +18 -17
  117. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +40 -45
  118. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +17 -19
  119. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +29 -25
  120. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +38 -28
  121. validmind/tests/model_validation/ragas/AnswerCorrectness.py +5 -4
  122. validmind/tests/model_validation/ragas/AnswerRelevance.py +5 -4
  123. validmind/tests/model_validation/ragas/AnswerSimilarity.py +5 -4
  124. validmind/tests/model_validation/ragas/AspectCritique.py +12 -6
  125. validmind/tests/model_validation/ragas/ContextEntityRecall.py +9 -8
  126. validmind/tests/model_validation/ragas/ContextPrecision.py +5 -4
  127. validmind/tests/model_validation/ragas/ContextRecall.py +5 -4
  128. validmind/tests/model_validation/ragas/ContextUtilization.py +155 -0
  129. validmind/tests/model_validation/ragas/Faithfulness.py +5 -4
  130. validmind/tests/model_validation/ragas/NoiseSensitivity.py +152 -0
  131. validmind/tests/model_validation/ragas/utils.py +6 -0
  132. validmind/tests/model_validation/sklearn/AdjustedMutualInformation.py +19 -12
  133. validmind/tests/model_validation/sklearn/AdjustedRandIndex.py +22 -17
  134. validmind/tests/model_validation/sklearn/ClassifierPerformance.py +27 -25
  135. validmind/tests/model_validation/sklearn/ClusterCosineSimilarity.py +7 -5
  136. validmind/tests/model_validation/sklearn/ClusterPerformance.py +40 -78
  137. validmind/tests/model_validation/sklearn/ClusterPerformanceMetrics.py +15 -17
  138. validmind/tests/model_validation/sklearn/CompletenessScore.py +17 -11
  139. validmind/tests/model_validation/sklearn/ConfusionMatrix.py +22 -15
  140. validmind/tests/model_validation/sklearn/FeatureImportance.py +95 -0
  141. validmind/tests/model_validation/sklearn/FowlkesMallowsScore.py +7 -7
  142. validmind/tests/model_validation/sklearn/HomogeneityScore.py +19 -12
  143. validmind/tests/model_validation/sklearn/HyperParametersTuning.py +35 -30
  144. validmind/tests/model_validation/sklearn/KMeansClustersOptimization.py +10 -5
  145. validmind/tests/model_validation/sklearn/MinimumAccuracy.py +32 -32
  146. validmind/tests/model_validation/sklearn/MinimumF1Score.py +23 -23
  147. validmind/tests/model_validation/sklearn/MinimumROCAUCScore.py +15 -10
  148. validmind/tests/model_validation/sklearn/ModelsPerformanceComparison.py +26 -19
  149. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +38 -18
  150. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +32 -26
  151. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +8 -6
  152. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +24 -17
  153. validmind/tests/model_validation/sklearn/ROCCurve.py +12 -7
  154. validmind/tests/model_validation/sklearn/RegressionErrors.py +74 -130
  155. validmind/tests/model_validation/sklearn/RegressionErrorsComparison.py +27 -12
  156. validmind/tests/model_validation/sklearn/{RegressionModelsPerformanceComparison.py → RegressionPerformance.py} +18 -20
  157. validmind/tests/model_validation/sklearn/RegressionR2Square.py +55 -94
  158. validmind/tests/model_validation/sklearn/RegressionR2SquareComparison.py +32 -13
  159. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +36 -32
  160. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +66 -5
  161. validmind/tests/model_validation/sklearn/SilhouettePlot.py +27 -19
  162. validmind/tests/model_validation/sklearn/TrainingTestDegradation.py +25 -18
  163. validmind/tests/model_validation/sklearn/VMeasure.py +14 -13
  164. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +7 -5
  165. validmind/tests/model_validation/statsmodels/AutoARIMA.py +24 -18
  166. validmind/tests/model_validation/statsmodels/CumulativePredictionProbabilities.py +73 -104
  167. validmind/tests/model_validation/statsmodels/DurbinWatsonTest.py +59 -32
  168. validmind/tests/model_validation/statsmodels/GINITable.py +44 -77
  169. validmind/tests/model_validation/statsmodels/KolmogorovSmirnov.py +33 -34
  170. validmind/tests/model_validation/statsmodels/Lilliefors.py +27 -24
  171. validmind/tests/model_validation/statsmodels/PredictionProbabilitiesHistogram.py +86 -119
  172. validmind/tests/model_validation/statsmodels/RegressionCoeffs.py +100 -0
  173. validmind/tests/model_validation/statsmodels/RegressionFeatureSignificance.py +14 -9
  174. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +17 -13
  175. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +46 -43
  176. validmind/tests/model_validation/statsmodels/RegressionModelSensitivityPlot.py +38 -36
  177. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +30 -28
  178. validmind/tests/model_validation/statsmodels/RegressionPermutationFeatureImportance.py +18 -11
  179. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +75 -107
  180. validmind/tests/ongoing_monitoring/FeatureDrift.py +10 -6
  181. validmind/tests/ongoing_monitoring/PredictionAcrossEachFeature.py +31 -25
  182. validmind/tests/ongoing_monitoring/PredictionCorrelation.py +29 -21
  183. validmind/tests/ongoing_monitoring/TargetPredictionDistributionPlot.py +31 -23
  184. validmind/tests/prompt_validation/Bias.py +14 -11
  185. validmind/tests/prompt_validation/Clarity.py +16 -14
  186. validmind/tests/prompt_validation/Conciseness.py +7 -5
  187. validmind/tests/prompt_validation/Delimitation.py +23 -22
  188. validmind/tests/prompt_validation/NegativeInstruction.py +7 -5
  189. validmind/tests/prompt_validation/Robustness.py +12 -10
  190. validmind/tests/prompt_validation/Specificity.py +13 -11
  191. validmind/tests/prompt_validation/ai_powered_test.py +6 -0
  192. validmind/tests/run.py +68 -23
  193. validmind/unit_metrics/__init__.py +81 -144
  194. validmind/unit_metrics/classification/{sklearn/Accuracy.py → Accuracy.py} +1 -1
  195. validmind/unit_metrics/classification/{sklearn/F1.py → F1.py} +1 -1
  196. validmind/unit_metrics/classification/{sklearn/Precision.py → Precision.py} +1 -1
  197. validmind/unit_metrics/classification/{sklearn/ROC_AUC.py → ROC_AUC.py} +1 -2
  198. validmind/unit_metrics/classification/{sklearn/Recall.py → Recall.py} +1 -1
  199. validmind/unit_metrics/regression/{sklearn/AdjustedRSquaredScore.py → AdjustedRSquaredScore.py} +1 -1
  200. validmind/unit_metrics/regression/GiniCoefficient.py +1 -1
  201. validmind/unit_metrics/regression/HuberLoss.py +1 -1
  202. validmind/unit_metrics/regression/KolmogorovSmirnovStatistic.py +1 -1
  203. validmind/unit_metrics/regression/{sklearn/MeanAbsoluteError.py → MeanAbsoluteError.py} +1 -1
  204. validmind/unit_metrics/regression/MeanAbsolutePercentageError.py +1 -1
  205. validmind/unit_metrics/regression/MeanBiasDeviation.py +1 -1
  206. validmind/unit_metrics/regression/{sklearn/MeanSquaredError.py → MeanSquaredError.py} +1 -1
  207. validmind/unit_metrics/regression/QuantileLoss.py +1 -1
  208. validmind/unit_metrics/regression/{sklearn/RSquaredScore.py → RSquaredScore.py} +1 -1
  209. validmind/unit_metrics/regression/{sklearn/RootMeanSquaredError.py → RootMeanSquaredError.py} +1 -1
  210. validmind/utils.py +4 -0
  211. validmind/vm_models/dataset/dataset.py +2 -0
  212. validmind/vm_models/figure.py +5 -0
  213. validmind/vm_models/test/metric.py +1 -0
  214. validmind/vm_models/test/result_wrapper.py +143 -158
  215. validmind/vm_models/test/threshold_test.py +1 -0
  216. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/METADATA +4 -3
  217. validmind-2.5.18.dist-info/RECORD +324 -0
  218. validmind/tests/data_validation/ANOVAOneWayTable.py +0 -138
  219. validmind/tests/data_validation/BivariateFeaturesBarPlots.py +0 -142
  220. validmind/tests/data_validation/BivariateHistograms.py +0 -117
  221. validmind/tests/data_validation/HeatmapFeatureCorrelations.py +0 -124
  222. validmind/tests/data_validation/MissingValuesRisk.py +0 -88
  223. validmind/tests/model_validation/ModelMetadataComparison.py +0 -59
  224. validmind/tests/model_validation/sklearn/FeatureImportanceComparison.py +0 -83
  225. validmind/tests/model_validation/statsmodels/JarqueBera.py +0 -73
  226. validmind/tests/model_validation/statsmodels/LJungBox.py +0 -66
  227. validmind/tests/model_validation/statsmodels/RegressionCoeffsPlot.py +0 -135
  228. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +0 -103
  229. validmind/tests/model_validation/statsmodels/RunsTest.py +0 -71
  230. validmind-2.5.8.dist-info/RECORD +0 -318
  231. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/LICENSE +0 -0
  232. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/WHEEL +0 -0
  233. {validmind-2.5.8.dist-info → validmind-2.5.18.dist-info}/entry_points.txt +0 -0
@@ -77,6 +77,7 @@ class Metric(Test):
77
77
 
78
78
  self.result = MetricResultWrapper(
79
79
  result_id=self.test_id,
80
+ result_description=self.description(),
80
81
  result_metadata=[
81
82
  (
82
83
  get_description_metadata(
@@ -63,19 +63,63 @@ async def update_metadata(content_id: str, text: str, _json: Union[Dict, List] =
63
63
 
64
64
 
65
65
  def plot_figures(figures: List[Figure]) -> None:
66
- """
67
- Plot figures to a ipywidgets GridBox
68
- """
69
-
66
+ """Plot figures to a ipywidgets GridBox"""
70
67
  plots = [figure.to_widget() for figure in figures]
71
-
72
68
  num_columns = 2 if len(figures) > 1 else 1
69
+
73
70
  return GridBox(
74
71
  plots,
75
72
  layout=Layout(grid_template_columns=f"repeat({num_columns}, 1fr)"),
76
73
  )
77
74
 
78
75
 
76
+ def _summary_tables_to_widget(summary: ResultSummary):
77
+ """Convert summary (list of json tables) into ipywidgets"""
78
+ widgets = []
79
+
80
+ for table in summary.results:
81
+ if table.metadata and table.metadata.title:
82
+ widgets.append(HTML(f"<h4>{table.metadata.title}</h4>"))
83
+
84
+ df_html = (
85
+ pd.DataFrame(table.data)
86
+ .style.format(precision=4)
87
+ .hide(axis="index")
88
+ .set_table_styles(
89
+ [
90
+ {
91
+ "selector": "",
92
+ "props": [("width", "100%")],
93
+ },
94
+ {
95
+ "selector": "th",
96
+ "props": [("text-align", "left")],
97
+ },
98
+ {
99
+ "selector": "tbody tr:nth-child(even)",
100
+ "props": [("background-color", "#FFFFFF")],
101
+ },
102
+ {
103
+ "selector": "tbody tr:nth-child(odd)",
104
+ "props": [("background-color", "#F5F5F5")],
105
+ },
106
+ {
107
+ "selector": "td, th",
108
+ "props": [
109
+ ("padding-left", "5px"),
110
+ ("padding-right", "5px"),
111
+ ],
112
+ },
113
+ ]
114
+ )
115
+ .set_properties(**{"text-align": "left"})
116
+ .to_html(escape=False)
117
+ )
118
+ widgets.append(HTML(df_html))
119
+
120
+ return widgets
121
+
122
+
79
123
  @dataclass
80
124
  class ResultWrapper(ABC):
81
125
  """Base Class for test suite results"""
@@ -84,6 +128,8 @@ class ResultWrapper(ABC):
84
128
  # id of the result, can be set by the subclass. This helps
85
129
  # looking up results later on
86
130
  result_id: str = None
131
+ # Text description from test or metric (docstring usually)
132
+ result_description: str = None
87
133
  # Text metadata about the result, can include description, etc.
88
134
  result_metadata: List[dict] = None
89
135
  # Output template to use for rendering the result
@@ -105,53 +151,6 @@ class ResultWrapper(ABC):
105
151
 
106
152
  return self.to_widget()
107
153
 
108
- def _summary_tables_to_widget(self, summary: ResultSummary):
109
- """
110
- Create an ipywdiget representation of the summary tables
111
- """
112
- tables = []
113
- for table in summary.results:
114
- # Explore advanced styling
115
- summary_table = (
116
- pd.DataFrame(table.data)
117
- .style.format(precision=4)
118
- .hide(axis="index")
119
- .set_table_styles(
120
- [
121
- {
122
- "selector": "",
123
- "props": [("width", "100%")],
124
- },
125
- {
126
- "selector": "th",
127
- "props": [("text-align", "left")],
128
- },
129
- {
130
- "selector": "tbody tr:nth-child(even)",
131
- "props": [("background-color", "#FFFFFF")],
132
- },
133
- {
134
- "selector": "tbody tr:nth-child(odd)",
135
- "props": [("background-color", "#F5F5F5")],
136
- },
137
- {
138
- "selector": "td, th",
139
- "props": [
140
- ("padding-left", "5px"),
141
- ("padding-right", "5px"),
142
- ],
143
- },
144
- ]
145
- )
146
- .set_properties(**{"text-align": "left"})
147
- .to_html(escape=False)
148
- ) # table.data is an orient=records dump
149
-
150
- if table.metadata and table.metadata.title:
151
- tables.append(HTML(value=f"<h3>{table.metadata.title}</h3>"))
152
- tables.append(HTML(value=summary_table))
153
- return tables
154
-
155
154
  def _validate_section_id_for_block(self, section_id: str, position: int = None):
156
155
  """
157
156
  Validate the section_id exits on the template before logging. We validate
@@ -244,9 +243,11 @@ class MetricResultWrapper(ResultWrapper):
244
243
  """
245
244
 
246
245
  name: str = "Metric"
247
- figures: Optional[List[Figure]] = None
246
+ scalar: Optional[Union[int, float]] = None
248
247
  metric: Optional[MetricResult] = None
249
- inputs: List[str] = None
248
+ figures: Optional[List[Figure]] = None
249
+ inputs: List[str] = None # List of input ids
250
+ params: Dict = None
250
251
 
251
252
  def __repr__(self) -> str:
252
253
  if self.metric:
@@ -254,18 +255,12 @@ class MetricResultWrapper(ResultWrapper):
254
255
  else:
255
256
  return f'{self.__class__.__name__}(result_id="{self.result_id}", figures)'
256
257
 
257
- def __str__(self) -> str:
258
- if self.metric:
259
- return f'{self.__class__.__name__}(result_id="{self.result_id}", metric, figures)'
260
- else:
261
- return f"{self.__class__.__name__}(result_id={self.result_id}, figures)"
262
-
263
258
  def to_widget(self):
264
259
  if self.metric and self.metric.key == "dataset_description":
265
260
  return ""
266
261
 
267
262
  vbox_children = [
268
- HTML(value=f"<h1>{test_id_to_name(self.result_id)}</h1>"),
263
+ HTML(f"<h1>{test_id_to_name(self.result_id)}</h1>"),
269
264
  ]
270
265
 
271
266
  if self.result_metadata:
@@ -274,111 +269,110 @@ class MetricResultWrapper(ResultWrapper):
274
269
  metric_description = metric_description.get_description()
275
270
  self.result_metadata[0]["text"] = metric_description
276
271
 
277
- vbox_children.append(HTML(value=metric_description))
272
+ vbox_children.append(HTML(metric_description))
273
+
274
+ if self.scalar is not None:
275
+ vbox_children.append(
276
+ HTML(
277
+ "<h3>Unit Metrics</h3>"
278
+ f"<p>{test_id_to_name(self.result_id)} "
279
+ f"(<i>{self.result_id}</i>): "
280
+ f"<code>{self.scalar}</code></p>"
281
+ )
282
+ )
278
283
 
279
284
  if self.metric:
285
+ vbox_children.append(HTML("<h3>Tables</h3>"))
280
286
  if self.output_template:
281
- rendered_output = OutputTemplate(self.output_template).render(
282
- value=self.metric.value
287
+ vbox_children.append(
288
+ HTML(
289
+ OutputTemplate(self.output_template).render(
290
+ value=self.metric.value
291
+ )
292
+ )
283
293
  )
284
- vbox_children.append(HTML(rendered_output))
285
294
  elif self.metric.summary:
286
- tables = self._summary_tables_to_widget(self.metric.summary)
287
- vbox_children.extend(tables)
295
+ vbox_children.extend(_summary_tables_to_widget(self.metric.summary))
288
296
 
289
297
  if self.figures:
290
- vbox_children.append(HTML(value="<h3>Plots</h3>"))
298
+ vbox_children.append(HTML("<h3>Plots</h3>"))
291
299
  plot_widgets = plot_figures(self.figures)
292
300
  vbox_children.append(plot_widgets)
293
301
 
294
- vbox_children.append(
295
- HTML(
296
- value="""
297
- <style>
298
- .metric-result {
299
- background-color: #F5F5F5;
300
- border: 1px solid #e0e0e0;
301
- border-radius: 4px;
302
- padding: 10px;
303
- margin: 10px 0;
304
- }
305
- .metric-result-body {
306
- display: flex;
307
- flex-direction: column;
308
- justify-content: space-between;
309
- gap: 10px;
310
- }
311
- .metric-body-column {
312
- display: flex;
313
- flex-direction: column;
314
- justify-content: space-between;
315
- width: 33%;
316
- }
317
- .metric-body-column-title {
318
- font-size: 16px;
319
- font-weight: 600;
320
- }
321
- .metric-value {
322
- display: flex;
323
- flex-direction: column;
324
- justify-content: space-between;
325
- margin-top: 15px;
326
- }
327
- .metric-value-title {
328
- font-size: 16px;
329
- font-weight: 600;
330
- }
331
- .metric-value-value {
332
- font-size: 14px;
333
- font-weight: 500;
334
- margin-top: 10px;
335
- }
336
- </style>
337
- """
338
- )
339
- )
340
-
341
302
  return VBox(vbox_children)
342
303
 
343
304
  def _get_filtered_summary(self):
344
- """Check if the metric summary has columns from input datasets"""
345
- dataset_columns = set()
346
-
347
- for input in self.inputs:
348
- input_id = input if isinstance(input, str) else input.input_id
349
- input_obj = input_registry.get(input_id)
350
- if isinstance(input_obj, VMDataset):
351
- dataset_columns.update(input_obj.columns)
352
-
353
- for table in [*self.metric.summary.results]:
354
- columns = set()
305
+ """Check if the metric summary has columns from input datasets with matching row counts."""
306
+ dataset_columns = self._get_dataset_columns()
307
+ filtered_results = []
308
+
309
+ for table in self.metric.summary.results:
310
+ table_columns = self._get_table_columns(table)
311
+ sensitive_columns = self._find_sensitive_columns(
312
+ dataset_columns, table_columns
313
+ )
355
314
 
356
- if isinstance(table.data, pd.DataFrame):
357
- columns.update(table.data.columns)
358
- elif isinstance(table.data, list):
359
- columns.update(table.data[0].keys())
315
+ if sensitive_columns:
316
+ self._log_sensitive_data_warning(sensitive_columns)
360
317
  else:
361
- raise ValueError("Invalid data type in summary table")
318
+ filtered_results.append(table)
362
319
 
363
- if bool(columns.intersection(dataset_columns)):
364
- logger.warning(
365
- "Sensitive data in metric summary table. Not logging to API automatically."
366
- " Pass `unsafe=True` to result.log() method to override manually."
367
- )
368
- logger.warning(
369
- f"The following columns are present in the table: {columns}"
370
- f" and also present in the dataset: {dataset_columns}"
320
+ self.metric.summary.results = filtered_results
321
+ return self.metric.summary
322
+
323
+ def _get_dataset_columns(self):
324
+ dataset_columns = {}
325
+ for input_item in self.inputs:
326
+ input_id = (
327
+ input_item if isinstance(input_item, str) else input_item.input_id
328
+ )
329
+ input_obj = input_registry.get(input_id)
330
+ if isinstance(input_obj, VMDataset):
331
+ dataset_columns.update(
332
+ {col: len(input_obj.df) for col in input_obj.columns}
371
333
  )
334
+ return dataset_columns
372
335
 
373
- self.metric.summary.results.remove(table)
336
+ def _get_table_columns(self, table):
337
+ if isinstance(table.data, pd.DataFrame):
338
+ return {col: len(table.data) for col in table.data.columns}
339
+ elif isinstance(table.data, list) and table.data:
340
+ return {col: len(table.data) for col in table.data[0].keys()}
341
+ else:
342
+ raise ValueError("Invalid data type in summary table")
374
343
 
375
- return self.metric.summary
344
+ def _find_sensitive_columns(self, dataset_columns, table_columns):
345
+ return [
346
+ col
347
+ for col, row_count in table_columns.items()
348
+ if col in dataset_columns and row_count == dataset_columns[col]
349
+ ]
350
+
351
+ def _log_sensitive_data_warning(self, sensitive_columns):
352
+ logger.warning(
353
+ "Sensitive data in metric summary table. Not logging to API automatically. "
354
+ "Pass `unsafe=True` to result.log() method to override manually."
355
+ )
356
+ logger.warning(
357
+ f"The following columns are present in the table with matching row counts: {sensitive_columns}"
358
+ )
376
359
 
377
360
  async def log_async(
378
361
  self, section_id: str = None, position: int = None, unsafe=False
379
362
  ):
380
363
  tasks = [] # collect tasks to run in parallel (async)
381
364
 
365
+ if self.scalar is not None:
366
+ # scalars (unit metrics) are logged as key-value pairs associated with the inventory model
367
+ tasks.append(
368
+ api_client.alog_metric(
369
+ key=self.result_id,
370
+ value=self.scalar,
371
+ inputs=self.inputs,
372
+ params=self.params,
373
+ )
374
+ )
375
+
382
376
  if self.metric:
383
377
  if self.metric.summary and not unsafe:
384
378
  self.metric.summary = self._get_filtered_summary()
@@ -411,7 +405,7 @@ class MetricResultWrapper(ResultWrapper):
411
405
  )
412
406
  )
413
407
 
414
- await asyncio.gather(*tasks)
408
+ return await asyncio.gather(*tasks)
415
409
 
416
410
 
417
411
  @dataclass
@@ -433,24 +427,13 @@ class ThresholdTestResultWrapper(ResultWrapper):
433
427
  else:
434
428
  return f'{self.__class__.__name__}(result_id="{self.result_id}", figures)'
435
429
 
436
- def __str__(self) -> str:
437
- if self.test_results:
438
- return (
439
- f'{self.__class__.__name__}(result_id="{self.result_id}", test_results)'
440
- )
441
- else:
442
- return f'{self.__class__.__name__}(result_id="{self.result_id}", figures)'
443
-
444
430
  def to_widget(self):
445
431
  vbox_children = []
446
432
  description_html = []
447
433
 
448
- test_params = json.dumps(self.test_results.params, cls=NumpyEncoder, indent=2)
449
-
450
- test_title = test_id_to_name(self.test_results.test_name)
451
434
  description_html.append(
452
435
  f"""
453
- <h1>{test_title} {"✅" if self.test_results.passed else "❌"}</h1>
436
+ <h1>{test_id_to_name(self.test_results.test_name)} {"✅" if self.test_results.passed else "❌"}</h1>
454
437
  """
455
438
  )
456
439
 
@@ -462,6 +445,7 @@ class ThresholdTestResultWrapper(ResultWrapper):
462
445
 
463
446
  description_html.append(metric_description)
464
447
 
448
+ test_params = json.dumps(self.test_results.params, cls=NumpyEncoder, indent=2)
465
449
  description_html.append(
466
450
  f"""
467
451
  <h4>Test Parameters</h4>
@@ -469,14 +453,14 @@ class ThresholdTestResultWrapper(ResultWrapper):
469
453
  """
470
454
  )
471
455
 
472
- vbox_children.append(HTML(value="".join(description_html)))
456
+ vbox_children.append(HTML("".join(description_html)))
473
457
 
474
458
  if self.test_results.summary:
475
- tables = self._summary_tables_to_widget(self.test_results.summary)
476
- vbox_children.extend(tables)
459
+ vbox_children.append(HTML("<h3>Tables</h3>"))
460
+ vbox_children.extend(_summary_tables_to_widget(self.test_results.summary))
477
461
 
478
462
  if self.figures:
479
- vbox_children.append(HTML(value="<h3>Plots</h3>"))
463
+ vbox_children.append(HTML("<h3>Plots</h3>"))
480
464
  plot_widgets = plot_figures(self.figures)
481
465
  vbox_children.append(plot_widgets)
482
466
 
@@ -491,6 +475,7 @@ class ThresholdTestResultWrapper(ResultWrapper):
491
475
 
492
476
  if self.figures:
493
477
  tasks.append(api_client.log_figures(self.figures))
478
+
494
479
  if hasattr(self, "result_metadata") and self.result_metadata:
495
480
  description = self.result_metadata[0].get("text", "")
496
481
  if isinstance(description, DescriptionFuture):
@@ -80,6 +80,7 @@ class ThresholdTest(Test):
80
80
 
81
81
  self.result = ThresholdTestResultWrapper(
82
82
  result_id=self.test_id,
83
+ result_description=self.description(),
83
84
  result_metadata=[
84
85
  get_description_metadata(
85
86
  test_id=self.test_id,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: validmind
3
- Version: 2.5.8
3
+ Version: 2.5.18
4
4
  Summary: ValidMind Developer Framework
5
5
  License: Commercial License
6
6
  Author: Andres Rodriguez
@@ -20,6 +20,7 @@ Requires-Dist: aiohttp[speedups]
20
20
  Requires-Dist: arch
21
21
  Requires-Dist: bert-score (>=0.3.13)
22
22
  Requires-Dist: catboost
23
+ Requires-Dist: datasets (>=2.10.0,<3.0.0)
23
24
  Requires-Dist: evaluate
24
25
  Requires-Dist: ipywidgets
25
26
  Requires-Dist: kaleido (>=0.2.1,!=0.2.1.post1)
@@ -34,13 +35,13 @@ Requires-Dist: nltk (>=3.8.1,<4.0.0)
34
35
  Requires-Dist: numba (<0.59.0)
35
36
  Requires-Dist: numpy
36
37
  Requires-Dist: openai (>=1)
37
- Requires-Dist: pandas (>=1.1,<2)
38
+ Requires-Dist: pandas (>=1.1,<=2.0.3)
38
39
  Requires-Dist: plotly
39
40
  Requires-Dist: plotly-express
40
41
  Requires-Dist: polars
41
42
  Requires-Dist: pycocoevalcap (>=1.2,<2.0) ; extra == "all" or extra == "llm"
42
43
  Requires-Dist: python-dotenv
43
- Requires-Dist: ragas (>=0.1.7) ; extra == "all" or extra == "llm"
44
+ Requires-Dist: ragas (>=0.1.19) ; extra == "all" or extra == "llm"
44
45
  Requires-Dist: rouge (>=1)
45
46
  Requires-Dist: rpy2 (>=3.5.10,<4.0.0) ; extra == "all" or extra == "r-support"
46
47
  Requires-Dist: scikit-learn