validmind 2.1.1__py3-none-any.whl → 2.2.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (113)
  1. validmind/__version__.py +1 -1
  2. validmind/ai.py +72 -49
  3. validmind/api_client.py +42 -16
  4. validmind/client.py +68 -25
  5. validmind/datasets/llm/rag/__init__.py +11 -0
  6. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +30 -0
  7. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +30 -0
  8. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +53 -0
  9. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +53 -0
  10. validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +53 -0
  11. validmind/datasets/llm/rag/rfp.py +41 -0
  12. validmind/errors.py +1 -1
  13. validmind/html_templates/__init__.py +0 -0
  14. validmind/html_templates/content_blocks.py +89 -14
  15. validmind/models/__init__.py +7 -4
  16. validmind/models/foundation.py +8 -34
  17. validmind/models/function.py +51 -0
  18. validmind/models/huggingface.py +16 -46
  19. validmind/models/metadata.py +42 -0
  20. validmind/models/pipeline.py +66 -0
  21. validmind/models/pytorch.py +8 -42
  22. validmind/models/r_model.py +33 -82
  23. validmind/models/sklearn.py +39 -38
  24. validmind/template.py +8 -26
  25. validmind/tests/__init__.py +43 -20
  26. validmind/tests/data_validation/ANOVAOneWayTable.py +1 -1
  27. validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
  28. validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
  29. validmind/tests/data_validation/Duplicates.py +1 -1
  30. validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
  31. validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
  32. validmind/tests/data_validation/TargetRateBarPlots.py +1 -1
  33. validmind/tests/data_validation/nlp/LanguageDetection.py +59 -0
  34. validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +48 -0
  35. validmind/tests/data_validation/nlp/Punctuations.py +11 -12
  36. validmind/tests/data_validation/nlp/Sentiment.py +57 -0
  37. validmind/tests/data_validation/nlp/Toxicity.py +45 -0
  38. validmind/tests/decorator.py +12 -7
  39. validmind/tests/model_validation/BertScore.py +100 -98
  40. validmind/tests/model_validation/BleuScore.py +93 -64
  41. validmind/tests/model_validation/ContextualRecall.py +74 -91
  42. validmind/tests/model_validation/MeteorScore.py +86 -74
  43. validmind/tests/model_validation/RegardScore.py +103 -121
  44. validmind/tests/model_validation/RougeScore.py +118 -0
  45. validmind/tests/model_validation/TokenDisparity.py +84 -121
  46. validmind/tests/model_validation/ToxicityScore.py +109 -123
  47. validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +96 -0
  48. validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +71 -0
  49. validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +92 -0
  50. validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +69 -0
  51. validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +78 -0
  52. validmind/tests/model_validation/embeddings/StabilityAnalysis.py +35 -23
  53. validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +3 -0
  54. validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +7 -1
  55. validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +3 -0
  56. validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +3 -0
  57. validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +99 -0
  58. validmind/tests/model_validation/ragas/AnswerCorrectness.py +131 -0
  59. validmind/tests/model_validation/ragas/AnswerRelevance.py +134 -0
  60. validmind/tests/model_validation/ragas/AnswerSimilarity.py +119 -0
  61. validmind/tests/model_validation/ragas/AspectCritique.py +167 -0
  62. validmind/tests/model_validation/ragas/ContextEntityRecall.py +133 -0
  63. validmind/tests/model_validation/ragas/ContextPrecision.py +123 -0
  64. validmind/tests/model_validation/ragas/ContextRecall.py +123 -0
  65. validmind/tests/model_validation/ragas/ContextRelevancy.py +114 -0
  66. validmind/tests/model_validation/ragas/Faithfulness.py +119 -0
  67. validmind/tests/model_validation/ragas/utils.py +66 -0
  68. validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -7
  69. validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -9
  70. validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -10
  71. validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +3 -2
  72. validmind/tests/model_validation/sklearn/ROCCurve.py +2 -1
  73. validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
  74. validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -3
  75. validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -11
  76. validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +3 -4
  77. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +1 -1
  78. validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +1 -1
  79. validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
  80. validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
  81. validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
  82. validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +1 -1
  83. validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
  84. validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +5 -6
  85. validmind/unit_metrics/__init__.py +26 -49
  86. validmind/unit_metrics/composite.py +13 -7
  87. validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +1 -1
  88. validmind/utils.py +99 -6
  89. validmind/vm_models/__init__.py +1 -1
  90. validmind/vm_models/dataset/__init__.py +7 -0
  91. validmind/vm_models/dataset/dataset.py +560 -0
  92. validmind/vm_models/dataset/utils.py +146 -0
  93. validmind/vm_models/model.py +97 -72
  94. validmind/vm_models/test/metric.py +9 -24
  95. validmind/vm_models/test/result_wrapper.py +124 -28
  96. validmind/vm_models/test/threshold_test.py +10 -28
  97. validmind/vm_models/test_context.py +1 -1
  98. validmind/vm_models/test_suite/summary.py +3 -4
  99. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/METADATA +5 -3
  100. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/RECORD +103 -78
  101. validmind/models/catboost.py +0 -33
  102. validmind/models/statsmodels.py +0 -50
  103. validmind/models/xgboost.py +0 -30
  104. validmind/tests/model_validation/BertScoreAggregate.py +0 -90
  105. validmind/tests/model_validation/RegardHistogram.py +0 -148
  106. validmind/tests/model_validation/RougeMetrics.py +0 -147
  107. validmind/tests/model_validation/RougeMetricsAggregate.py +0 -133
  108. validmind/tests/model_validation/SelfCheckNLIScore.py +0 -112
  109. validmind/tests/model_validation/ToxicityHistogram.py +0 -136
  110. validmind/vm_models/dataset.py +0 -1303
  111. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/LICENSE +0 -0
  112. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/WHEEL +0 -0
  113. {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/entry_points.txt +0 -0
@@ -7,7 +7,7 @@ from dataclasses import dataclass
7
7
  import pandas as pd
8
8
 
9
9
  from validmind.logging import get_logger
10
- from validmind.vm_models.model import ModelAttributes, VMModel
10
+ from validmind.models.function import FunctionModel
11
11
 
12
12
  logger = get_logger(__name__)
13
13
 
@@ -18,7 +18,7 @@ class Prompt:
18
18
  variables: list
19
19
 
20
20
 
21
- class FoundationModel(VMModel):
21
+ class FoundationModel(FunctionModel):
22
22
  """FoundationModel class wraps a Foundation LLM endpoint
23
23
 
24
24
  This class wraps a predict function that is user-defined and adapts it to works
@@ -29,22 +29,14 @@ class FoundationModel(VMModel):
29
29
  and return the result from the model
30
30
  prompt (Prompt): The prompt object that defines the prompt template and the
31
31
  variables (if any)
32
- attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
32
+ name (str, optional): The name of the model. Defaults to name of the predict_fn
33
33
  """
34
34
 
35
- def __init__(
36
- self,
37
- predict_fn: callable,
38
- prompt: Prompt, # prompt used for model (for now just a string)
39
- attributes: ModelAttributes = None,
40
- input_id: str = None,
41
- ):
42
- super().__init__(
43
- attributes=attributes,
44
- input_id=input_id,
45
- )
46
- self.predict_fn = predict_fn
47
- self.prompt = prompt
35
+ def __post_init__(self):
36
+ if not getattr(self, "predict_fn") or not callable(self.predict_fn):
37
+ raise ValueError("FoundationModel requires a callable predict_fn")
38
+
39
+ self.name = self.name or self.predict_fn.__name__
48
40
 
49
41
  def _build_prompt(self, x: pd.DataFrame):
50
42
  """
@@ -59,21 +51,3 @@ class FoundationModel(VMModel):
59
51
  Predict method for the model. This is a wrapper around the model's
60
52
  """
61
53
  return [self.predict_fn(self._build_prompt(x[1])) for x in X.iterrows()]
62
-
63
- def model_library(self):
64
- """
65
- Returns the model library name
66
- """
67
- return "FoundationModel"
68
-
69
- def model_class(self):
70
- """
71
- Returns the model class name
72
- """
73
- return "FoundationModel"
74
-
75
- def model_name(self):
76
- """
77
- Returns model name
78
- """
79
- return "FoundationModel"
@@ -0,0 +1,51 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from validmind.vm_models.model import VMModel
6
+
7
+
8
+ # semi-immutable dict
9
+ class Input(dict):
10
+ def __init__(self, *args, **kwargs):
11
+ super().__init__(*args, **kwargs)
12
+ self._new = set()
13
+
14
+ def __setitem__(self, key, value):
15
+ self._new.add(key)
16
+ super().__setitem__(key, value)
17
+
18
+ def __delitem__(self, _):
19
+ raise TypeError("Cannot delete keys from Input")
20
+
21
+ def get_new(self):
22
+ return {k: self[k] for k in self._new}
23
+
24
+
25
+ class FunctionModel(VMModel):
26
+ """
27
+ FunctionModel class wraps a user-defined predict function
28
+
29
+ Attributes:
30
+ predict_fn (callable): The predict function that should take a dictionary of
31
+ input features and return a prediction.
32
+ input_id (str, optional): The input ID for the model. Defaults to None.
33
+ name (str, optional): The name of the model. Defaults to the name of the predict_fn.
34
+ """
35
+
36
+ def __post_init__(self):
37
+ if not getattr(self, "predict_fn") or not callable(self.predict_fn):
38
+ raise ValueError("FunctionModel requires a callable predict_fn")
39
+
40
+ self.name = self.name or self.predict_fn.__name__
41
+
42
+ def predict(self, X):
43
+ """Compute predictions for the input (X)
44
+
45
+ Args:
46
+ X (pandas.DataFrame): The input features to predict on
47
+
48
+ Returns:
49
+ list: The predictions
50
+ """
51
+ return [self.predict_fn(x) for x in X.to_dict(orient="records")]
@@ -4,41 +4,32 @@
4
4
 
5
5
  from dataclasses import dataclass
6
6
 
7
- import pandas as pd
8
-
9
7
  from validmind.errors import MissingOrInvalidModelPredictFnError
10
8
  from validmind.logging import get_logger
11
- from validmind.vm_models.model import (
12
- ModelAttributes,
13
- VMModel,
14
- has_method_with_arguments,
15
- )
9
+ from validmind.vm_models.model import VMModel, has_method_with_arguments
16
10
 
17
11
  logger = get_logger(__name__)
18
12
 
19
13
 
20
14
  @dataclass
21
15
  class HFModel(VMModel):
22
- """
23
- An Hugging Face model class that wraps a trained model instance and its associated data.
24
-
25
- Attributes:
26
- attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
27
- model (object, optional): The trained model instance. Defaults to None.
28
- """
29
-
30
16
  def __init__(
31
17
  self,
32
18
  input_id: str = None,
33
- model: object = None, # Trained model instance
34
- attributes: ModelAttributes = None,
19
+ model: object = None,
20
+ attributes: object = None,
21
+ name: str = None,
22
+ **kwargs,
35
23
  ):
36
24
  super().__init__(
37
- model=model,
38
- input_id=input_id,
39
- attributes=attributes,
25
+ input_id=input_id, model=model, attributes=attributes, name=name, **kwargs
40
26
  )
41
27
 
28
+ def __post_init__(self):
29
+ self.library = self.model.__class__.__module__.split(".")[0]
30
+ self.class_ = self.model.__class__.__name__
31
+ self.name = self.name or type(self.model).__name__
32
+
42
33
  def predict_proba(self, *args, **kwargs):
43
34
  """
44
35
  Invoke predict_proba from underline model
@@ -57,36 +48,15 @@ class HFModel(VMModel):
57
48
  Predict method for the model. This is a wrapper around the HF model's pipeline function
58
49
  """
59
50
  results = self.model([str(datapoint) for datapoint in data])
60
-
61
51
  tasks = self.model.__class__.__module__.split(".")
62
52
 
63
53
  if "text2text_generation" in tasks:
64
- return pd.DataFrame(results).summary_text.values
54
+ return [result["summary_text"] for result in results]
65
55
  elif "text_classification" in tasks:
66
- return pd.DataFrame(results).label.values
56
+ return [result["label"] for result in results]
67
57
  elif tasks[-1] == "feature_extraction":
68
- # extract [CLS] token embedding for each input and wrap in dataframe
69
- return pd.DataFrame([embedding[0][0] for embedding in results])
58
+ # Extract [CLS] token embedding for each input and return as list of lists
59
+ print(f"len(results): {len(results)}")
60
+ return [embedding[0][0] for embedding in results]
70
61
  else:
71
62
  return results
72
-
73
- def model_library(self):
74
- """
75
- Returns the model library name
76
- """
77
- return self.model.__class__.__module__.split(".")[0]
78
-
79
- def model_class(self):
80
- """
81
- Returns the model class name
82
- """
83
- return self.model.__class__.__name__
84
-
85
- def model_name(self):
86
- """
87
- Returns model name
88
- """
89
- return type(self.model).__name__
90
-
91
- def is_pytorch_model(self):
92
- return self.model_library() == "torch"
@@ -0,0 +1,42 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from validmind.errors import MissingOrInvalidModelPredictFnError
6
+ from validmind.vm_models.model import VMModel
7
+
8
+
9
+ class MetadataModel(VMModel):
10
+ """
11
+ MetadataModel is designed to represent a model that is not available for inference
12
+ for various reasons but for which metadata and pre-computed predictions are available.
13
+
14
+ Model attributes are required since this will be the only information we can
15
+ collect and log about the model.
16
+
17
+ This class should not be instantiated directly. Instead call `vm.init_model()` and
18
+ pass in a dictionary with the required metadata as `attributes`.
19
+
20
+ Attributes:
21
+ attributes (ModelAttributes): The attributes of the model. Required.
22
+ input_id (str, optional): The input ID for the model. Defaults to None.
23
+ name (str, optional): The name of the model. Defaults to the class name.
24
+ """
25
+
26
+ def __post_init__(self):
27
+ if not getattr(self, "attributes"):
28
+ raise ValueError("MetadataModel requires attributes")
29
+
30
+ self.name = self.name or "Metadata Model"
31
+
32
+ def predict(self, *args, **kwargs):
33
+ """Not implemented for MetadataModel"""
34
+ raise MissingOrInvalidModelPredictFnError(
35
+ "MetadataModel does not support inference"
36
+ )
37
+
38
+ def predict_proba(self, *args, **kwargs):
39
+ """Not implemented for MetadataModel"""
40
+ raise MissingOrInvalidModelPredictFnError(
41
+ "MetadataModel does not support inference"
42
+ )
@@ -0,0 +1,66 @@
1
+ # Copyright © 2023-2024 ValidMind Inc. All rights reserved.
2
+ # See the LICENSE file in the root of this repository for details.
3
+ # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
+
5
+ from validmind.logging import get_logger
6
+ from validmind.vm_models.model import ModelAttributes, ModelPipeline, VMModel
7
+
8
+ logger = get_logger(__name__)
9
+
10
+
11
+ class PipelineModel(VMModel):
12
+ """
13
+ An base class that wraps a trained model instance and its associated data.
14
+
15
+ Attributes:
16
+ pipeline (ModelPipeline): A pipeline of models to be executed. ModelPipeline
17
+ is just a simple container class with a list that can be chained with the
18
+ `|` operator.
19
+ input_id (str, optional): The input ID for the model. Defaults to None.
20
+ attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
21
+ name (str, optional): The name of the model. Defaults to the class name.
22
+ """
23
+
24
+ predict_col: str = None
25
+
26
+ def __init__(
27
+ self,
28
+ pipeline: ModelPipeline,
29
+ attributes: ModelAttributes = None,
30
+ input_id: str = None,
31
+ name: str = None,
32
+ ):
33
+ self.pipeline = pipeline
34
+ self.input_id = input_id
35
+
36
+ self.language = "Python"
37
+ self.library = self.__class__.__name__
38
+ self.library_version = "N/A"
39
+ self.class_ = self.__class__.__name__
40
+
41
+ self.name = name or self.__class__.__name__
42
+
43
+ self.attributes = attributes
44
+
45
+ def __or__(self, other):
46
+ if not isinstance(other, VMModel):
47
+ raise ValueError("Can only chain VMModel objects")
48
+
49
+ return ModelPipeline([self, other])
50
+
51
+ def serialize(self):
52
+ """
53
+ Serializes the model to a dictionary so it can be sent to the API
54
+ """
55
+ return {
56
+ "attributes": self.attributes.__dict__,
57
+ }
58
+
59
+ def predict(self, X):
60
+ X = X.copy()
61
+
62
+ for model in self.pipeline.models:
63
+ predictions = model.predict(X)
64
+ X[model.input_id] = predictions
65
+
66
+ return predictions
@@ -4,37 +4,21 @@
4
4
 
5
5
  from validmind.errors import MissingOrInvalidModelPredictFnError
6
6
  from validmind.logging import get_logger
7
- from validmind.vm_models.model import (
8
- ModelAttributes,
9
- VMModel,
10
- has_method_with_arguments,
11
- )
7
+ from validmind.vm_models.model import VMModel, has_method_with_arguments
12
8
 
13
9
  logger = get_logger(__name__)
14
10
 
15
11
 
16
12
  class PyTorchModel(VMModel):
17
- """
18
- An PyTorch model class that wraps a trained model instance and its associated data.
13
+ """PyTorchModel class wraps a PyTorch model"""
19
14
 
20
- Attributes:
21
- attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
22
- model (object, optional): The trained model instance. Defaults to None.
23
- device_type(str, optional) The device where model is trained
24
- """
15
+ def __post_init__(self):
16
+ if not self.model:
17
+ raise ValueError("Model object is a required argument for PyTorchModel")
25
18
 
26
- def __init__(
27
- self,
28
- model: object = None, # Trained model instance
29
- input_id: str = None,
30
- attributes: ModelAttributes = None,
31
- ):
32
- super().__init__(
33
- model=model,
34
- input_id=input_id,
35
- attributes=attributes,
36
- )
37
- self._device_type = next(self.model.parameters()).device
19
+ self.library = "torch"
20
+ self.name = self.name or "PyTorch Neural Network"
21
+ self.device_type = next(self.model.parameters()).device
38
22
 
39
23
  def predict_proba(self, *args, **kwargs):
40
24
  """
@@ -61,21 +45,3 @@ class PyTorchModel(VMModel):
61
45
  import torch
62
46
 
63
47
  return self.model.predict(torch.tensor(args[0]).to(self.device_type))
64
-
65
- def model_library(self):
66
- """
67
- Returns the model library name
68
- """
69
- return "torch"
70
-
71
- def model_class(self):
72
- """
73
- Returns the model class name
74
- """
75
- return "PyTorchModel"
76
-
77
- def model_name(self):
78
- """
79
- Returns model architecture
80
- """
81
- return "PyTorch Neural Networks"
@@ -6,7 +6,7 @@ import numpy as np
6
6
  import pandas as pd
7
7
 
8
8
  from validmind.logging import get_logger
9
- from validmind.vm_models.model import ModelAttributes, VMModel
9
+ from validmind.vm_models.model import VMModel
10
10
 
11
11
  logger = get_logger(__name__)
12
12
 
@@ -16,49 +16,23 @@ def get_full_class_name(obj):
16
16
 
17
17
 
18
18
  class RModel(VMModel):
19
- """
20
- An R model class that wraps a "fitted" R model instance and its associated data.
21
-
22
- Attributes:
23
- attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
24
- model (object, optional): The trained model instance. Defaults to None.
25
- device_type(str, optional) The device where model is trained
26
- """
27
-
28
- def __init__(
29
- self,
30
- r: object = None, # R instance
31
- model: object = None, # Trained model instance
32
- attributes: ModelAttributes = None,
33
- ):
34
- self.r = r
35
- self._is_classification_model = False
36
-
37
- super().__init__(
38
- model=model,
39
- attributes=attributes,
19
+ """An R model class that wraps a "fitted" R model instance and its associated data."""
20
+
21
+ def __post_init__(self):
22
+ self.language = self.r["version"].rx2("version.string")[0]
23
+ self.library = self.class_ = "R"
24
+
25
+ name_map = {
26
+ "xgb.Booster": "XGBoost",
27
+ "glm": self.__glm_model_class(),
28
+ "lm": "Linear Regression",
29
+ }
30
+ self.name = self.name or name_map.get(
31
+ self.__model_class(), self.__model_class()
40
32
  )
41
33
 
42
34
  self._is_classification_model = self.__is_classification_model()
43
35
 
44
- def __get_predict_data_as_df(self, new_data):
45
- """
46
- Builds the correct data shape and format for the predict method when the
47
- caller has passed a Pandas dataframe as input. This function makes sure to
48
- adjust the shape of the input dataset to the predict() signature depending
49
- if it's a regular R model or an XGBoost model
50
- """
51
- if self.__model_class() == "xgb.Booster":
52
- return new_data.df.drop(new_data.target_column, axis=1)
53
-
54
- return new_data.df
55
-
56
- def __model_class(self):
57
- """
58
- Returns the model class name
59
- """
60
- return self.r["class"](self.model)[0]
61
-
62
36
  def __is_classification_model(self):
63
37
  """
64
38
  Only supported classification models are XGBClassifier and GLM with binomial family (logistic regression).
@@ -78,6 +52,24 @@ class RModel(VMModel):
78
52
 
79
53
  return False
80
54
 
55
+ def __get_predict_data_as_df(self, new_data):
56
+ """
57
+ Builds the correct data shape and format for the predict method when the
58
+ caller has passed a Pandas dataframe as input. This function makes sure to
59
+ adjust the shape of the input dataset to the predict() signature depending
60
+ if it's a regular R model or an XGBoost model
61
+ """
62
+ if self.__model_class() == "xgb.Booster":
63
+ return new_data.df.drop(new_data.target_column, axis=1)
64
+
65
+ return new_data.df
66
+
67
+ def __model_class(self):
68
+ """
69
+ Returns the model class name
70
+ """
71
+ return self.r["class"](self.model)[0]
72
+
81
73
  def __glm_model_class(self):
82
74
  """
83
75
  Returns the model class name for GLM models which include family and link function
@@ -142,9 +134,7 @@ class RModel(VMModel):
142
134
 
143
135
  if new_data_class == "numpy.ndarray":
144
136
  # We need to reconstruct the DataFrame from the ndarray using the column names
145
- new_data = pd.DataFrame(
146
- new_data, columns=self.test_ds.get_features_columns()
147
- )
137
+ new_data = pd.DataFrame(new_data, columns=self.test_ds.feature_columns)
148
138
  elif new_data_class != "pandas.core.frame.DataFrame":
149
139
  raise ValueError(
150
140
  f"new_data must be a DataFrame or ndarray. Got {new_data_class}"
@@ -163,45 +153,6 @@ class RModel(VMModel):
163
153
 
164
154
  return predicted_probs
165
155
 
166
- def model_language(self):
167
- """
168
- Returns the model library name
169
- """
170
- return self.r["version"].rx2("version.string")[0]
171
-
172
- def model_library(self):
173
- """
174
- Returns the model library name
175
- """
176
- return "R"
177
-
178
- def model_library_version(self, *args, **kwargs):
179
- """
180
- Model framework library version
181
- """
182
- return "N/A"
183
-
184
- def model_class(self):
185
- """
186
- Returns the model class name
187
- """
188
- return "R"
189
-
190
- def model_name(self):
191
- """
192
- Returns model name
193
- """
194
- model_class_name = self.__model_class()
195
-
196
- if model_class_name == "lm":
197
- return "Linear Regression"
198
- elif model_class_name == "xgb.Booster":
199
- return "XGBoost"
200
- elif model_class_name == "glm":
201
- return self.__glm_model_class()
202
-
203
- return model_class_name
204
-
205
156
  def regression_coefficients(self):
206
157
  """
207
158
  Returns the regression coefficients summary of the model
@@ -2,38 +2,23 @@
2
2
  # See the LICENSE file in the root of this repository for details.
3
3
  # SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
4
4
 
5
+ import pandas as pd
6
+
5
7
  from validmind.errors import MissingOrInvalidModelPredictFnError
6
8
  from validmind.logging import get_logger
7
- from validmind.vm_models.model import (
8
- ModelAttributes,
9
- VMModel,
10
- has_method_with_arguments,
11
- )
9
+ from validmind.vm_models.model import VMModel, has_method_with_arguments
12
10
 
13
11
  logger = get_logger(__name__)
14
12
 
15
13
 
16
14
  class SKlearnModel(VMModel):
17
- """
18
- An SKlearn model class that wraps a trained model instance and its associated data.
19
-
20
- Attributes:
21
- attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
22
- model (object, optional): The trained model instance. Defaults to None.
23
- device_type(str, optional) The device where model is trained
24
- """
25
-
26
- def __init__(
27
- self,
28
- model: object = None, # Trained model instance
29
- input_id: str = None,
30
- attributes: ModelAttributes = None,
31
- ):
32
- super().__init__(
33
- model=model,
34
- input_id=input_id,
35
- attributes=attributes,
36
- )
15
+ def __post_init__(self):
16
+ if not self.model:
17
+ raise ValueError("Model object is a required argument for SKlearnModel")
18
+
19
+ self.library = self.model.__class__.__module__.split(".")[0]
20
+ self.class_ = self.model.__class__.__name__
21
+ self.name = self.name or type(self.model).__name__
37
22
 
38
23
  def predict_proba(self, *args, **kwargs):
39
24
  """
@@ -54,20 +39,36 @@ class SKlearnModel(VMModel):
54
39
  """
55
40
  return self.model.predict(*args, **kwargs)
56
41
 
57
- def model_library(self):
58
- """
59
- Returns the model library name
60
- """
61
- return self.model.__class__.__module__.split(".")[0]
62
42
 
63
- def model_class(self):
64
- """
65
- Returns the model class name
66
- """
67
- return self.model.__class__.__name__
43
+ class CatBoostModel(SKlearnModel):
44
+ """Wrapper for CatBoost model"""
45
+
46
+ pass
47
+
68
48
 
69
- def model_name(self):
49
+ class XGBoostModel(SKlearnModel):
50
+ """Wrapper for XGBoost model"""
51
+
52
+ def __post_init__(self):
53
+ super().__post_init__()
54
+ self.library = "xgboost"
55
+
56
+
57
+ class StatsModelsModel(SKlearnModel):
58
+ """Wrapper for StatsModels model"""
59
+
60
+ def __post_init__(self):
61
+ super().__post_init__()
62
+ self.library = "statsmodels"
63
+
64
+ def regression_coefficients(self):
70
65
  """
71
- Returns model name
66
+ Returns the regression coefficients summary of the model
72
67
  """
73
- return type(self.model).__name__
68
+ raw_summary = self.model.summary()
69
+
70
+ table = raw_summary.tables[1].data
71
+ headers = table.pop(0)
72
+ headers[0] = "Feature"
73
+
74
+ return pd.DataFrame(table, columns=headers)