validmind 2.1.1__py3-none-any.whl → 2.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- validmind/__version__.py +1 -1
- validmind/ai.py +72 -49
- validmind/api_client.py +42 -16
- validmind/client.py +68 -25
- validmind/datasets/llm/rag/__init__.py +11 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_1.csv +30 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_2.csv +30 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_3.csv +53 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_4.csv +53 -0
- validmind/datasets/llm/rag/datasets/rfp_existing_questions_client_5.csv +53 -0
- validmind/datasets/llm/rag/rfp.py +41 -0
- validmind/errors.py +1 -1
- validmind/html_templates/__init__.py +0 -0
- validmind/html_templates/content_blocks.py +89 -14
- validmind/models/__init__.py +7 -4
- validmind/models/foundation.py +8 -34
- validmind/models/function.py +51 -0
- validmind/models/huggingface.py +16 -46
- validmind/models/metadata.py +42 -0
- validmind/models/pipeline.py +66 -0
- validmind/models/pytorch.py +8 -42
- validmind/models/r_model.py +33 -82
- validmind/models/sklearn.py +39 -38
- validmind/template.py +8 -26
- validmind/tests/__init__.py +43 -20
- validmind/tests/data_validation/ANOVAOneWayTable.py +1 -1
- validmind/tests/data_validation/ChiSquaredFeaturesTable.py +1 -1
- validmind/tests/data_validation/DescriptiveStatistics.py +2 -4
- validmind/tests/data_validation/Duplicates.py +1 -1
- validmind/tests/data_validation/IsolationForestOutliers.py +2 -2
- validmind/tests/data_validation/LaggedCorrelationHeatmap.py +1 -1
- validmind/tests/data_validation/TargetRateBarPlots.py +1 -1
- validmind/tests/data_validation/nlp/LanguageDetection.py +59 -0
- validmind/tests/data_validation/nlp/PolarityAndSubjectivity.py +48 -0
- validmind/tests/data_validation/nlp/Punctuations.py +11 -12
- validmind/tests/data_validation/nlp/Sentiment.py +57 -0
- validmind/tests/data_validation/nlp/Toxicity.py +45 -0
- validmind/tests/decorator.py +12 -7
- validmind/tests/model_validation/BertScore.py +100 -98
- validmind/tests/model_validation/BleuScore.py +93 -64
- validmind/tests/model_validation/ContextualRecall.py +74 -91
- validmind/tests/model_validation/MeteorScore.py +86 -74
- validmind/tests/model_validation/RegardScore.py +103 -121
- validmind/tests/model_validation/RougeScore.py +118 -0
- validmind/tests/model_validation/TokenDisparity.py +84 -121
- validmind/tests/model_validation/ToxicityScore.py +109 -123
- validmind/tests/model_validation/embeddings/CosineSimilarityComparison.py +96 -0
- validmind/tests/model_validation/embeddings/CosineSimilarityHeatmap.py +71 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceComparison.py +92 -0
- validmind/tests/model_validation/embeddings/EuclideanDistanceHeatmap.py +69 -0
- validmind/tests/model_validation/embeddings/PCAComponentsPairwisePlots.py +78 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysis.py +35 -23
- validmind/tests/model_validation/embeddings/StabilityAnalysisKeyword.py +3 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysisRandomNoise.py +7 -1
- validmind/tests/model_validation/embeddings/StabilityAnalysisSynonyms.py +3 -0
- validmind/tests/model_validation/embeddings/StabilityAnalysisTranslation.py +3 -0
- validmind/tests/model_validation/embeddings/TSNEComponentsPairwisePlots.py +99 -0
- validmind/tests/model_validation/ragas/AnswerCorrectness.py +131 -0
- validmind/tests/model_validation/ragas/AnswerRelevance.py +134 -0
- validmind/tests/model_validation/ragas/AnswerSimilarity.py +119 -0
- validmind/tests/model_validation/ragas/AspectCritique.py +167 -0
- validmind/tests/model_validation/ragas/ContextEntityRecall.py +133 -0
- validmind/tests/model_validation/ragas/ContextPrecision.py +123 -0
- validmind/tests/model_validation/ragas/ContextRecall.py +123 -0
- validmind/tests/model_validation/ragas/ContextRelevancy.py +114 -0
- validmind/tests/model_validation/ragas/Faithfulness.py +119 -0
- validmind/tests/model_validation/ragas/utils.py +66 -0
- validmind/tests/model_validation/sklearn/OverfitDiagnosis.py +3 -7
- validmind/tests/model_validation/sklearn/PermutationFeatureImportance.py +8 -9
- validmind/tests/model_validation/sklearn/PopulationStabilityIndex.py +5 -10
- validmind/tests/model_validation/sklearn/PrecisionRecallCurve.py +3 -2
- validmind/tests/model_validation/sklearn/ROCCurve.py +2 -1
- validmind/tests/model_validation/sklearn/RegressionR2Square.py +1 -1
- validmind/tests/model_validation/sklearn/RobustnessDiagnosis.py +2 -3
- validmind/tests/model_validation/sklearn/SHAPGlobalImportance.py +7 -11
- validmind/tests/model_validation/sklearn/WeakspotsDiagnosis.py +3 -4
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlot.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelForecastPlotLevels.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelInsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelOutsampleComparison.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelSummary.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsCoeffs.py +1 -1
- validmind/tests/model_validation/statsmodels/RegressionModelsPerformance.py +1 -1
- validmind/tests/model_validation/statsmodels/ScorecardHistogram.py +5 -6
- validmind/unit_metrics/__init__.py +26 -49
- validmind/unit_metrics/composite.py +13 -7
- validmind/unit_metrics/regression/sklearn/AdjustedRSquaredScore.py +1 -1
- validmind/utils.py +99 -6
- validmind/vm_models/__init__.py +1 -1
- validmind/vm_models/dataset/__init__.py +7 -0
- validmind/vm_models/dataset/dataset.py +560 -0
- validmind/vm_models/dataset/utils.py +146 -0
- validmind/vm_models/model.py +97 -72
- validmind/vm_models/test/metric.py +9 -24
- validmind/vm_models/test/result_wrapper.py +124 -28
- validmind/vm_models/test/threshold_test.py +10 -28
- validmind/vm_models/test_context.py +1 -1
- validmind/vm_models/test_suite/summary.py +3 -4
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/METADATA +5 -3
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/RECORD +103 -78
- validmind/models/catboost.py +0 -33
- validmind/models/statsmodels.py +0 -50
- validmind/models/xgboost.py +0 -30
- validmind/tests/model_validation/BertScoreAggregate.py +0 -90
- validmind/tests/model_validation/RegardHistogram.py +0 -148
- validmind/tests/model_validation/RougeMetrics.py +0 -147
- validmind/tests/model_validation/RougeMetricsAggregate.py +0 -133
- validmind/tests/model_validation/SelfCheckNLIScore.py +0 -112
- validmind/tests/model_validation/ToxicityHistogram.py +0 -136
- validmind/vm_models/dataset.py +0 -1303
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/LICENSE +0 -0
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/WHEEL +0 -0
- {validmind-2.1.1.dist-info → validmind-2.2.4.dist-info}/entry_points.txt +0 -0
validmind/models/foundation.py
CHANGED
@@ -7,7 +7,7 @@ from dataclasses import dataclass
|
|
7
7
|
import pandas as pd
|
8
8
|
|
9
9
|
from validmind.logging import get_logger
|
10
|
-
from validmind.
|
10
|
+
from validmind.models.function import FunctionModel
|
11
11
|
|
12
12
|
logger = get_logger(__name__)
|
13
13
|
|
@@ -18,7 +18,7 @@ class Prompt:
|
|
18
18
|
variables: list
|
19
19
|
|
20
20
|
|
21
|
-
class FoundationModel(
|
21
|
+
class FoundationModel(FunctionModel):
|
22
22
|
"""FoundationModel class wraps a Foundation LLM endpoint
|
23
23
|
|
24
24
|
This class wraps a predict function that is user-defined and adapts it to works
|
@@ -29,22 +29,14 @@ class FoundationModel(VMModel):
|
|
29
29
|
and return the result from the model
|
30
30
|
prompt (Prompt): The prompt object that defines the prompt template and the
|
31
31
|
variables (if any)
|
32
|
-
|
32
|
+
name (str, optional): The name of the model. Defaults to name of the predict_fn
|
33
33
|
"""
|
34
34
|
|
35
|
-
def
|
36
|
-
self,
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
input_id: str = None,
|
41
|
-
):
|
42
|
-
super().__init__(
|
43
|
-
attributes=attributes,
|
44
|
-
input_id=input_id,
|
45
|
-
)
|
46
|
-
self.predict_fn = predict_fn
|
47
|
-
self.prompt = prompt
|
35
|
+
def __post_init__(self):
|
36
|
+
if not getattr(self, "predict_fn") or not callable(self.predict_fn):
|
37
|
+
raise ValueError("FoundationModel requires a callable predict_fn")
|
38
|
+
|
39
|
+
self.name = self.name or self.predict_fn.__name__
|
48
40
|
|
49
41
|
def _build_prompt(self, x: pd.DataFrame):
|
50
42
|
"""
|
@@ -59,21 +51,3 @@ class FoundationModel(VMModel):
|
|
59
51
|
Predict method for the model. This is a wrapper around the model's
|
60
52
|
"""
|
61
53
|
return [self.predict_fn(self._build_prompt(x[1])) for x in X.iterrows()]
|
62
|
-
|
63
|
-
def model_library(self):
|
64
|
-
"""
|
65
|
-
Returns the model library name
|
66
|
-
"""
|
67
|
-
return "FoundationModel"
|
68
|
-
|
69
|
-
def model_class(self):
|
70
|
-
"""
|
71
|
-
Returns the model class name
|
72
|
-
"""
|
73
|
-
return "FoundationModel"
|
74
|
-
|
75
|
-
def model_name(self):
|
76
|
-
"""
|
77
|
-
Returns model name
|
78
|
-
"""
|
79
|
-
return "FoundationModel"
|
@@ -0,0 +1,51 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from validmind.vm_models.model import VMModel
|
6
|
+
|
7
|
+
|
8
|
+
# semi-immutable dict
|
9
|
+
class Input(dict):
|
10
|
+
def __init__(self, *args, **kwargs):
|
11
|
+
super().__init__(*args, **kwargs)
|
12
|
+
self._new = set()
|
13
|
+
|
14
|
+
def __setitem__(self, key, value):
|
15
|
+
self._new.add(key)
|
16
|
+
super().__setitem__(key, value)
|
17
|
+
|
18
|
+
def __delitem__(self, _):
|
19
|
+
raise TypeError("Cannot delete keys from Input")
|
20
|
+
|
21
|
+
def get_new(self):
|
22
|
+
return {k: self[k] for k in self._new}
|
23
|
+
|
24
|
+
|
25
|
+
class FunctionModel(VMModel):
|
26
|
+
"""
|
27
|
+
FunctionModel class wraps a user-defined predict function
|
28
|
+
|
29
|
+
Attributes:
|
30
|
+
predict_fn (callable): The predict function that should take a dictionary of
|
31
|
+
input features and return a prediction.
|
32
|
+
input_id (str, optional): The input ID for the model. Defaults to None.
|
33
|
+
name (str, optional): The name of the model. Defaults to the name of the predict_fn.
|
34
|
+
"""
|
35
|
+
|
36
|
+
def __post_init__(self):
|
37
|
+
if not getattr(self, "predict_fn") or not callable(self.predict_fn):
|
38
|
+
raise ValueError("FunctionModel requires a callable predict_fn")
|
39
|
+
|
40
|
+
self.name = self.name or self.predict_fn.__name__
|
41
|
+
|
42
|
+
def predict(self, X):
|
43
|
+
"""Compute predictions for the input (X)
|
44
|
+
|
45
|
+
Args:
|
46
|
+
X (pandas.DataFrame): The input features to predict on
|
47
|
+
|
48
|
+
Returns:
|
49
|
+
list: The predictions
|
50
|
+
"""
|
51
|
+
return [self.predict_fn(x) for x in X.to_dict(orient="records")]
|
validmind/models/huggingface.py
CHANGED
@@ -4,41 +4,32 @@
|
|
4
4
|
|
5
5
|
from dataclasses import dataclass
|
6
6
|
|
7
|
-
import pandas as pd
|
8
|
-
|
9
7
|
from validmind.errors import MissingOrInvalidModelPredictFnError
|
10
8
|
from validmind.logging import get_logger
|
11
|
-
from validmind.vm_models.model import
|
12
|
-
ModelAttributes,
|
13
|
-
VMModel,
|
14
|
-
has_method_with_arguments,
|
15
|
-
)
|
9
|
+
from validmind.vm_models.model import VMModel, has_method_with_arguments
|
16
10
|
|
17
11
|
logger = get_logger(__name__)
|
18
12
|
|
19
13
|
|
20
14
|
@dataclass
|
21
15
|
class HFModel(VMModel):
|
22
|
-
"""
|
23
|
-
An Hugging Face model class that wraps a trained model instance and its associated data.
|
24
|
-
|
25
|
-
Attributes:
|
26
|
-
attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
|
27
|
-
model (object, optional): The trained model instance. Defaults to None.
|
28
|
-
"""
|
29
|
-
|
30
16
|
def __init__(
|
31
17
|
self,
|
32
18
|
input_id: str = None,
|
33
|
-
model: object = None,
|
34
|
-
attributes:
|
19
|
+
model: object = None,
|
20
|
+
attributes: object = None,
|
21
|
+
name: str = None,
|
22
|
+
**kwargs,
|
35
23
|
):
|
36
24
|
super().__init__(
|
37
|
-
model=model,
|
38
|
-
input_id=input_id,
|
39
|
-
attributes=attributes,
|
25
|
+
input_id=input_id, model=model, attributes=attributes, name=name, **kwargs
|
40
26
|
)
|
41
27
|
|
28
|
+
def __post_init__(self):
|
29
|
+
self.library = self.model.__class__.__module__.split(".")[0]
|
30
|
+
self.class_ = self.model.__class__.__name__
|
31
|
+
self.name = self.name or type(self.model).__name__
|
32
|
+
|
42
33
|
def predict_proba(self, *args, **kwargs):
|
43
34
|
"""
|
44
35
|
Invoke predict_proba from underline model
|
@@ -57,36 +48,15 @@ class HFModel(VMModel):
|
|
57
48
|
Predict method for the model. This is a wrapper around the HF model's pipeline function
|
58
49
|
"""
|
59
50
|
results = self.model([str(datapoint) for datapoint in data])
|
60
|
-
|
61
51
|
tasks = self.model.__class__.__module__.split(".")
|
62
52
|
|
63
53
|
if "text2text_generation" in tasks:
|
64
|
-
return
|
54
|
+
return [result["summary_text"] for result in results]
|
65
55
|
elif "text_classification" in tasks:
|
66
|
-
return
|
56
|
+
return [result["label"] for result in results]
|
67
57
|
elif tasks[-1] == "feature_extraction":
|
68
|
-
#
|
69
|
-
|
58
|
+
# Extract [CLS] token embedding for each input and return as list of lists
|
59
|
+
print(f"len(results): {len(results)}")
|
60
|
+
return [embedding[0][0] for embedding in results]
|
70
61
|
else:
|
71
62
|
return results
|
72
|
-
|
73
|
-
def model_library(self):
|
74
|
-
"""
|
75
|
-
Returns the model library name
|
76
|
-
"""
|
77
|
-
return self.model.__class__.__module__.split(".")[0]
|
78
|
-
|
79
|
-
def model_class(self):
|
80
|
-
"""
|
81
|
-
Returns the model class name
|
82
|
-
"""
|
83
|
-
return self.model.__class__.__name__
|
84
|
-
|
85
|
-
def model_name(self):
|
86
|
-
"""
|
87
|
-
Returns model name
|
88
|
-
"""
|
89
|
-
return type(self.model).__name__
|
90
|
-
|
91
|
-
def is_pytorch_model(self):
|
92
|
-
return self.model_library() == "torch"
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from validmind.errors import MissingOrInvalidModelPredictFnError
|
6
|
+
from validmind.vm_models.model import VMModel
|
7
|
+
|
8
|
+
|
9
|
+
class MetadataModel(VMModel):
|
10
|
+
"""
|
11
|
+
MetadataModel is designed to represent a model that is not available for inference
|
12
|
+
for various reasons but for which metadata and pre-computed predictions are available.
|
13
|
+
|
14
|
+
Model attributes are required since this will be the only information we can
|
15
|
+
collect and log about the model.
|
16
|
+
|
17
|
+
This class should not be instantiated directly. Instead call `vm.init_model()` and
|
18
|
+
pass in a dictionary with the required metadata as `attributes`.
|
19
|
+
|
20
|
+
Attributes:
|
21
|
+
attributes (ModelAttributes): The attributes of the model. Required.
|
22
|
+
input_id (str, optional): The input ID for the model. Defaults to None.
|
23
|
+
name (str, optional): The name of the model. Defaults to the class name.
|
24
|
+
"""
|
25
|
+
|
26
|
+
def __post_init__(self):
|
27
|
+
if not getattr(self, "attributes"):
|
28
|
+
raise ValueError("MetadataModel requires attributes")
|
29
|
+
|
30
|
+
self.name = self.name or "Metadata Model"
|
31
|
+
|
32
|
+
def predict(self, *args, **kwargs):
|
33
|
+
"""Not implemented for MetadataModel"""
|
34
|
+
raise MissingOrInvalidModelPredictFnError(
|
35
|
+
"MetadataModel does not support inference"
|
36
|
+
)
|
37
|
+
|
38
|
+
def predict_proba(self, *args, **kwargs):
|
39
|
+
"""Not implemented for MetadataModel"""
|
40
|
+
raise MissingOrInvalidModelPredictFnError(
|
41
|
+
"MetadataModel does not support inference"
|
42
|
+
)
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# Copyright © 2023-2024 ValidMind Inc. All rights reserved.
|
2
|
+
# See the LICENSE file in the root of this repository for details.
|
3
|
+
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
|
+
|
5
|
+
from validmind.logging import get_logger
|
6
|
+
from validmind.vm_models.model import ModelAttributes, ModelPipeline, VMModel
|
7
|
+
|
8
|
+
logger = get_logger(__name__)
|
9
|
+
|
10
|
+
|
11
|
+
class PipelineModel(VMModel):
|
12
|
+
"""
|
13
|
+
An base class that wraps a trained model instance and its associated data.
|
14
|
+
|
15
|
+
Attributes:
|
16
|
+
pipeline (ModelPipeline): A pipeline of models to be executed. ModelPipeline
|
17
|
+
is just a simple container class with a list that can be chained with the
|
18
|
+
`|` operator.
|
19
|
+
input_id (str, optional): The input ID for the model. Defaults to None.
|
20
|
+
attributes (ModelAttributes, optional): The attributes of the model. Defaults to None.
|
21
|
+
name (str, optional): The name of the model. Defaults to the class name.
|
22
|
+
"""
|
23
|
+
|
24
|
+
predict_col: str = None
|
25
|
+
|
26
|
+
def __init__(
|
27
|
+
self,
|
28
|
+
pipeline: ModelPipeline,
|
29
|
+
attributes: ModelAttributes = None,
|
30
|
+
input_id: str = None,
|
31
|
+
name: str = None,
|
32
|
+
):
|
33
|
+
self.pipeline = pipeline
|
34
|
+
self.input_id = input_id
|
35
|
+
|
36
|
+
self.language = "Python"
|
37
|
+
self.library = self.__class__.__name__
|
38
|
+
self.library_version = "N/A"
|
39
|
+
self.class_ = self.__class__.__name__
|
40
|
+
|
41
|
+
self.name = name or self.__class__.__name__
|
42
|
+
|
43
|
+
self.attributes = attributes
|
44
|
+
|
45
|
+
def __or__(self, other):
|
46
|
+
if not isinstance(other, VMModel):
|
47
|
+
raise ValueError("Can only chain VMModel objects")
|
48
|
+
|
49
|
+
return ModelPipeline([self, other])
|
50
|
+
|
51
|
+
def serialize(self):
|
52
|
+
"""
|
53
|
+
Serializes the model to a dictionary so it can be sent to the API
|
54
|
+
"""
|
55
|
+
return {
|
56
|
+
"attributes": self.attributes.__dict__,
|
57
|
+
}
|
58
|
+
|
59
|
+
def predict(self, X):
|
60
|
+
X = X.copy()
|
61
|
+
|
62
|
+
for model in self.pipeline.models:
|
63
|
+
predictions = model.predict(X)
|
64
|
+
X[model.input_id] = predictions
|
65
|
+
|
66
|
+
return predictions
|
validmind/models/pytorch.py
CHANGED
@@ -4,37 +4,21 @@
|
|
4
4
|
|
5
5
|
from validmind.errors import MissingOrInvalidModelPredictFnError
|
6
6
|
from validmind.logging import get_logger
|
7
|
-
from validmind.vm_models.model import
|
8
|
-
ModelAttributes,
|
9
|
-
VMModel,
|
10
|
-
has_method_with_arguments,
|
11
|
-
)
|
7
|
+
from validmind.vm_models.model import VMModel, has_method_with_arguments
|
12
8
|
|
13
9
|
logger = get_logger(__name__)
|
14
10
|
|
15
11
|
|
16
12
|
class PyTorchModel(VMModel):
|
17
|
-
"""
|
18
|
-
An PyTorch model class that wraps a trained model instance and its associated data.
|
13
|
+
"""PyTorchModel class wraps a PyTorch model"""
|
19
14
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
device_type(str, optional) The device where model is trained
|
24
|
-
"""
|
15
|
+
def __post_init__(self):
|
16
|
+
if not self.model:
|
17
|
+
raise ValueError("Model object is a required argument for PyTorchModel")
|
25
18
|
|
26
|
-
|
27
|
-
self
|
28
|
-
|
29
|
-
input_id: str = None,
|
30
|
-
attributes: ModelAttributes = None,
|
31
|
-
):
|
32
|
-
super().__init__(
|
33
|
-
model=model,
|
34
|
-
input_id=input_id,
|
35
|
-
attributes=attributes,
|
36
|
-
)
|
37
|
-
self._device_type = next(self.model.parameters()).device
|
19
|
+
self.library = "torch"
|
20
|
+
self.name = self.name or "PyTorch Neural Network"
|
21
|
+
self.device_type = next(self.model.parameters()).device
|
38
22
|
|
39
23
|
def predict_proba(self, *args, **kwargs):
|
40
24
|
"""
|
@@ -61,21 +45,3 @@ class PyTorchModel(VMModel):
|
|
61
45
|
import torch
|
62
46
|
|
63
47
|
return self.model.predict(torch.tensor(args[0]).to(self.device_type))
|
64
|
-
|
65
|
-
def model_library(self):
|
66
|
-
"""
|
67
|
-
Returns the model library name
|
68
|
-
"""
|
69
|
-
return "torch"
|
70
|
-
|
71
|
-
def model_class(self):
|
72
|
-
"""
|
73
|
-
Returns the model class name
|
74
|
-
"""
|
75
|
-
return "PyTorchModel"
|
76
|
-
|
77
|
-
def model_name(self):
|
78
|
-
"""
|
79
|
-
Returns model architecture
|
80
|
-
"""
|
81
|
-
return "PyTorch Neural Networks"
|
validmind/models/r_model.py
CHANGED
@@ -6,7 +6,7 @@ import numpy as np
|
|
6
6
|
import pandas as pd
|
7
7
|
|
8
8
|
from validmind.logging import get_logger
|
9
|
-
from validmind.vm_models.model import
|
9
|
+
from validmind.vm_models.model import VMModel
|
10
10
|
|
11
11
|
logger = get_logger(__name__)
|
12
12
|
|
@@ -16,49 +16,23 @@ def get_full_class_name(obj):
|
|
16
16
|
|
17
17
|
|
18
18
|
class RModel(VMModel):
|
19
|
-
"""
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
attributes: ModelAttributes = None,
|
33
|
-
):
|
34
|
-
self.r = r
|
35
|
-
self._is_classification_model = False
|
36
|
-
|
37
|
-
super().__init__(
|
38
|
-
model=model,
|
39
|
-
attributes=attributes,
|
19
|
+
"""An R model class that wraps a "fitted" R model instance and its associated data."""
|
20
|
+
|
21
|
+
def __post_init__(self):
|
22
|
+
self.language = self.r["version"].rx2("version.string")[0]
|
23
|
+
self.library = self.class_ = "R"
|
24
|
+
|
25
|
+
name_map = {
|
26
|
+
"xgb.Booster": "XGBoost",
|
27
|
+
"glm": self.__glm_model_class(),
|
28
|
+
"lm": "Linear Regression",
|
29
|
+
}
|
30
|
+
self.name = self.name or name_map.get(
|
31
|
+
self.__model_class(), self.__model_class()
|
40
32
|
)
|
41
33
|
|
42
34
|
self._is_classification_model = self.__is_classification_model()
|
43
35
|
|
44
|
-
def __get_predict_data_as_df(self, new_data):
|
45
|
-
"""
|
46
|
-
Builds the correct data shape and format for the predict method when the
|
47
|
-
caller has passed a Pandas dataframe as input. This function makes sure to
|
48
|
-
adjust the shape of the input dataset to the predict() signature depending
|
49
|
-
if it's a regular R model or an XGBoost model
|
50
|
-
"""
|
51
|
-
if self.__model_class() == "xgb.Booster":
|
52
|
-
return new_data.df.drop(new_data.target_column, axis=1)
|
53
|
-
|
54
|
-
return new_data.df
|
55
|
-
|
56
|
-
def __model_class(self):
|
57
|
-
"""
|
58
|
-
Returns the model class name
|
59
|
-
"""
|
60
|
-
return self.r["class"](self.model)[0]
|
61
|
-
|
62
36
|
def __is_classification_model(self):
|
63
37
|
"""
|
64
38
|
Only supported classification models are XGBClassifier and GLM with binomial family (logistic regression).
|
@@ -78,6 +52,24 @@ class RModel(VMModel):
|
|
78
52
|
|
79
53
|
return False
|
80
54
|
|
55
|
+
def __get_predict_data_as_df(self, new_data):
|
56
|
+
"""
|
57
|
+
Builds the correct data shape and format for the predict method when the
|
58
|
+
caller has passed a Pandas dataframe as input. This function makes sure to
|
59
|
+
adjust the shape of the input dataset to the predict() signature depending
|
60
|
+
if it's a regular R model or an XGBoost model
|
61
|
+
"""
|
62
|
+
if self.__model_class() == "xgb.Booster":
|
63
|
+
return new_data.df.drop(new_data.target_column, axis=1)
|
64
|
+
|
65
|
+
return new_data.df
|
66
|
+
|
67
|
+
def __model_class(self):
|
68
|
+
"""
|
69
|
+
Returns the model class name
|
70
|
+
"""
|
71
|
+
return self.r["class"](self.model)[0]
|
72
|
+
|
81
73
|
def __glm_model_class(self):
|
82
74
|
"""
|
83
75
|
Returns the model class name for GLM models which include family and link function
|
@@ -142,9 +134,7 @@ class RModel(VMModel):
|
|
142
134
|
|
143
135
|
if new_data_class == "numpy.ndarray":
|
144
136
|
# We need to reconstruct the DataFrame from the ndarray using the column names
|
145
|
-
new_data = pd.DataFrame(
|
146
|
-
new_data, columns=self.test_ds.get_features_columns()
|
147
|
-
)
|
137
|
+
new_data = pd.DataFrame(new_data, columns=self.test_ds.feature_columns)
|
148
138
|
elif new_data_class != "pandas.core.frame.DataFrame":
|
149
139
|
raise ValueError(
|
150
140
|
f"new_data must be a DataFrame or ndarray. Got {new_data_class}"
|
@@ -163,45 +153,6 @@ class RModel(VMModel):
|
|
163
153
|
|
164
154
|
return predicted_probs
|
165
155
|
|
166
|
-
def model_language(self):
|
167
|
-
"""
|
168
|
-
Returns the model library name
|
169
|
-
"""
|
170
|
-
return self.r["version"].rx2("version.string")[0]
|
171
|
-
|
172
|
-
def model_library(self):
|
173
|
-
"""
|
174
|
-
Returns the model library name
|
175
|
-
"""
|
176
|
-
return "R"
|
177
|
-
|
178
|
-
def model_library_version(self, *args, **kwargs):
|
179
|
-
"""
|
180
|
-
Model framework library version
|
181
|
-
"""
|
182
|
-
return "N/A"
|
183
|
-
|
184
|
-
def model_class(self):
|
185
|
-
"""
|
186
|
-
Returns the model class name
|
187
|
-
"""
|
188
|
-
return "R"
|
189
|
-
|
190
|
-
def model_name(self):
|
191
|
-
"""
|
192
|
-
Returns model name
|
193
|
-
"""
|
194
|
-
model_class_name = self.__model_class()
|
195
|
-
|
196
|
-
if model_class_name == "lm":
|
197
|
-
return "Linear Regression"
|
198
|
-
elif model_class_name == "xgb.Booster":
|
199
|
-
return "XGBoost"
|
200
|
-
elif model_class_name == "glm":
|
201
|
-
return self.__glm_model_class()
|
202
|
-
|
203
|
-
return model_class_name
|
204
|
-
|
205
156
|
def regression_coefficients(self):
|
206
157
|
"""
|
207
158
|
Returns the regression coefficients summary of the model
|
validmind/models/sklearn.py
CHANGED
@@ -2,38 +2,23 @@
|
|
2
2
|
# See the LICENSE file in the root of this repository for details.
|
3
3
|
# SPDX-License-Identifier: AGPL-3.0 AND ValidMind Commercial
|
4
4
|
|
5
|
+
import pandas as pd
|
6
|
+
|
5
7
|
from validmind.errors import MissingOrInvalidModelPredictFnError
|
6
8
|
from validmind.logging import get_logger
|
7
|
-
from validmind.vm_models.model import
|
8
|
-
ModelAttributes,
|
9
|
-
VMModel,
|
10
|
-
has_method_with_arguments,
|
11
|
-
)
|
9
|
+
from validmind.vm_models.model import VMModel, has_method_with_arguments
|
12
10
|
|
13
11
|
logger = get_logger(__name__)
|
14
12
|
|
15
13
|
|
16
14
|
class SKlearnModel(VMModel):
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
"""
|
25
|
-
|
26
|
-
def __init__(
|
27
|
-
self,
|
28
|
-
model: object = None, # Trained model instance
|
29
|
-
input_id: str = None,
|
30
|
-
attributes: ModelAttributes = None,
|
31
|
-
):
|
32
|
-
super().__init__(
|
33
|
-
model=model,
|
34
|
-
input_id=input_id,
|
35
|
-
attributes=attributes,
|
36
|
-
)
|
15
|
+
def __post_init__(self):
|
16
|
+
if not self.model:
|
17
|
+
raise ValueError("Model object is a required argument for SKlearnModel")
|
18
|
+
|
19
|
+
self.library = self.model.__class__.__module__.split(".")[0]
|
20
|
+
self.class_ = self.model.__class__.__name__
|
21
|
+
self.name = self.name or type(self.model).__name__
|
37
22
|
|
38
23
|
def predict_proba(self, *args, **kwargs):
|
39
24
|
"""
|
@@ -54,20 +39,36 @@ class SKlearnModel(VMModel):
|
|
54
39
|
"""
|
55
40
|
return self.model.predict(*args, **kwargs)
|
56
41
|
|
57
|
-
def model_library(self):
|
58
|
-
"""
|
59
|
-
Returns the model library name
|
60
|
-
"""
|
61
|
-
return self.model.__class__.__module__.split(".")[0]
|
62
42
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
43
|
+
class CatBoostModel(SKlearnModel):
|
44
|
+
"""Wrapper for CatBoost model"""
|
45
|
+
|
46
|
+
pass
|
47
|
+
|
68
48
|
|
69
|
-
|
49
|
+
class XGBoostModel(SKlearnModel):
|
50
|
+
"""Wrapper for XGBoost model"""
|
51
|
+
|
52
|
+
def __post_init__(self):
|
53
|
+
super().__post_init__()
|
54
|
+
self.library = "xgboost"
|
55
|
+
|
56
|
+
|
57
|
+
class StatsModelsModel(SKlearnModel):
|
58
|
+
"""Wrapper for StatsModels model"""
|
59
|
+
|
60
|
+
def __post_init__(self):
|
61
|
+
super().__post_init__()
|
62
|
+
self.library = "statsmodels"
|
63
|
+
|
64
|
+
def regression_coefficients(self):
|
70
65
|
"""
|
71
|
-
Returns model
|
66
|
+
Returns the regression coefficients summary of the model
|
72
67
|
"""
|
73
|
-
|
68
|
+
raw_summary = self.model.summary()
|
69
|
+
|
70
|
+
table = raw_summary.tables[1].data
|
71
|
+
headers = table.pop(0)
|
72
|
+
headers[0] = "Feature"
|
73
|
+
|
74
|
+
return pd.DataFrame(table, columns=headers)
|