tarandm_analytics 1.0.0.dev1__tar.gz → 1.0.0.dev2__tar.gz
This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/PKG-INFO +1 -1
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/pyproject.toml +1 -1
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/builder.py +78 -77
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/expert_score.py +3 -3
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/extended_predictive_model.py +103 -1
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/extreme_gradient_boosting.py +2 -2
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/logistic_regression.py +2 -2
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/random_forest.py +2 -2
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/README.md +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/__init__.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/attribute_evaluator/__init__.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/attribute_evaluator/evaluate_attributes.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/base_class.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/examples/tarandm_model_development.ipynb +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/export_predictive_model/__init__.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/export_predictive_model/model_visualization.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/export_predictive_model/upload_model_to_gitlab.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/plots/__init__.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/plots/plot_functions.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/__init__.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/abstract_predictive_model.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/documentation.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/pmml_model.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/utils/__init__.py +0 -0
- {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/utils/formatting.py +0 -0
|
@@ -20,10 +20,7 @@ from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute
|
|
|
20
20
|
from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute_transformation import (
|
|
21
21
|
AttributeTransformation,
|
|
22
22
|
)
|
|
23
|
-
from tarandm_analytics_utils.predictive_models.extended_predictive_model import (
|
|
24
|
-
ExtendedPredictiveModel,
|
|
25
|
-
PredictiveModelType,
|
|
26
|
-
)
|
|
23
|
+
from tarandm_analytics_utils.predictive_models.extended_predictive_model import PredictiveModelType
|
|
27
24
|
from tarandm_analytics_utils.predictive_models.model_description.model_description import (
|
|
28
25
|
AttachedImage,
|
|
29
26
|
PredictiveModelDescription,
|
|
@@ -53,6 +50,7 @@ if TYPE_CHECKING:
|
|
|
53
50
|
import pandas as pd
|
|
54
51
|
import polars as pl
|
|
55
52
|
import numpy as np
|
|
53
|
+
from tarandm_analytics.predictive_models.extended_predictive_model import ExtendedPredictiveModel
|
|
56
54
|
else:
|
|
57
55
|
RandomForestClassifier = Any
|
|
58
56
|
LogisticRegression = Any
|
|
@@ -135,12 +133,7 @@ class PredictiveModelBuilder:
|
|
|
135
133
|
model=model,
|
|
136
134
|
)
|
|
137
135
|
|
|
138
|
-
# 2.
|
|
139
|
-
serialized_model = self._get_dumped_model(model, model_type_final)
|
|
140
|
-
if model_type_final == PredictiveModelType.PMML and "feature_names" not in serialized_model:
|
|
141
|
-
serialized_model["feature_names"] = attributes
|
|
142
|
-
|
|
143
|
-
# 3. Get descriptive data about data samples used in model development
|
|
136
|
+
# 2. Get descriptive data about data samples used in model development
|
|
144
137
|
sample_description_data = self._get_data_sample_description(
|
|
145
138
|
data=data,
|
|
146
139
|
column_name_label=label_name,
|
|
@@ -148,7 +141,7 @@ class PredictiveModelBuilder:
|
|
|
148
141
|
column_name_date=column_name_date,
|
|
149
142
|
)
|
|
150
143
|
|
|
151
|
-
#
|
|
144
|
+
# 3. Get model performance over different samples
|
|
152
145
|
model_performance = self._get_predictive_model_performance(
|
|
153
146
|
data=data,
|
|
154
147
|
column_name_sample=column_name_sample,
|
|
@@ -156,7 +149,7 @@ class PredictiveModelBuilder:
|
|
|
156
149
|
evaluate_performance=evaluate_performance,
|
|
157
150
|
)
|
|
158
151
|
|
|
159
|
-
#
|
|
152
|
+
# 4. Generate images
|
|
160
153
|
images = self._generate_images(
|
|
161
154
|
data=data,
|
|
162
155
|
model=model,
|
|
@@ -165,7 +158,7 @@ class PredictiveModelBuilder:
|
|
|
165
158
|
learning_curves_data=learning_curves_data,
|
|
166
159
|
)
|
|
167
160
|
|
|
168
|
-
#
|
|
161
|
+
# 5. Prepare request data
|
|
169
162
|
return RequestData(
|
|
170
163
|
model_type=model_type_final,
|
|
171
164
|
target_class=target_class,
|
|
@@ -481,7 +474,9 @@ class PredictiveModelBuilder:
|
|
|
481
474
|
|
|
482
475
|
return binning
|
|
483
476
|
|
|
484
|
-
def create_model_from_data_frame(self, df: "pl.DataFrame") -> ExtendedPredictiveModel:
|
|
477
|
+
def create_model_from_data_frame(self, df: "pl.DataFrame") -> "ExtendedPredictiveModel":
|
|
478
|
+
from tarandm_analytics.predictive_models.extended_predictive_model import ExtendedPredictiveModel
|
|
479
|
+
|
|
485
480
|
mandatory_columns = ["attribute", "bin_from", "bin_to", "categories", "value"]
|
|
486
481
|
if any([p not in df.columns for p in mandatory_columns]):
|
|
487
482
|
raise TypeError(f"Expert score csv missing one of the following columns: {mandatory_columns}.")
|
|
@@ -622,7 +617,9 @@ class PredictiveModelBuilder:
|
|
|
622
617
|
|
|
623
618
|
return extended_predictive_model
|
|
624
619
|
|
|
625
|
-
def create_model_from_csv(self, filename: Union[io.StringIO, str], delimiter: str = ",") -> ExtendedPredictiveModel:
|
|
620
|
+
def create_model_from_csv(
|
|
621
|
+
self, filename: Union[io.StringIO, str], delimiter: str = ","
|
|
622
|
+
) -> "ExtendedPredictiveModel":
|
|
626
623
|
"""Expert score model can be defined by csv file. Function 'create_model_from_csv' loads expert score model from csv
|
|
627
624
|
and create internal representation of the model compatible with TaranDM.
|
|
628
625
|
|
|
@@ -652,33 +649,43 @@ class PredictiveModelBuilder:
|
|
|
652
649
|
|
|
653
650
|
def _build_predictive_model(
|
|
654
651
|
self, predictive_model: ModelType, request_data: RequestData
|
|
655
|
-
) -> ExtendedPredictiveModel:
|
|
656
|
-
|
|
657
|
-
model=predictive_model, model_type=request_data.model_type, attributes=request_data.predictors
|
|
658
|
-
)
|
|
652
|
+
) -> "ExtendedPredictiveModel":
|
|
653
|
+
from tarandm_analytics.predictive_models.extended_predictive_model import ExtendedPredictiveModel
|
|
659
654
|
|
|
660
|
-
|
|
661
|
-
|
|
662
|
-
|
|
663
|
-
|
|
664
|
-
|
|
665
|
-
|
|
666
|
-
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
|
|
670
|
-
|
|
671
|
-
|
|
672
|
-
)
|
|
655
|
+
if request_data.model_type == PredictiveModelType.EXPERT_SCORE:
|
|
656
|
+
extended_model = self.create_model_from_data_frame(df=cast("pl.DataFrame", predictive_model))
|
|
657
|
+
extended_model.target = request_data.label_name
|
|
658
|
+
extended_model.target_class = request_data.target_class
|
|
659
|
+
extended_model.description = request_data.description
|
|
660
|
+
extended_model.performance = request_data.model_performance
|
|
661
|
+
else:
|
|
662
|
+
serialized_model = self._get_dumped_model(
|
|
663
|
+
model=predictive_model, model_type=request_data.model_type, attributes=request_data.predictors
|
|
664
|
+
)
|
|
665
|
+
if request_data.model_type == PredictiveModelType.PMML and "feature_names" not in serialized_model:
|
|
666
|
+
serialized_model["feature_names"] = request_data.predictors
|
|
673
667
|
|
|
674
|
-
|
|
675
|
-
|
|
676
|
-
|
|
677
|
-
|
|
678
|
-
|
|
679
|
-
|
|
668
|
+
# 1. basic validation of provided data
|
|
669
|
+
if request_data.model_type == PredictiveModelType.RANDOM_FOREST and request_data.target_class is None:
|
|
670
|
+
try:
|
|
671
|
+
request_data.target_class = serialized_model["random_forest_model"]["classes_"][-1]
|
|
672
|
+
logger.warning(
|
|
673
|
+
f"Parameter 'target_class' was not provided for RandomForest model. Value was automatically set "
|
|
674
|
+
f"to '{request_data.target_class}'"
|
|
675
|
+
)
|
|
676
|
+
except Exception as e:
|
|
677
|
+
raise ValueError(
|
|
678
|
+
f"Parameter 'target_class' was not provided for random forest model and could not be "
|
|
679
|
+
f"auto-detected (Error in auto-detection: {e})."
|
|
680
|
+
)
|
|
681
|
+
|
|
682
|
+
# 2. Prepare attribute preprocessing data
|
|
683
|
+
# for validating if the binning is defined for valid attribute, we first need to get available attributes
|
|
684
|
+
# (original attributes + attributes created in transformations)
|
|
685
|
+
attribute_binning_preprocessed = self._prepare_attribute_binning(
|
|
686
|
+
attribute_binning=request_data.attribute_binning
|
|
687
|
+
)
|
|
680
688
|
|
|
681
|
-
if request_data.model_type != PredictiveModelType.EXPERT_SCORE:
|
|
682
689
|
# Detect attributes if not provided
|
|
683
690
|
if request_data.predictors is None or len(request_data.predictors) == 0:
|
|
684
691
|
request_data.predictors = self.automated_attribute_detection(
|
|
@@ -709,36 +716,28 @@ class PredictiveModelBuilder:
|
|
|
709
716
|
"dummy_encoding": request_data.dummy_encoding or [],
|
|
710
717
|
}
|
|
711
718
|
)
|
|
719
|
+
if request_data.model_type == PredictiveModelType.PMML:
|
|
720
|
+
extended_model_dict = {
|
|
721
|
+
"external_model": serialized_model,
|
|
722
|
+
"attributes": serialized_model["feature_names"],
|
|
723
|
+
"predictive_model_type": request_data.model_type,
|
|
724
|
+
"target": request_data.label_name,
|
|
725
|
+
}
|
|
712
726
|
|
|
713
|
-
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
718
|
-
|
|
719
|
-
|
|
720
|
-
|
|
721
|
-
|
|
722
|
-
|
|
723
|
-
|
|
724
|
-
|
|
725
|
-
|
|
726
|
-
|
|
727
|
-
|
|
728
|
-
extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
|
|
729
|
-
else:
|
|
730
|
-
extended_model_dict = {
|
|
731
|
-
"external_model": serialized_model,
|
|
732
|
-
"predictive_model_type": request_data.model_type,
|
|
733
|
-
"attributes": request_data.predictors,
|
|
734
|
-
"target": request_data.label_name,
|
|
735
|
-
"target_class": request_data.target_class,
|
|
736
|
-
"attribute_preprocessing": attribute_preprocessing,
|
|
737
|
-
"description": request_data.description,
|
|
738
|
-
"performance": request_data.model_performance,
|
|
739
|
-
"monitoring": request_data.monitoring_data,
|
|
740
|
-
}
|
|
741
|
-
extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
|
|
727
|
+
extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
|
|
728
|
+
else:
|
|
729
|
+
extended_model_dict = {
|
|
730
|
+
"external_model": serialized_model,
|
|
731
|
+
"predictive_model_type": request_data.model_type,
|
|
732
|
+
"attributes": request_data.predictors,
|
|
733
|
+
"target": request_data.label_name,
|
|
734
|
+
"target_class": request_data.target_class,
|
|
735
|
+
"attribute_preprocessing": attribute_preprocessing,
|
|
736
|
+
"description": request_data.description,
|
|
737
|
+
"performance": request_data.model_performance,
|
|
738
|
+
"monitoring": request_data.monitoring_data,
|
|
739
|
+
}
|
|
740
|
+
extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
|
|
742
741
|
return extended_model
|
|
743
742
|
|
|
744
743
|
def build(
|
|
@@ -767,7 +766,7 @@ class PredictiveModelBuilder:
|
|
|
767
766
|
evaluate_performance: Optional[Dict[str, Union[str, List[str]]]] = None,
|
|
768
767
|
learning_curves_data: Optional[Dict] = None,
|
|
769
768
|
created_date: Optional[datetime.date] = None,
|
|
770
|
-
) -> ExtendedPredictiveModel:
|
|
769
|
+
) -> "ExtendedPredictiveModel":
|
|
771
770
|
"""
|
|
772
771
|
Function prepares input data for build model zip file, that is ready to be implemented in TaranDM software.
|
|
773
772
|
Created input data will be sent to the TaranDM endpoint, through which final model zip file is returned.
|
|
@@ -1152,13 +1151,13 @@ class PredictiveModelBuilder:
|
|
|
1152
1151
|
model_attributes = list(model.feature_names_in_)
|
|
1153
1152
|
else:
|
|
1154
1153
|
raise ValueError(
|
|
1155
|
-
"Model
|
|
1156
|
-
"was recognized as scikit-learn estimator. Tried to collect model
|
|
1154
|
+
"Model attribute names were not provided and could not be detected automatically. Model "
|
|
1155
|
+
"was recognized as scikit-learn estimator. Tried to collect model attribute names from "
|
|
1157
1156
|
"property 'feature_names_in_'. This property is available in scikit-learn since version "
|
|
1158
1157
|
"0.24."
|
|
1159
1158
|
)
|
|
1160
1159
|
else:
|
|
1161
|
-
raise ValueError("Model
|
|
1160
|
+
raise ValueError("Model attribute names were not provided and could not be detected automatically.")
|
|
1162
1161
|
|
|
1163
1162
|
# We detected feature that enters the model. First, we detect features as they were before dummy encoding
|
|
1164
1163
|
names_encoded_to_orig = {}
|
|
@@ -1176,7 +1175,7 @@ class PredictiveModelBuilder:
|
|
|
1176
1175
|
if binned_attribute_name is not None and binned_attribute_name != binning.attribute:
|
|
1177
1176
|
if binned_attribute_name not in model_attributes:
|
|
1178
1177
|
raise ValueError(
|
|
1179
|
-
"Model
|
|
1178
|
+
"Model attribute names were not provided and could not be detected automatically."
|
|
1180
1179
|
)
|
|
1181
1180
|
model_attributes = [a for a in model_attributes if a != binned_attribute_name]
|
|
1182
1181
|
if binning.attribute not in model_attributes:
|
|
@@ -1192,7 +1191,7 @@ class PredictiveModelBuilder:
|
|
|
1192
1191
|
if transformed_attribute_name is not None and transformed_attribute_name != transformation.attribute:
|
|
1193
1192
|
if transformed_attribute_name not in model_attributes:
|
|
1194
1193
|
raise ValueError(
|
|
1195
|
-
"Model
|
|
1194
|
+
"Model attribute names were not provided and could not be detected automatically."
|
|
1196
1195
|
)
|
|
1197
1196
|
model_attributes = [a for a in model_attributes if a != transformed_attribute_name]
|
|
1198
1197
|
if transformation.attribute not in model_attributes:
|
|
@@ -1995,7 +1994,7 @@ class PredictiveModelBuilder:
|
|
|
1995
1994
|
if len(categories_str) > 30:
|
|
1996
1995
|
categories_str = categories_str[0:27] + "..."
|
|
1997
1996
|
logger.info(
|
|
1998
|
-
f"Target rate for
|
|
1997
|
+
f"Target rate for attribute {col_attribute} and group of categories {{{categories_str}}} is zero."
|
|
1999
1998
|
)
|
|
2000
1999
|
return bin_frequency / total_count, 0.0
|
|
2001
2000
|
|
|
@@ -2033,7 +2032,7 @@ class PredictiveModelBuilder:
|
|
|
2033
2032
|
encoded_feature_name = single_dummy.encoded_feature_name
|
|
2034
2033
|
if encoded_feature_name not in model_attrs:
|
|
2035
2034
|
logger.warning(
|
|
2036
|
-
f"Dummy encoding for
|
|
2035
|
+
f"Dummy encoding for attribute {attr} defines feature {encoded_feature_name}. This "
|
|
2037
2036
|
f"feature is not used in model. Please check dummy encoding for typos."
|
|
2038
2037
|
)
|
|
2039
2038
|
else:
|
|
@@ -2084,7 +2083,7 @@ class PredictiveModelBuilder:
|
|
|
2084
2083
|
elif set(attributes) != set(orig_attrs):
|
|
2085
2084
|
logger.warning(
|
|
2086
2085
|
f"Expected original features (features before transformation and encodings) are different "
|
|
2087
|
-
f"from
|
|
2086
|
+
f"from attributes provided in 'attributes' parameter. Expected: {orig_attrs}; Provided: "
|
|
2088
2087
|
f"{attributes}. Expected original features will be used in exported model. Please check "
|
|
2089
2088
|
f"that this is a correct behavior."
|
|
2090
2089
|
)
|
|
@@ -2169,6 +2168,8 @@ class PredictiveModelBuilder:
|
|
|
2169
2168
|
return unique_attributes
|
|
2170
2169
|
|
|
2171
2170
|
def expert_score_model_dump(self, model: "pl.DataFrame") -> Dict[str, Any]:
|
|
2171
|
+
import polars as pl
|
|
2172
|
+
|
|
2172
2173
|
unique_attributes = self._get_unique_values_from_df_col(model, "attribute")
|
|
2173
2174
|
feature_names = self._get_unique_values_from_df_col(model.filter(pl.col("is_intercept") == 0), "attribute")
|
|
2174
2175
|
|
|
@@ -6,7 +6,7 @@ from tarandm_analytics.predictive_models.abstract_predictive_model import Abstra
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
class ModelExpertScore(AbstractPredictiveModel):
|
|
9
|
-
"""Internal representation of expertly defined score. Expert score defines binning of
|
|
9
|
+
"""Internal representation of expertly defined score. Expert score defines binning of attributes and assigns a value
|
|
10
10
|
to every bin. Prediction is simple sum of assigned values.
|
|
11
11
|
"""
|
|
12
12
|
|
|
@@ -48,13 +48,13 @@ class ModelExpertScore(AbstractPredictiveModel):
|
|
|
48
48
|
values = attribute_values.get(feature, None)
|
|
49
49
|
if not isinstance(values, list):
|
|
50
50
|
raise TypeError(
|
|
51
|
-
f"'predict_batch' method expect values of each
|
|
51
|
+
f"'predict_batch' method expect values of each attribute provided in list. Values for "
|
|
52
52
|
f"{feature} was provided as {type(values)}."
|
|
53
53
|
)
|
|
54
54
|
elif not all(isinstance(val, (int, float)) for val in values):
|
|
55
55
|
raise TypeError("Some of values provided to predict_batch method are not numerical.")
|
|
56
56
|
elif n_obs != len(values):
|
|
57
|
-
raise ValueError("Number of values provided to predict_batch is inconsistent across
|
|
57
|
+
raise ValueError("Number of values provided to predict_batch is inconsistent across attributes.")
|
|
58
58
|
data_for_predict.append(values)
|
|
59
59
|
|
|
60
60
|
intercept = self.intercept or 0.0
|
|
@@ -2,8 +2,9 @@
|
|
|
2
2
|
# duplication or any other usage without previous written agreement of Taran Advisory is
|
|
3
3
|
# prohibited.
|
|
4
4
|
|
|
5
|
+
from enum import Enum
|
|
5
6
|
from io import StringIO
|
|
6
|
-
from typing import Any, Dict, List, Optional, Union, cast
|
|
7
|
+
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
|
7
8
|
|
|
8
9
|
from pydantic import Field
|
|
9
10
|
|
|
@@ -17,6 +18,10 @@ from tarandm_analytics_utils.predictive_models.extended_predictive_model import
|
|
|
17
18
|
PredictiveModelType,
|
|
18
19
|
)
|
|
19
20
|
from tarandm_analytics_utils.utils.dump import safe_dumps_json
|
|
21
|
+
from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute_binning import AttributeDataType
|
|
22
|
+
from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute_transformation import (
|
|
23
|
+
AttributeTransformation,
|
|
24
|
+
)
|
|
20
25
|
|
|
21
26
|
|
|
22
27
|
class ExtendedPredictiveModel(AbstractExtendedPredictiveModel):
|
|
@@ -133,3 +138,100 @@ class ExtendedPredictiveModel(AbstractExtendedPredictiveModel):
|
|
|
133
138
|
}
|
|
134
139
|
|
|
135
140
|
return {}
|
|
141
|
+
|
|
142
|
+
def convert_attribute(self, attribute_value: Any) -> Any:
|
|
143
|
+
if isinstance(attribute_value, Enum):
|
|
144
|
+
return attribute_value.value
|
|
145
|
+
return attribute_value
|
|
146
|
+
|
|
147
|
+
def apply_attribute_transformation(
|
|
148
|
+
self, transformation: AttributeTransformation, attribute_value: Union[int, float]
|
|
149
|
+
) -> Optional[Union[int, float]]:
|
|
150
|
+
raise NotImplementedError() # Implemented only in core because of QueryEvaluator
|
|
151
|
+
|
|
152
|
+
def prepare_all_preprocessed_attributes(self, attribute_values: Dict[str, Any]) -> Dict[str, Any]: # noqa: C901
|
|
153
|
+
"""
|
|
154
|
+
Method to preprocess attributes - convert from Money and apply binning/transformations if needed
|
|
155
|
+
"""
|
|
156
|
+
attribute_values_preprocessed = {}
|
|
157
|
+
|
|
158
|
+
# convert money to float
|
|
159
|
+
for attribute in attribute_values:
|
|
160
|
+
attribute_values_preprocessed[attribute] = self.convert_attribute(attribute_values[attribute])
|
|
161
|
+
|
|
162
|
+
# if boolean values convert to either "true"/"false" (CATEGORICAL) or 1/0 (NUMERICAL)
|
|
163
|
+
if isinstance(attribute_values_preprocessed[attribute], bool):
|
|
164
|
+
binning_found = False
|
|
165
|
+
for binning in self.attribute_preprocessing.binning:
|
|
166
|
+
if binning.attribute == attribute:
|
|
167
|
+
if binning.attribute_data_type == AttributeDataType.CATEGORICAL:
|
|
168
|
+
attribute_values_preprocessed[attribute] = str(attribute_values_preprocessed[attribute])
|
|
169
|
+
binning_found = True
|
|
170
|
+
else:
|
|
171
|
+
attribute_values_preprocessed[attribute] = int(attribute_values_preprocessed[attribute])
|
|
172
|
+
binning_found = True
|
|
173
|
+
|
|
174
|
+
if not binning_found:
|
|
175
|
+
attribute_values_preprocessed[attribute] = int(attribute_values_preprocessed[attribute])
|
|
176
|
+
|
|
177
|
+
# apply transformations
|
|
178
|
+
if self.attribute_preprocessing is not None and self.attribute_preprocessing.transformations is not None:
|
|
179
|
+
for transformation in self.attribute_preprocessing.transformations:
|
|
180
|
+
if transformation.attribute in attribute_values_preprocessed.keys():
|
|
181
|
+
attribute_values_preprocessed[
|
|
182
|
+
transformation.transformed_attribute_name or transformation.attribute
|
|
183
|
+
] = self.apply_attribute_transformation(
|
|
184
|
+
transformation=transformation,
|
|
185
|
+
attribute_value=attribute_values_preprocessed[transformation.attribute],
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
# apply binning
|
|
189
|
+
if self.attribute_preprocessing is not None and self.attribute_preprocessing.binning is not None:
|
|
190
|
+
for binning in self.attribute_preprocessing.binning:
|
|
191
|
+
if binning.attribute in attribute_values_preprocessed.keys():
|
|
192
|
+
attribute_values_preprocessed[binning.binned_attribute_name or binning.attribute] = (
|
|
193
|
+
self.attribute_preprocessing.apply_attribute_binning(
|
|
194
|
+
attribute=binning.attribute,
|
|
195
|
+
attribute_value=attribute_values_preprocessed[binning.attribute],
|
|
196
|
+
)
|
|
197
|
+
)
|
|
198
|
+
|
|
199
|
+
# apply dummy encoding
|
|
200
|
+
if self.attribute_preprocessing is not None:
|
|
201
|
+
for dummy_encoding in self.attribute_preprocessing.dummy_encoding:
|
|
202
|
+
attribute_value = attribute_values_preprocessed[dummy_encoding.attribute]
|
|
203
|
+
encoding = self.attribute_preprocessing.apply_dummy_encoding(
|
|
204
|
+
attribute=dummy_encoding.attribute, attribute_value=attribute_value
|
|
205
|
+
)
|
|
206
|
+
if encoding is not None:
|
|
207
|
+
attribute_values_preprocessed.update(encoding)
|
|
208
|
+
|
|
209
|
+
return attribute_values_preprocessed
|
|
210
|
+
|
|
211
|
+
def filter_final_model_attributes(self, attribute_values_preprocessed: Dict[str, Any]) -> Dict[str, Any]:
|
|
212
|
+
# return only attributes that enter final model
|
|
213
|
+
model_attribute_values_preprocessed = {}
|
|
214
|
+
for attr in self.external_model.feature_names:
|
|
215
|
+
model_attribute_values_preprocessed[attr] = attribute_values_preprocessed[attr]
|
|
216
|
+
return model_attribute_values_preprocessed
|
|
217
|
+
|
|
218
|
+
def apply_preprocessing(self, attribute_values: Dict[str, Any]) -> Dict[str, Any]: # noqa: C901
|
|
219
|
+
all_preprocessed_attributes = self.prepare_all_preprocessed_attributes(attribute_values=attribute_values)
|
|
220
|
+
return self.filter_final_model_attributes(attribute_values_preprocessed=all_preprocessed_attributes)
|
|
221
|
+
|
|
222
|
+
def predict(self, attribute_values: Dict[str, Any]) -> Tuple[Optional[float], Dict[str, Any]]:
|
|
223
|
+
"""
|
|
224
|
+
Method predict is responsible for computing prediction of models.
|
|
225
|
+
|
|
226
|
+
:param attribute_values: Dictionary {attribute: its value}.
|
|
227
|
+
:return: Prediction of the models.
|
|
228
|
+
"""
|
|
229
|
+
attribute_values_preprocessed = self.apply_preprocessing(attribute_values=attribute_values)
|
|
230
|
+
|
|
231
|
+
if self.external_model is not None:
|
|
232
|
+
return (
|
|
233
|
+
self.external_model.predict(attribute_values=attribute_values_preprocessed),
|
|
234
|
+
attribute_values_preprocessed,
|
|
235
|
+
)
|
|
236
|
+
|
|
237
|
+
raise Exception("Model is not defined")
|
|
@@ -89,14 +89,14 @@ class ModelXGBoost(AbstractPredictiveModel):
|
|
|
89
89
|
values = attribute_values.get(feature, None)
|
|
90
90
|
if not isinstance(values, list):
|
|
91
91
|
raise TypeError(
|
|
92
|
-
f"'predict_batch' method expect values of each
|
|
92
|
+
f"'predict_batch' method expect values of each attribute provided in list. Values for "
|
|
93
93
|
f"{feature} was provided as {type(values)}."
|
|
94
94
|
)
|
|
95
95
|
elif not all(val is None or isinstance(val, (int, float)) for val in values):
|
|
96
96
|
raise TypeError("Some of values provided to predict_batch method are not numerical or None.")
|
|
97
97
|
elif n_obs is not None and n_obs != len(values):
|
|
98
98
|
raise ValueError(
|
|
99
|
-
"Values provided to predict_batch method do not have the same size for all
|
|
99
|
+
"Values provided to predict_batch method do not have the same size for all attributes."
|
|
100
100
|
)
|
|
101
101
|
data_for_predict.append(values)
|
|
102
102
|
|
|
@@ -94,14 +94,14 @@ class ModelLogisticRegression(AbstractPredictiveModel):
|
|
|
94
94
|
values = attribute_values.get(feature, None)
|
|
95
95
|
if not isinstance(values, list):
|
|
96
96
|
raise TypeError(
|
|
97
|
-
f"'predict_batch' method expect values of each
|
|
97
|
+
f"'predict_batch' method expect values of each attribute provided in list. Values for "
|
|
98
98
|
f"{feature} was provided as {type(values)}."
|
|
99
99
|
)
|
|
100
100
|
elif not all(isinstance(val, (int, float)) for val in values):
|
|
101
101
|
raise TypeError("Some of values provided to predict_batch method are not numerical.")
|
|
102
102
|
elif n_obs is not None and n_obs != len(values):
|
|
103
103
|
raise ValueError(
|
|
104
|
-
"Values provided to predict_batch method do not have the same size for all
|
|
104
|
+
"Values provided to predict_batch method do not have the same size for all attributes."
|
|
105
105
|
)
|
|
106
106
|
data_for_predict.append(values)
|
|
107
107
|
|
|
@@ -76,14 +76,14 @@ class ModelRandomForest(AbstractPredictiveModel):
|
|
|
76
76
|
values = attribute_values.get(feature, None)
|
|
77
77
|
if not isinstance(values, list):
|
|
78
78
|
raise TypeError(
|
|
79
|
-
f"'predict_batch' method expect values of each
|
|
79
|
+
f"'predict_batch' method expect values of each attribute provided in list. Values for "
|
|
80
80
|
f"{feature} was provided as {type(values)}."
|
|
81
81
|
)
|
|
82
82
|
elif not all(isinstance(val, (int, float)) for val in values):
|
|
83
83
|
raise TypeError("Some of values provided to predict_batch method are not numerical.")
|
|
84
84
|
elif n_obs is not None and n_obs != len(values):
|
|
85
85
|
raise ValueError(
|
|
86
|
-
"Values provided to predict_batch method do not have the same size for all
|
|
86
|
+
"Values provided to predict_batch method do not have the same size for all attributes."
|
|
87
87
|
)
|
|
88
88
|
data_for_predict.append(values)
|
|
89
89
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/base_class.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/plots/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/utils/__init__.py
RENAMED
|
File without changes
|
{tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/utils/formatting.py
RENAMED
|
File without changes
|