tarandm_analytics 1.0.0.dev1__tar.gz → 1.0.0.dev2__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/PKG-INFO +1 -1
  2. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/pyproject.toml +1 -1
  3. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/builder.py +78 -77
  4. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/expert_score.py +3 -3
  5. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/extended_predictive_model.py +103 -1
  6. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/extreme_gradient_boosting.py +2 -2
  7. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/logistic_regression.py +2 -2
  8. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/random_forest.py +2 -2
  9. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/README.md +0 -0
  10. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/__init__.py +0 -0
  11. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/attribute_evaluator/__init__.py +0 -0
  12. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/attribute_evaluator/evaluate_attributes.py +0 -0
  13. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/base_class.py +0 -0
  14. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/examples/tarandm_model_development.ipynb +0 -0
  15. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/export_predictive_model/__init__.py +0 -0
  16. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/export_predictive_model/model_visualization.py +0 -0
  17. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/export_predictive_model/upload_model_to_gitlab.py +0 -0
  18. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/plots/__init__.py +0 -0
  19. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/plots/plot_functions.py +0 -0
  20. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/__init__.py +0 -0
  21. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/abstract_predictive_model.py +0 -0
  22. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/documentation.py +0 -0
  23. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/predictive_models/pmml_model.py +0 -0
  24. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/utils/__init__.py +0 -0
  25. {tarandm_analytics-1.0.0.dev1 → tarandm_analytics-1.0.0.dev2}/tarandm_analytics/utils/formatting.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: tarandm_analytics
3
- Version: 1.0.0.dev1
3
+ Version: 1.0.0.dev2
4
4
  Summary: Package links analytics in Python with TaranDM software.
5
5
  Author: Marek Teller
6
6
  Author-email: mteller@taran.ai
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "tarandm_analytics"
3
- version = "1.0.0.dev1"
3
+ version = "1.0.0.dev2"
4
4
  description = "Package links analytics in Python with TaranDM software."
5
5
  authors = ["Marek Teller <mteller@taran.ai>"]
6
6
  readme = "README.md"
@@ -20,10 +20,7 @@ from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute
20
20
  from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute_transformation import (
21
21
  AttributeTransformation,
22
22
  )
23
- from tarandm_analytics_utils.predictive_models.extended_predictive_model import (
24
- ExtendedPredictiveModel,
25
- PredictiveModelType,
26
- )
23
+ from tarandm_analytics_utils.predictive_models.extended_predictive_model import PredictiveModelType
27
24
  from tarandm_analytics_utils.predictive_models.model_description.model_description import (
28
25
  AttachedImage,
29
26
  PredictiveModelDescription,
@@ -53,6 +50,7 @@ if TYPE_CHECKING:
53
50
  import pandas as pd
54
51
  import polars as pl
55
52
  import numpy as np
53
+ from tarandm_analytics.predictive_models.extended_predictive_model import ExtendedPredictiveModel
56
54
  else:
57
55
  RandomForestClassifier = Any
58
56
  LogisticRegression = Any
@@ -135,12 +133,7 @@ class PredictiveModelBuilder:
135
133
  model=model,
136
134
  )
137
135
 
138
- # 2. Serialize model
139
- serialized_model = self._get_dumped_model(model, model_type_final)
140
- if model_type_final == PredictiveModelType.PMML and "feature_names" not in serialized_model:
141
- serialized_model["feature_names"] = attributes
142
-
143
- # 3. Get descriptive data about data samples used in model development
136
+ # 2. Get descriptive data about data samples used in model development
144
137
  sample_description_data = self._get_data_sample_description(
145
138
  data=data,
146
139
  column_name_label=label_name,
@@ -148,7 +141,7 @@ class PredictiveModelBuilder:
148
141
  column_name_date=column_name_date,
149
142
  )
150
143
 
151
- # 4. Get model performance over different samples
144
+ # 3. Get model performance over different samples
152
145
  model_performance = self._get_predictive_model_performance(
153
146
  data=data,
154
147
  column_name_sample=column_name_sample,
@@ -156,7 +149,7 @@ class PredictiveModelBuilder:
156
149
  evaluate_performance=evaluate_performance,
157
150
  )
158
151
 
159
- # 5. Generate images
152
+ # 4. Generate images
160
153
  images = self._generate_images(
161
154
  data=data,
162
155
  model=model,
@@ -165,7 +158,7 @@ class PredictiveModelBuilder:
165
158
  learning_curves_data=learning_curves_data,
166
159
  )
167
160
 
168
- # 6. Prepare request data
161
+ # 5. Prepare request data
169
162
  return RequestData(
170
163
  model_type=model_type_final,
171
164
  target_class=target_class,
@@ -481,7 +474,9 @@ class PredictiveModelBuilder:
481
474
 
482
475
  return binning
483
476
 
484
- def create_model_from_data_frame(self, df: "pl.DataFrame") -> ExtendedPredictiveModel:
477
+ def create_model_from_data_frame(self, df: "pl.DataFrame") -> "ExtendedPredictiveModel":
478
+ from tarandm_analytics.predictive_models.extended_predictive_model import ExtendedPredictiveModel
479
+
485
480
  mandatory_columns = ["attribute", "bin_from", "bin_to", "categories", "value"]
486
481
  if any([p not in df.columns for p in mandatory_columns]):
487
482
  raise TypeError(f"Expert score csv missing one of the following columns: {mandatory_columns}.")
@@ -622,7 +617,9 @@ class PredictiveModelBuilder:
622
617
 
623
618
  return extended_predictive_model
624
619
 
625
- def create_model_from_csv(self, filename: Union[io.StringIO, str], delimiter: str = ",") -> ExtendedPredictiveModel:
620
+ def create_model_from_csv(
621
+ self, filename: Union[io.StringIO, str], delimiter: str = ","
622
+ ) -> "ExtendedPredictiveModel":
626
623
  """Expert score model can be defined by csv file. Function 'create_model_from_csv' loads expert score model from csv
627
624
  and create internal representation of the model compatible with TaranDM.
628
625
 
@@ -652,33 +649,43 @@ class PredictiveModelBuilder:
652
649
 
653
650
  def _build_predictive_model(
654
651
  self, predictive_model: ModelType, request_data: RequestData
655
- ) -> ExtendedPredictiveModel:
656
- serialized_model = self._get_dumped_model(
657
- model=predictive_model, model_type=request_data.model_type, attributes=request_data.predictors
658
- )
652
+ ) -> "ExtendedPredictiveModel":
653
+ from tarandm_analytics.predictive_models.extended_predictive_model import ExtendedPredictiveModel
659
654
 
660
- # 1. basic validation of provided data
661
- if request_data.target_class is None and request_data.model_type == PredictiveModelType.RANDOM_FOREST:
662
- try:
663
- request_data.target_class = serialized_model["random_forest_model"]["classes_"][-1]
664
- logger.warning(
665
- f"Parameter 'target_class' was not provided for RandomForest model. Value was automatically set "
666
- f"to '{request_data.target_class}'"
667
- )
668
- except Exception as e:
669
- raise ValueError(
670
- f"Parameter 'target_class' was not provided for random forest model and could not be "
671
- f"auto-detected (Error in auto-detection: {e})."
672
- )
655
+ if request_data.model_type == PredictiveModelType.EXPERT_SCORE:
656
+ extended_model = self.create_model_from_data_frame(df=cast("pl.DataFrame", predictive_model))
657
+ extended_model.target = request_data.label_name
658
+ extended_model.target_class = request_data.target_class
659
+ extended_model.description = request_data.description
660
+ extended_model.performance = request_data.model_performance
661
+ else:
662
+ serialized_model = self._get_dumped_model(
663
+ model=predictive_model, model_type=request_data.model_type, attributes=request_data.predictors
664
+ )
665
+ if request_data.model_type == PredictiveModelType.PMML and "feature_names" not in serialized_model:
666
+ serialized_model["feature_names"] = request_data.predictors
673
667
 
674
- # 2. Prepare attribute preprocessing data
675
- # for validating if the binning is defined for valid attribute, we first need to get available attributes
676
- # (original attributes + attributes created in transformations)
677
- attribute_binning_preprocessed = self._prepare_attribute_binning(
678
- attribute_binning=request_data.attribute_binning
679
- )
668
+ # 1. basic validation of provided data
669
+ if request_data.model_type == PredictiveModelType.RANDOM_FOREST and request_data.target_class is None:
670
+ try:
671
+ request_data.target_class = serialized_model["random_forest_model"]["classes_"][-1]
672
+ logger.warning(
673
+ f"Parameter 'target_class' was not provided for RandomForest model. Value was automatically set "
674
+ f"to '{request_data.target_class}'"
675
+ )
676
+ except Exception as e:
677
+ raise ValueError(
678
+ f"Parameter 'target_class' was not provided for random forest model and could not be "
679
+ f"auto-detected (Error in auto-detection: {e})."
680
+ )
681
+
682
+ # 2. Prepare attribute preprocessing data
683
+ # for validating if the binning is defined for valid attribute, we first need to get available attributes
684
+ # (original attributes + attributes created in transformations)
685
+ attribute_binning_preprocessed = self._prepare_attribute_binning(
686
+ attribute_binning=request_data.attribute_binning
687
+ )
680
688
 
681
- if request_data.model_type != PredictiveModelType.EXPERT_SCORE:
682
689
  # Detect attributes if not provided
683
690
  if request_data.predictors is None or len(request_data.predictors) == 0:
684
691
  request_data.predictors = self.automated_attribute_detection(
@@ -709,36 +716,28 @@ class PredictiveModelBuilder:
709
716
  "dummy_encoding": request_data.dummy_encoding or [],
710
717
  }
711
718
  )
719
+ if request_data.model_type == PredictiveModelType.PMML:
720
+ extended_model_dict = {
721
+ "external_model": serialized_model,
722
+ "attributes": serialized_model["feature_names"],
723
+ "predictive_model_type": request_data.model_type,
724
+ "target": request_data.label_name,
725
+ }
712
726
 
713
- # 3. Build extended model and send its content as response
714
- if request_data.model_type == PredictiveModelType.EXPERT_SCORE:
715
- extended_model = self.create_model_from_data_frame(df=cast("pl.DataFrame", predictive_model))
716
- extended_model.target = request_data.label_name
717
- extended_model.target_class = request_data.target_class
718
- extended_model.description = request_data.description
719
- extended_model.performance = request_data.model_performance
720
- elif request_data.model_type == PredictiveModelType.PMML:
721
- extended_model_dict = {
722
- "external_model": serialized_model,
723
- "attributes": serialized_model["feature_names"],
724
- "predictive_model_type": request_data.model_type,
725
- "target": request_data.label_name,
726
- }
727
-
728
- extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
729
- else:
730
- extended_model_dict = {
731
- "external_model": serialized_model,
732
- "predictive_model_type": request_data.model_type,
733
- "attributes": request_data.predictors,
734
- "target": request_data.label_name,
735
- "target_class": request_data.target_class,
736
- "attribute_preprocessing": attribute_preprocessing,
737
- "description": request_data.description,
738
- "performance": request_data.model_performance,
739
- "monitoring": request_data.monitoring_data,
740
- }
741
- extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
727
+ extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
728
+ else:
729
+ extended_model_dict = {
730
+ "external_model": serialized_model,
731
+ "predictive_model_type": request_data.model_type,
732
+ "attributes": request_data.predictors,
733
+ "target": request_data.label_name,
734
+ "target_class": request_data.target_class,
735
+ "attribute_preprocessing": attribute_preprocessing,
736
+ "description": request_data.description,
737
+ "performance": request_data.model_performance,
738
+ "monitoring": request_data.monitoring_data,
739
+ }
740
+ extended_model = ExtendedPredictiveModel.model_validate(extended_model_dict)
742
741
  return extended_model
743
742
 
744
743
  def build(
@@ -767,7 +766,7 @@ class PredictiveModelBuilder:
767
766
  evaluate_performance: Optional[Dict[str, Union[str, List[str]]]] = None,
768
767
  learning_curves_data: Optional[Dict] = None,
769
768
  created_date: Optional[datetime.date] = None,
770
- ) -> ExtendedPredictiveModel:
769
+ ) -> "ExtendedPredictiveModel":
771
770
  """
772
771
  Function prepares input data for build model zip file, that is ready to be implemented in TaranDM software.
773
772
  Created input data will be sent to the TaranDM endpoint, through which final model zip file is returned.
@@ -1152,13 +1151,13 @@ class PredictiveModelBuilder:
1152
1151
  model_attributes = list(model.feature_names_in_)
1153
1152
  else:
1154
1153
  raise ValueError(
1155
- "Model predictors names were not provided and could not be detected automatically. Model "
1156
- "was recognized as scikit-learn estimator. Tried to collect model predictors names from "
1154
+ "Model attribute names were not provided and could not be detected automatically. Model "
1155
+ "was recognized as scikit-learn estimator. Tried to collect model attribute names from "
1157
1156
  "property 'feature_names_in_'. This property is available in scikit-learn since version "
1158
1157
  "0.24."
1159
1158
  )
1160
1159
  else:
1161
- raise ValueError("Model predictors names were not provided and could not be detected automatically.")
1160
+ raise ValueError("Model attribute names were not provided and could not be detected automatically.")
1162
1161
 
1163
1162
  # We detected feature that enters the model. First, we detect features as they were before dummy encoding
1164
1163
  names_encoded_to_orig = {}
@@ -1176,7 +1175,7 @@ class PredictiveModelBuilder:
1176
1175
  if binned_attribute_name is not None and binned_attribute_name != binning.attribute:
1177
1176
  if binned_attribute_name not in model_attributes:
1178
1177
  raise ValueError(
1179
- "Model predictors names were not provided and could not be detected automatically."
1178
+ "Model attribute names were not provided and could not be detected automatically."
1180
1179
  )
1181
1180
  model_attributes = [a for a in model_attributes if a != binned_attribute_name]
1182
1181
  if binning.attribute not in model_attributes:
@@ -1192,7 +1191,7 @@ class PredictiveModelBuilder:
1192
1191
  if transformed_attribute_name is not None and transformed_attribute_name != transformation.attribute:
1193
1192
  if transformed_attribute_name not in model_attributes:
1194
1193
  raise ValueError(
1195
- "Model predictors names were not provided and could not be detected automatically."
1194
+ "Model attribute names were not provided and could not be detected automatically."
1196
1195
  )
1197
1196
  model_attributes = [a for a in model_attributes if a != transformed_attribute_name]
1198
1197
  if transformation.attribute not in model_attributes:
@@ -1995,7 +1994,7 @@ class PredictiveModelBuilder:
1995
1994
  if len(categories_str) > 30:
1996
1995
  categories_str = categories_str[0:27] + "..."
1997
1996
  logger.info(
1998
- f"Target rate for predictor {col_attribute} and group of categories {{{categories_str}}} is zero."
1997
+ f"Target rate for attribute {col_attribute} and group of categories {{{categories_str}}} is zero."
1999
1998
  )
2000
1999
  return bin_frequency / total_count, 0.0
2001
2000
 
@@ -2033,7 +2032,7 @@ class PredictiveModelBuilder:
2033
2032
  encoded_feature_name = single_dummy.encoded_feature_name
2034
2033
  if encoded_feature_name not in model_attrs:
2035
2034
  logger.warning(
2036
- f"Dummy encoding for predictor {attr} defines feature {encoded_feature_name}. This "
2035
+ f"Dummy encoding for attribute {attr} defines feature {encoded_feature_name}. This "
2037
2036
  f"feature is not used in model. Please check dummy encoding for typos."
2038
2037
  )
2039
2038
  else:
@@ -2084,7 +2083,7 @@ class PredictiveModelBuilder:
2084
2083
  elif set(attributes) != set(orig_attrs):
2085
2084
  logger.warning(
2086
2085
  f"Expected original features (features before transformation and encodings) are different "
2087
- f"from predictors provided in 'attributes' parameter. Expected: {orig_attrs}; Provided: "
2086
+ f"from attributes provided in 'attributes' parameter. Expected: {orig_attrs}; Provided: "
2088
2087
  f"{attributes}. Expected original features will be used in exported model. Please check "
2089
2088
  f"that this is a correct behavior."
2090
2089
  )
@@ -2169,6 +2168,8 @@ class PredictiveModelBuilder:
2169
2168
  return unique_attributes
2170
2169
 
2171
2170
  def expert_score_model_dump(self, model: "pl.DataFrame") -> Dict[str, Any]:
2171
+ import polars as pl
2172
+
2172
2173
  unique_attributes = self._get_unique_values_from_df_col(model, "attribute")
2173
2174
  feature_names = self._get_unique_values_from_df_col(model.filter(pl.col("is_intercept") == 0), "attribute")
2174
2175
 
@@ -6,7 +6,7 @@ from tarandm_analytics.predictive_models.abstract_predictive_model import Abstra
6
6
 
7
7
 
8
8
  class ModelExpertScore(AbstractPredictiveModel):
9
- """Internal representation of expertly defined score. Expert score defines binning of predictors and assigns a value
9
+ """Internal representation of expertly defined score. Expert score defines binning of attributes and assigns a value
10
10
  to every bin. Prediction is simple sum of assigned values.
11
11
  """
12
12
 
@@ -48,13 +48,13 @@ class ModelExpertScore(AbstractPredictiveModel):
48
48
  values = attribute_values.get(feature, None)
49
49
  if not isinstance(values, list):
50
50
  raise TypeError(
51
- f"'predict_batch' method expect values of each predictor provided in list. Values for "
51
+ f"'predict_batch' method expect values of each attribute provided in list. Values for "
52
52
  f"{feature} was provided as {type(values)}."
53
53
  )
54
54
  elif not all(isinstance(val, (int, float)) for val in values):
55
55
  raise TypeError("Some of values provided to predict_batch method are not numerical.")
56
56
  elif n_obs != len(values):
57
- raise ValueError("Number of values provided to predict_batch is inconsistent across predictors.")
57
+ raise ValueError("Number of values provided to predict_batch is inconsistent across attributes.")
58
58
  data_for_predict.append(values)
59
59
 
60
60
  intercept = self.intercept or 0.0
@@ -2,8 +2,9 @@
2
2
  # duplication or any other usage without previous written agreement of Taran Advisory is
3
3
  # prohibited.
4
4
 
5
+ from enum import Enum
5
6
  from io import StringIO
6
- from typing import Any, Dict, List, Optional, Union, cast
7
+ from typing import Any, Dict, List, Optional, Tuple, Union, cast
7
8
 
8
9
  from pydantic import Field
9
10
 
@@ -17,6 +18,10 @@ from tarandm_analytics_utils.predictive_models.extended_predictive_model import
17
18
  PredictiveModelType,
18
19
  )
19
20
  from tarandm_analytics_utils.utils.dump import safe_dumps_json
21
+ from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute_binning import AttributeDataType
22
+ from tarandm_analytics_utils.predictive_models.attribute_preprocessing.attribute_transformation import (
23
+ AttributeTransformation,
24
+ )
20
25
 
21
26
 
22
27
  class ExtendedPredictiveModel(AbstractExtendedPredictiveModel):
@@ -133,3 +138,100 @@ class ExtendedPredictiveModel(AbstractExtendedPredictiveModel):
133
138
  }
134
139
 
135
140
  return {}
141
+
142
+ def convert_attribute(self, attribute_value: Any) -> Any:
143
+ if isinstance(attribute_value, Enum):
144
+ return attribute_value.value
145
+ return attribute_value
146
+
147
+ def apply_attribute_transformation(
148
+ self, transformation: AttributeTransformation, attribute_value: Union[int, float]
149
+ ) -> Optional[Union[int, float]]:
150
+ raise NotImplementedError() # Implemented only in core because of QueryEvaluator
151
+
152
+ def prepare_all_preprocessed_attributes(self, attribute_values: Dict[str, Any]) -> Dict[str, Any]: # noqa: C901
153
+ """
154
+ Method to preprocess attributes - convert from Money and apply binning/transformations if needed
155
+ """
156
+ attribute_values_preprocessed = {}
157
+
158
+ # convert money to float
159
+ for attribute in attribute_values:
160
+ attribute_values_preprocessed[attribute] = self.convert_attribute(attribute_values[attribute])
161
+
162
+ # if boolean values convert to either "true"/"false" (CATEGORICAL) or 1/0 (NUMERICAL)
163
+ if isinstance(attribute_values_preprocessed[attribute], bool):
164
+ binning_found = False
165
+ for binning in self.attribute_preprocessing.binning:
166
+ if binning.attribute == attribute:
167
+ if binning.attribute_data_type == AttributeDataType.CATEGORICAL:
168
+ attribute_values_preprocessed[attribute] = str(attribute_values_preprocessed[attribute])
169
+ binning_found = True
170
+ else:
171
+ attribute_values_preprocessed[attribute] = int(attribute_values_preprocessed[attribute])
172
+ binning_found = True
173
+
174
+ if not binning_found:
175
+ attribute_values_preprocessed[attribute] = int(attribute_values_preprocessed[attribute])
176
+
177
+ # apply transformations
178
+ if self.attribute_preprocessing is not None and self.attribute_preprocessing.transformations is not None:
179
+ for transformation in self.attribute_preprocessing.transformations:
180
+ if transformation.attribute in attribute_values_preprocessed.keys():
181
+ attribute_values_preprocessed[
182
+ transformation.transformed_attribute_name or transformation.attribute
183
+ ] = self.apply_attribute_transformation(
184
+ transformation=transformation,
185
+ attribute_value=attribute_values_preprocessed[transformation.attribute],
186
+ )
187
+
188
+ # apply binning
189
+ if self.attribute_preprocessing is not None and self.attribute_preprocessing.binning is not None:
190
+ for binning in self.attribute_preprocessing.binning:
191
+ if binning.attribute in attribute_values_preprocessed.keys():
192
+ attribute_values_preprocessed[binning.binned_attribute_name or binning.attribute] = (
193
+ self.attribute_preprocessing.apply_attribute_binning(
194
+ attribute=binning.attribute,
195
+ attribute_value=attribute_values_preprocessed[binning.attribute],
196
+ )
197
+ )
198
+
199
+ # apply dummy encoding
200
+ if self.attribute_preprocessing is not None:
201
+ for dummy_encoding in self.attribute_preprocessing.dummy_encoding:
202
+ attribute_value = attribute_values_preprocessed[dummy_encoding.attribute]
203
+ encoding = self.attribute_preprocessing.apply_dummy_encoding(
204
+ attribute=dummy_encoding.attribute, attribute_value=attribute_value
205
+ )
206
+ if encoding is not None:
207
+ attribute_values_preprocessed.update(encoding)
208
+
209
+ return attribute_values_preprocessed
210
+
211
+ def filter_final_model_attributes(self, attribute_values_preprocessed: Dict[str, Any]) -> Dict[str, Any]:
212
+ # return only attributes that enter final model
213
+ model_attribute_values_preprocessed = {}
214
+ for attr in self.external_model.feature_names:
215
+ model_attribute_values_preprocessed[attr] = attribute_values_preprocessed[attr]
216
+ return model_attribute_values_preprocessed
217
+
218
+ def apply_preprocessing(self, attribute_values: Dict[str, Any]) -> Dict[str, Any]: # noqa: C901
219
+ all_preprocessed_attributes = self.prepare_all_preprocessed_attributes(attribute_values=attribute_values)
220
+ return self.filter_final_model_attributes(attribute_values_preprocessed=all_preprocessed_attributes)
221
+
222
+ def predict(self, attribute_values: Dict[str, Any]) -> Tuple[Optional[float], Dict[str, Any]]:
223
+ """
224
+ Method predict is responsible for computing prediction of models.
225
+
226
+ :param attribute_values: Dictionary {attribute: its value}.
227
+ :return: Prediction of the models.
228
+ """
229
+ attribute_values_preprocessed = self.apply_preprocessing(attribute_values=attribute_values)
230
+
231
+ if self.external_model is not None:
232
+ return (
233
+ self.external_model.predict(attribute_values=attribute_values_preprocessed),
234
+ attribute_values_preprocessed,
235
+ )
236
+
237
+ raise Exception("Model is not defined")
@@ -89,14 +89,14 @@ class ModelXGBoost(AbstractPredictiveModel):
89
89
  values = attribute_values.get(feature, None)
90
90
  if not isinstance(values, list):
91
91
  raise TypeError(
92
- f"'predict_batch' method expect values of each predictor provided in list. Values for "
92
+ f"'predict_batch' method expect values of each attribute provided in list. Values for "
93
93
  f"{feature} was provided as {type(values)}."
94
94
  )
95
95
  elif not all(val is None or isinstance(val, (int, float)) for val in values):
96
96
  raise TypeError("Some of values provided to predict_batch method are not numerical or None.")
97
97
  elif n_obs is not None and n_obs != len(values):
98
98
  raise ValueError(
99
- "Values provided to predict_batch method do not have the same size for all predictors."
99
+ "Values provided to predict_batch method do not have the same size for all attributes."
100
100
  )
101
101
  data_for_predict.append(values)
102
102
 
@@ -94,14 +94,14 @@ class ModelLogisticRegression(AbstractPredictiveModel):
94
94
  values = attribute_values.get(feature, None)
95
95
  if not isinstance(values, list):
96
96
  raise TypeError(
97
- f"'predict_batch' method expect values of each predictor provided in list. Values for "
97
+ f"'predict_batch' method expect values of each attribute provided in list. Values for "
98
98
  f"{feature} was provided as {type(values)}."
99
99
  )
100
100
  elif not all(isinstance(val, (int, float)) for val in values):
101
101
  raise TypeError("Some of values provided to predict_batch method are not numerical.")
102
102
  elif n_obs is not None and n_obs != len(values):
103
103
  raise ValueError(
104
- "Values provided to predict_batch method do not have the same size for all predictors."
104
+ "Values provided to predict_batch method do not have the same size for all attributes."
105
105
  )
106
106
  data_for_predict.append(values)
107
107
 
@@ -76,14 +76,14 @@ class ModelRandomForest(AbstractPredictiveModel):
76
76
  values = attribute_values.get(feature, None)
77
77
  if not isinstance(values, list):
78
78
  raise TypeError(
79
- f"'predict_batch' method expect values of each predictor provided in list. Values for "
79
+ f"'predict_batch' method expect values of each attribute provided in list. Values for "
80
80
  f"{feature} was provided as {type(values)}."
81
81
  )
82
82
  elif not all(isinstance(val, (int, float)) for val in values):
83
83
  raise TypeError("Some of values provided to predict_batch method are not numerical.")
84
84
  elif n_obs is not None and n_obs != len(values):
85
85
  raise ValueError(
86
- "Values provided to predict_batch method do not have the same size for all predictors."
86
+ "Values provided to predict_batch method do not have the same size for all attributes."
87
87
  )
88
88
  data_for_predict.append(values)
89
89