upgini 1.2.71a3810.dev3__py3-none-any.whl → 1.2.71a3832.dev4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/autofe/timeseries/base.py +2 -2
- upgini/autofe/timeseries/cross.py +1 -1
- upgini/autofe/unary.py +1 -38
- upgini/dataset.py +1 -1
- upgini/features_enricher.py +13 -10
- upgini/http.py +9 -4
- upgini/metrics.py +147 -48
- upgini/resource_bundle/strings.properties +1 -0
- upgini/utils/deduplicate_utils.py +2 -0
- upgini/utils/feature_info.py +2 -1
- upgini/utils/sklearn_ext.py +20 -2
- upgini/utils/sort.py +1 -1
- {upgini-1.2.71a3810.dev3.dist-info → upgini-1.2.71a3832.dev4.dist-info}/METADATA +5 -4
- {upgini-1.2.71a3810.dev3.dist-info → upgini-1.2.71a3832.dev4.dist-info}/RECORD +17 -18
- {upgini-1.2.71a3810.dev3.dist-info → upgini-1.2.71a3832.dev4.dist-info}/WHEEL +1 -1
- upgini/lazy_import.py +0 -35
- {upgini-1.2.71a3810.dev3.dist-info → upgini-1.2.71a3832.dev4.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.71a3810.dev3"
|
|
1
|
+
__version__ = "1.2.71a3832.dev4"
|
upgini/autofe/timeseries/base.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import abc
|
|
2
|
-
from typing import Dict, List, Optional
|
|
2
|
+
from typing import Dict, List, Optional, Tuple
|
|
3
3
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from upgini.autofe.operator import PandasOperator
|
|
@@ -64,7 +64,7 @@ class TimeSeriesBase(PandasOperator, abc.ABC):
|
|
|
64
64
|
return base_formula
|
|
65
65
|
|
|
66
66
|
@classmethod
|
|
67
|
-
def _parse_offset_from_formula(cls, formula: str, base_regex: str) ->
|
|
67
|
+
def _parse_offset_from_formula(cls, formula: str, base_regex: str) -> Tuple[Optional[Dict], Optional[str]]:
|
|
68
68
|
"""
|
|
69
69
|
Parse the offset component from a formula.
|
|
70
70
|
|
upgini/autofe/unary.py
CHANGED
|
@@ -1,10 +1,8 @@
|
|
|
1
|
-
import json
|
|
2
|
-
from typing import Dict, List, Optional
|
|
1
|
+
from typing import Dict, Optional
|
|
3
2
|
import numpy as np
|
|
4
3
|
import pandas as pd
|
|
5
4
|
|
|
6
5
|
from upgini.autofe.operator import PandasOperator, VectorizableMixin
|
|
7
|
-
from upgini.autofe.utils import pydantic_validator
|
|
8
6
|
|
|
9
7
|
|
|
10
8
|
class Abs(PandasOperator, VectorizableMixin):
|
|
@@ -155,38 +153,3 @@ class Embeddings(PandasOperator):
|
|
|
155
153
|
is_unary: bool = True
|
|
156
154
|
input_type: Optional[str] = "string"
|
|
157
155
|
output_type: Optional[str] = "vector"
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
class Bin(PandasOperator):
|
|
161
|
-
name: str = "bin"
|
|
162
|
-
is_unary: bool = True
|
|
163
|
-
output_type: Optional[str] = "string"
|
|
164
|
-
bin_bounds: List[int] = []
|
|
165
|
-
is_categorical: bool = True
|
|
166
|
-
|
|
167
|
-
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
168
|
-
return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype(str)
|
|
169
|
-
|
|
170
|
-
def _bin(self, f, bounds):
|
|
171
|
-
if f is None or np.isnan(f):
|
|
172
|
-
return np.nan
|
|
173
|
-
hit = np.where(f >= np.array(bounds))[0]
|
|
174
|
-
if hit.size > 0:
|
|
175
|
-
return np.max(hit) + 1
|
|
176
|
-
else:
|
|
177
|
-
return np.nan
|
|
178
|
-
|
|
179
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
|
180
|
-
res = super().get_params()
|
|
181
|
-
res.update(
|
|
182
|
-
{
|
|
183
|
-
"bin_bounds": json.dumps(self.bin_bounds),
|
|
184
|
-
}
|
|
185
|
-
)
|
|
186
|
-
return res
|
|
187
|
-
|
|
188
|
-
@pydantic_validator("bin_bounds", mode="before")
|
|
189
|
-
def parse_bin_bounds(cls, value):
|
|
190
|
-
if isinstance(value, str):
|
|
191
|
-
return json.loads(value)
|
|
192
|
-
return value
|
upgini/dataset.py
CHANGED
|
@@ -389,7 +389,7 @@ class Dataset: # (pd.DataFrame):
|
|
|
389
389
|
for col in columns_to_validate:
|
|
390
390
|
self.data[f"{col}_is_valid"] = ~self.data[col].isnull()
|
|
391
391
|
if validate_target and target is not None and col == target:
|
|
392
|
-
self.data.loc[self.data[target] == np.
|
|
392
|
+
self.data.loc[self.data[target] == np.inf, f"{col}_is_valid"] = False
|
|
393
393
|
|
|
394
394
|
if col in mandatory_columns:
|
|
395
395
|
self.data["valid_mandatory"] = self.data["valid_mandatory"] & self.data[f"{col}_is_valid"]
|
upgini/features_enricher.py
CHANGED
|
@@ -841,7 +841,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
841
841
|
max_features: Optional[int] = None,
|
|
842
842
|
remove_outliers_calc_metrics: Optional[bool] = None,
|
|
843
843
|
trace_id: Optional[str] = None,
|
|
844
|
-
|
|
844
|
+
internal_call: bool = False,
|
|
845
845
|
progress_bar: Optional[ProgressBar] = None,
|
|
846
846
|
progress_callback: Optional[Callable[[SearchProgress], Any]] = None,
|
|
847
847
|
**kwargs,
|
|
@@ -1095,7 +1095,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1095
1095
|
enriched_shaps = enriched_cv_result.shap_values
|
|
1096
1096
|
|
|
1097
1097
|
if enriched_shaps is not None:
|
|
1098
|
-
self._update_shap_values(trace_id, fitting_X, enriched_shaps)
|
|
1098
|
+
self._update_shap_values(trace_id, fitting_X, enriched_shaps, silent=not internal_call)
|
|
1099
1099
|
|
|
1100
1100
|
if enriched_metric is None:
|
|
1101
1101
|
self.logger.warning(
|
|
@@ -1256,14 +1256,14 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1256
1256
|
if self.raise_validation_error:
|
|
1257
1257
|
raise e
|
|
1258
1258
|
else:
|
|
1259
|
-
if not
|
|
1259
|
+
if not internal_call:
|
|
1260
1260
|
self._dump_python_libs()
|
|
1261
1261
|
self.__display_support_link()
|
|
1262
1262
|
raise e
|
|
1263
1263
|
finally:
|
|
1264
1264
|
self.logger.info(f"Calculating metrics elapsed time: {time.time() - start_time}")
|
|
1265
1265
|
|
|
1266
|
-
def _update_shap_values(self, trace_id: str, df: pd.DataFrame, new_shaps: Dict[str, float]):
|
|
1266
|
+
def _update_shap_values(self, trace_id: str, df: pd.DataFrame, new_shaps: Dict[str, float], silent: bool = False):
|
|
1267
1267
|
renaming = self.fit_columns_renaming or {}
|
|
1268
1268
|
new_shaps = {
|
|
1269
1269
|
renaming.get(feature, feature): _round_shap_value(shap)
|
|
@@ -1272,7 +1272,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1272
1272
|
}
|
|
1273
1273
|
self.__prepare_feature_importances(trace_id, df, new_shaps)
|
|
1274
1274
|
|
|
1275
|
-
if self.features_info_display_handle is not None:
|
|
1275
|
+
if not silent and self.features_info_display_handle is not None:
|
|
1276
1276
|
try:
|
|
1277
1277
|
_ = get_ipython() # type: ignore
|
|
1278
1278
|
|
|
@@ -1284,7 +1284,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1284
1284
|
)
|
|
1285
1285
|
except (ImportError, NameError):
|
|
1286
1286
|
pass
|
|
1287
|
-
if self.data_sources_display_handle is not None:
|
|
1287
|
+
if not silent and self.data_sources_display_handle is not None:
|
|
1288
1288
|
try:
|
|
1289
1289
|
_ = get_ipython() # type: ignore
|
|
1290
1290
|
|
|
@@ -1296,7 +1296,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1296
1296
|
)
|
|
1297
1297
|
except (ImportError, NameError):
|
|
1298
1298
|
pass
|
|
1299
|
-
if self.autofe_features_display_handle is not None:
|
|
1299
|
+
if not silent and self.autofe_features_display_handle is not None:
|
|
1300
1300
|
try:
|
|
1301
1301
|
_ = get_ipython() # type: ignore
|
|
1302
1302
|
autofe_descriptions_df = self.get_autofe_features_description()
|
|
@@ -1309,7 +1309,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1309
1309
|
)
|
|
1310
1310
|
except (ImportError, NameError):
|
|
1311
1311
|
pass
|
|
1312
|
-
if self.report_button_handle is not None:
|
|
1312
|
+
if not silent and self.report_button_handle is not None:
|
|
1313
1313
|
try:
|
|
1314
1314
|
_ = get_ipython() # type: ignore
|
|
1315
1315
|
|
|
@@ -4084,7 +4084,10 @@ if response.status_code == 200:
|
|
|
4084
4084
|
)
|
|
4085
4085
|
|
|
4086
4086
|
if all(k == SearchKey.CUSTOM_KEY for k in valid_search_keys.values()):
|
|
4087
|
-
|
|
4087
|
+
if self.__is_registered:
|
|
4088
|
+
msg = self.bundle.get("only_custom_keys")
|
|
4089
|
+
else:
|
|
4090
|
+
msg = self.bundle.get("unregistered_only_personal_keys")
|
|
4088
4091
|
self.logger.warning(msg + f" Provided search keys: {search_keys}")
|
|
4089
4092
|
raise ValidationError(msg)
|
|
4090
4093
|
|
|
@@ -4135,7 +4138,7 @@ if response.status_code == 200:
|
|
|
4135
4138
|
max_features=max_features,
|
|
4136
4139
|
remove_outliers_calc_metrics=remove_outliers_calc_metrics,
|
|
4137
4140
|
trace_id=trace_id,
|
|
4138
|
-
|
|
4141
|
+
internal_call=True,
|
|
4139
4142
|
progress_bar=progress_bar,
|
|
4140
4143
|
progress_callback=progress_callback,
|
|
4141
4144
|
)
|
upgini/http.py
CHANGED
|
@@ -16,6 +16,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
16
16
|
from urllib.parse import urljoin
|
|
17
17
|
|
|
18
18
|
import jwt
|
|
19
|
+
|
|
19
20
|
# import pandas as pd
|
|
20
21
|
import requests
|
|
21
22
|
from pydantic import BaseModel
|
|
@@ -342,7 +343,9 @@ class _RestClient:
|
|
|
342
343
|
else:
|
|
343
344
|
return self._syncronized_refresh_access_token()
|
|
344
345
|
|
|
345
|
-
def _with_unauth_retry(
|
|
346
|
+
def _with_unauth_retry(
|
|
347
|
+
self, request, try_number: int = 0, need_connection_retry: bool = True, silent: bool = False
|
|
348
|
+
):
|
|
346
349
|
try:
|
|
347
350
|
return request()
|
|
348
351
|
except RequestException as e:
|
|
@@ -373,8 +376,9 @@ class _RestClient:
|
|
|
373
376
|
elif "more than one concurrent search request" in e.message.lower():
|
|
374
377
|
raise ValidationError(bundle.get("concurrent_request"))
|
|
375
378
|
else:
|
|
376
|
-
|
|
377
|
-
|
|
379
|
+
if not silent:
|
|
380
|
+
print(e)
|
|
381
|
+
show_status_error()
|
|
378
382
|
raise e
|
|
379
383
|
|
|
380
384
|
@staticmethod
|
|
@@ -706,6 +710,7 @@ class _RestClient:
|
|
|
706
710
|
silent=True,
|
|
707
711
|
),
|
|
708
712
|
need_connection_retry=False,
|
|
713
|
+
silent=True,
|
|
709
714
|
)
|
|
710
715
|
except Exception:
|
|
711
716
|
self.send_log_event_unauth(log_event)
|
|
@@ -716,7 +721,7 @@ class _RestClient:
|
|
|
716
721
|
try:
|
|
717
722
|
requests.post(
|
|
718
723
|
url=urljoin(_RestClient.PROD_BACKEND_URL, api_path),
|
|
719
|
-
json=log_event.
|
|
724
|
+
json=log_event.model_dump(exclude_none=True),
|
|
720
725
|
headers=_RestClient._get_base_headers(content_type="application/json"),
|
|
721
726
|
)
|
|
722
727
|
except Exception:
|
upgini/metrics.py
CHANGED
|
@@ -1,17 +1,18 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
from dataclasses import dataclass
|
|
4
3
|
import inspect
|
|
5
4
|
import logging
|
|
6
5
|
import re
|
|
6
|
+
import warnings
|
|
7
7
|
from collections import defaultdict
|
|
8
8
|
from copy import deepcopy
|
|
9
|
+
from dataclasses import dataclass
|
|
9
10
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
|
10
11
|
|
|
11
|
-
import catboost
|
|
12
12
|
import numpy as np
|
|
13
13
|
import pandas as pd
|
|
14
|
-
from
|
|
14
|
+
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
15
|
+
import lightgbm as lgb
|
|
15
16
|
from numpy import log1p
|
|
16
17
|
from pandas.api.types import is_numeric_dtype
|
|
17
18
|
from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
|
|
@@ -27,11 +28,8 @@ except ImportError:
|
|
|
27
28
|
from sklearn.metrics._scorer import SCORERS
|
|
28
29
|
|
|
29
30
|
available_scorers = SCORERS
|
|
30
|
-
from sklearn.metrics._regression import (
|
|
31
|
-
_check_reg_targets,
|
|
32
|
-
check_consistent_length,
|
|
33
|
-
)
|
|
34
31
|
from sklearn.metrics import mean_squared_error
|
|
32
|
+
from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
|
|
35
33
|
from sklearn.model_selection import BaseCrossValidator
|
|
36
34
|
|
|
37
35
|
from upgini.errors import ValidationError
|
|
@@ -88,11 +86,73 @@ CATBOOST_MULTICLASS_PARAMS = {
|
|
|
88
86
|
|
|
89
87
|
LIGHTGBM_PARAMS = {
|
|
90
88
|
"random_state": DEFAULT_RANDOM_STATE,
|
|
91
|
-
"num_leaves": 16,
|
|
89
|
+
# "num_leaves": 16,
|
|
90
|
+
# "n_estimators": 150,
|
|
91
|
+
# "min_child_weight": 1,
|
|
92
92
|
"max_depth": 4,
|
|
93
|
-
"
|
|
93
|
+
"max_cat_threshold": 80,
|
|
94
|
+
"min_data_per_group": 25,
|
|
95
|
+
"num_boost_round": 150,
|
|
96
|
+
"cat_l2": 10,
|
|
97
|
+
"cat_smooth": 12,
|
|
98
|
+
"learning_rate": 0.05,
|
|
99
|
+
"feature_fraction": 1.0,
|
|
100
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
LIGHTGBM_REGRESSION_PARAMS = {
|
|
104
|
+
"random_state": DEFAULT_RANDOM_STATE,
|
|
105
|
+
"deterministic": True,
|
|
106
|
+
"min_gain_to_split": 0.001,
|
|
107
|
+
"n_estimators": 275,
|
|
108
|
+
"max_depth": 5,
|
|
109
|
+
"max_cat_threshold": 80,
|
|
110
|
+
"min_data_per_group": 25,
|
|
111
|
+
"cat_l2": 10,
|
|
112
|
+
"cat_smooth": 12,
|
|
94
113
|
"learning_rate": 0.05,
|
|
95
|
-
"
|
|
114
|
+
"feature_fraction": 1.0,
|
|
115
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
116
|
+
"objective": "huber",
|
|
117
|
+
"verbosity": -1,
|
|
118
|
+
}
|
|
119
|
+
|
|
120
|
+
LIGHTGBM_MULTICLASS_PARAMS = {
|
|
121
|
+
"random_state": DEFAULT_RANDOM_STATE,
|
|
122
|
+
"deterministic": True,
|
|
123
|
+
"min_gain_to_split": 0.001,
|
|
124
|
+
"n_estimators": 275,
|
|
125
|
+
"max_depth": 3,
|
|
126
|
+
"max_cat_threshold": 80,
|
|
127
|
+
"min_data_per_group": 25,
|
|
128
|
+
"cat_l2": 10,
|
|
129
|
+
"cat_smooth": 12,
|
|
130
|
+
"learning_rate": 0.25, # CatBoost 0.25
|
|
131
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
132
|
+
"class_weight": "balanced", # TODO pass dict with weights for each class
|
|
133
|
+
"objective": "multiclass",
|
|
134
|
+
"use_quantized_grad": "true",
|
|
135
|
+
"num_grad_quant_bins": "8",
|
|
136
|
+
"stochastic_rounding": "true",
|
|
137
|
+
"verbosity": -1,
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
LIGHTGBM_BINARY_PARAMS = {
|
|
141
|
+
"random_state": DEFAULT_RANDOM_STATE,
|
|
142
|
+
"deterministic": True,
|
|
143
|
+
"min_gain_to_split": 0.001,
|
|
144
|
+
"n_estimators": 275,
|
|
145
|
+
"max_depth": 5,
|
|
146
|
+
"max_cat_threshold": 80,
|
|
147
|
+
"min_data_per_group": 25,
|
|
148
|
+
"cat_l2": 10,
|
|
149
|
+
"cat_smooth": 12,
|
|
150
|
+
"learning_rate": 0.05,
|
|
151
|
+
"feature_fraction": 1.0,
|
|
152
|
+
"min_sum_hessian_in_leaf": 0.01,
|
|
153
|
+
"objective": "binary",
|
|
154
|
+
"class_weight": "balanced", # TODO pass dict with weights for each class
|
|
155
|
+
"verbosity": -1,
|
|
96
156
|
}
|
|
97
157
|
|
|
98
158
|
N_FOLDS = 5
|
|
@@ -211,6 +271,15 @@ SUPPORTED_CATBOOST_METRICS = {
|
|
|
211
271
|
}
|
|
212
272
|
|
|
213
273
|
|
|
274
|
+
def is_catboost_estimator(estimator):
|
|
275
|
+
try:
|
|
276
|
+
from catboost import CatBoostClassifier, CatBoostRegressor
|
|
277
|
+
|
|
278
|
+
return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
|
|
279
|
+
except ImportError:
|
|
280
|
+
return False
|
|
281
|
+
|
|
282
|
+
|
|
214
283
|
@dataclass
|
|
215
284
|
class _CrossValResults:
|
|
216
285
|
metric: Optional[float]
|
|
@@ -292,7 +361,7 @@ class EstimatorWrapper:
|
|
|
292
361
|
self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
|
|
293
362
|
return x, y, groups
|
|
294
363
|
|
|
295
|
-
def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame,
|
|
364
|
+
def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray]:
|
|
296
365
|
joined = pd.concat([x, y], axis=1)
|
|
297
366
|
joined = joined[joined[y.name].notna()]
|
|
298
367
|
joined = joined.reset_index(drop=True)
|
|
@@ -346,12 +415,15 @@ class EstimatorWrapper:
|
|
|
346
415
|
for estimator, split in zip(self.cv_estimators, splits):
|
|
347
416
|
_, validation_idx = split
|
|
348
417
|
cv_x = x.iloc[validation_idx]
|
|
349
|
-
|
|
418
|
+
if isinstance(y, pd.Series):
|
|
419
|
+
cv_y = y.iloc[validation_idx]
|
|
420
|
+
else:
|
|
421
|
+
cv_y = y[validation_idx]
|
|
350
422
|
shaps = self.calculate_shap(cv_x, cv_y, estimator)
|
|
351
423
|
if shaps is not None:
|
|
352
424
|
for feature, shap_value in shaps.items():
|
|
353
425
|
# shap_values_all_folds[feature] = shap_values_all_folds.get(feature, []) + shap_value.tolist()
|
|
354
|
-
shap_values_all_folds[feature].
|
|
426
|
+
shap_values_all_folds[feature].append(shap_value)
|
|
355
427
|
|
|
356
428
|
if shap_values_all_folds:
|
|
357
429
|
average_shap_values = {
|
|
@@ -427,21 +499,18 @@ class EstimatorWrapper:
|
|
|
427
499
|
}
|
|
428
500
|
if estimator is None:
|
|
429
501
|
params = {}
|
|
430
|
-
params["has_time"] = has_date
|
|
431
|
-
# if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
|
|
432
|
-
# params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
|
|
433
502
|
if target_type == ModelTaskType.MULTICLASS:
|
|
434
|
-
params = _get_add_params(params,
|
|
503
|
+
params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
|
|
435
504
|
params = _get_add_params(params, add_params)
|
|
436
|
-
estimator =
|
|
505
|
+
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
437
506
|
elif target_type == ModelTaskType.BINARY:
|
|
438
|
-
params = _get_add_params(params,
|
|
507
|
+
params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
|
|
439
508
|
params = _get_add_params(params, add_params)
|
|
440
|
-
estimator =
|
|
509
|
+
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
|
441
510
|
elif target_type == ModelTaskType.REGRESSION:
|
|
442
|
-
params = _get_add_params(params,
|
|
511
|
+
params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
|
|
443
512
|
params = _get_add_params(params, add_params)
|
|
444
|
-
estimator =
|
|
513
|
+
estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
|
|
445
514
|
else:
|
|
446
515
|
raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
|
|
447
516
|
else:
|
|
@@ -450,31 +519,21 @@ class EstimatorWrapper:
|
|
|
450
519
|
else:
|
|
451
520
|
estimator_copy = deepcopy(estimator)
|
|
452
521
|
kwargs["estimator"] = estimator_copy
|
|
453
|
-
if
|
|
522
|
+
if is_catboost_estimator(estimator):
|
|
454
523
|
if cat_features is not None:
|
|
455
524
|
for cat_feature in cat_features:
|
|
456
525
|
if cat_feature not in x.columns:
|
|
457
526
|
logger.error(
|
|
458
527
|
f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
|
|
459
528
|
)
|
|
460
|
-
estimator_copy.set_params(
|
|
461
|
-
# cat_features=[x.columns.get_loc(cat_feature) for cat_feature in cat_features]
|
|
462
|
-
cat_features=cat_features
|
|
463
|
-
)
|
|
529
|
+
estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
|
|
464
530
|
estimator = CatBoostWrapper(**kwargs)
|
|
465
531
|
else:
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
else:
|
|
472
|
-
logger.warning(
|
|
473
|
-
f"Unexpected estimator is used for metrics: {estimator}. "
|
|
474
|
-
"Default strategy for category features will be used"
|
|
475
|
-
)
|
|
476
|
-
estimator = OtherEstimatorWrapper(**kwargs)
|
|
477
|
-
except ModuleNotFoundError:
|
|
532
|
+
if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
|
|
533
|
+
estimator = LightGBMWrapper(**kwargs)
|
|
534
|
+
elif is_catboost_estimator(estimator):
|
|
535
|
+
estimator = CatBoostWrapper(**kwargs)
|
|
536
|
+
else:
|
|
478
537
|
logger.warning(
|
|
479
538
|
f"Unexpected estimator is used for metrics: {estimator}. "
|
|
480
539
|
"Default strategy for category features will be used"
|
|
@@ -487,7 +546,7 @@ class EstimatorWrapper:
|
|
|
487
546
|
class CatBoostWrapper(EstimatorWrapper):
|
|
488
547
|
def __init__(
|
|
489
548
|
self,
|
|
490
|
-
estimator
|
|
549
|
+
estimator,
|
|
491
550
|
scorer: Callable,
|
|
492
551
|
metric_name: str,
|
|
493
552
|
multiplier: int,
|
|
@@ -517,6 +576,9 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
517
576
|
x, y, groups, params = super()._prepare_to_fit(x, y)
|
|
518
577
|
|
|
519
578
|
# Find embeddings
|
|
579
|
+
import catboost
|
|
580
|
+
from catboost import CatBoostClassifier
|
|
581
|
+
|
|
520
582
|
if hasattr(CatBoostClassifier, "get_embedding_feature_indices"):
|
|
521
583
|
emb_pattern = r"(.+)_emb\d+"
|
|
522
584
|
self.emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
|
|
@@ -637,8 +699,10 @@ class CatBoostWrapper(EstimatorWrapper):
|
|
|
637
699
|
else:
|
|
638
700
|
raise e
|
|
639
701
|
|
|
640
|
-
def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator
|
|
702
|
+
def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
|
|
641
703
|
try:
|
|
704
|
+
from catboost import Pool
|
|
705
|
+
|
|
642
706
|
# Create Pool for fold data, if need (for example, when categorical features are present)
|
|
643
707
|
fold_pool = Pool(
|
|
644
708
|
x,
|
|
@@ -695,25 +759,60 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
|
695
759
|
self.cat_features = None
|
|
696
760
|
|
|
697
761
|
def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
|
|
698
|
-
x,
|
|
762
|
+
x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
|
|
763
|
+
params["callbacks"] = [lgb.early_stopping(stopping_rounds=20)]
|
|
699
764
|
self.cat_features = _get_cat_features(x)
|
|
700
765
|
x = fill_na_cat_features(x, self.cat_features)
|
|
701
766
|
for feature in self.cat_features:
|
|
702
767
|
x[feature] = x[feature].astype("category").cat.codes
|
|
703
|
-
if not is_numeric_dtype(
|
|
704
|
-
|
|
768
|
+
if not is_numeric_dtype(y_numpy):
|
|
769
|
+
y_numpy = correct_string_target(y_numpy)
|
|
705
770
|
|
|
706
|
-
return x,
|
|
771
|
+
return x, y_numpy, groups, params
|
|
707
772
|
|
|
708
773
|
def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
|
|
709
|
-
x,
|
|
774
|
+
x, y_numpy, params = super()._prepare_to_calculate(x, y)
|
|
710
775
|
if self.cat_features is not None:
|
|
711
776
|
x = fill_na_cat_features(x, self.cat_features)
|
|
712
777
|
for feature in self.cat_features:
|
|
713
778
|
x[feature] = x[feature].astype("category").cat.codes
|
|
714
779
|
if not is_numeric_dtype(y):
|
|
715
|
-
|
|
716
|
-
return x,
|
|
780
|
+
y_numpy = correct_string_target(y_numpy)
|
|
781
|
+
return x, y_numpy, params
|
|
782
|
+
|
|
783
|
+
def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
|
|
784
|
+
try:
|
|
785
|
+
# Suppress specific warning from SHAP for LightGBM binary classifier
|
|
786
|
+
warnings.filterwarnings(
|
|
787
|
+
"ignore",
|
|
788
|
+
message=(
|
|
789
|
+
"LightGBM binary classifier with TreeExplainer shap values output has changed to a list of ndarray"
|
|
790
|
+
),
|
|
791
|
+
)
|
|
792
|
+
from shap import TreeExplainer
|
|
793
|
+
|
|
794
|
+
if not isinstance(estimator, (LGBMRegressor, LGBMClassifier)):
|
|
795
|
+
return None
|
|
796
|
+
|
|
797
|
+
explainer = TreeExplainer(estimator)
|
|
798
|
+
|
|
799
|
+
shap_values = explainer.shap_values(x)
|
|
800
|
+
|
|
801
|
+
# For classification, shap_values is returned as a list for each class
|
|
802
|
+
# Take values for the positive class
|
|
803
|
+
if isinstance(shap_values, list):
|
|
804
|
+
shap_values = shap_values[1]
|
|
805
|
+
|
|
806
|
+
# Calculate mean absolute SHAP value for each feature
|
|
807
|
+
feature_importance = {}
|
|
808
|
+
for i, col in enumerate(x.columns):
|
|
809
|
+
feature_importance[col] = np.mean(np.abs(shap_values[:, i]))
|
|
810
|
+
|
|
811
|
+
return feature_importance
|
|
812
|
+
|
|
813
|
+
except Exception as e:
|
|
814
|
+
self.logger.warning(f"Failed to calculate SHAP values: {str(e)}")
|
|
815
|
+
return None
|
|
717
816
|
|
|
718
817
|
|
|
719
818
|
class OtherEstimatorWrapper(EstimatorWrapper):
|
|
@@ -80,6 +80,7 @@ email_and_hem_simultanious=EMAIL and HEM search keys cannot be used simultaneous
|
|
|
80
80
|
postal_code_without_country=COUNTRY search key required if POSTAL_CODE is present
|
|
81
81
|
multiple_search_key=Search key {} passed multiple times
|
|
82
82
|
unregistered_only_personal_keys=Only personal search keys used. Api_key from profile.upgini.com required for EMAIL/HEM, PHONE NUMBER or IPv4/IPv6 search keys\nSee docs https://github.com/upgini/upgini#-open-up-all-capabilities-of-upgini
|
|
83
|
+
only_custom_keys=Only CUSTOM_KEY search keys were provided. At least one of DATE, COUNTRY, POSTAL_CODE, PHONE, EMAIL, HEM, IP should be provided
|
|
83
84
|
search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
|
|
84
85
|
numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
|
|
85
86
|
unsupported_search_key_type=Unsupported type of key in search_keys: {}
|
|
@@ -74,6 +74,8 @@ def remove_fintech_duplicates(
|
|
|
74
74
|
# Checking for different dates by the same personal keys
|
|
75
75
|
uniques = grouped_by_personal_cols[date_col].nunique()
|
|
76
76
|
total = len(uniques)
|
|
77
|
+
if total == 0:
|
|
78
|
+
return segment_df, None
|
|
77
79
|
diff_dates = len(uniques[uniques > 1])
|
|
78
80
|
if diff_dates / total >= 0.6:
|
|
79
81
|
return segment_df, None
|
upgini/utils/feature_info.py
CHANGED
|
@@ -90,7 +90,8 @@ class FeatureInfo:
|
|
|
90
90
|
def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
|
|
91
91
|
if data is not None and len(data) > 0 and feature_meta.name in data.columns:
|
|
92
92
|
if len(data) > 3:
|
|
93
|
-
|
|
93
|
+
rand = np.random.RandomState(42)
|
|
94
|
+
feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
|
|
94
95
|
else:
|
|
95
96
|
feature_sample = data[feature_meta.name].dropna().unique().tolist()
|
|
96
97
|
if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
|
upgini/utils/sklearn_ext.py
CHANGED
|
@@ -9,7 +9,6 @@ from traceback import format_exc
|
|
|
9
9
|
|
|
10
10
|
import numpy as np
|
|
11
11
|
import scipy.sparse as sp
|
|
12
|
-
from catboost import CatBoostClassifier, CatBoostRegressor
|
|
13
12
|
from joblib import Parallel, logger
|
|
14
13
|
from scipy.sparse import issparse
|
|
15
14
|
from sklearn import config_context, get_config
|
|
@@ -342,6 +341,22 @@ def cross_validate(
|
|
|
342
341
|
raise e
|
|
343
342
|
|
|
344
343
|
|
|
344
|
+
def is_catboost_estimator(estimator):
|
|
345
|
+
try:
|
|
346
|
+
from catboost import CatBoostClassifier, CatBoostRegressor
|
|
347
|
+
return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
|
|
348
|
+
except ImportError:
|
|
349
|
+
return False
|
|
350
|
+
|
|
351
|
+
|
|
352
|
+
def is_lightgbm_estimator(estimator):
|
|
353
|
+
try:
|
|
354
|
+
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
355
|
+
return isinstance(estimator, (LGBMClassifier, LGBMRegressor))
|
|
356
|
+
except ImportError:
|
|
357
|
+
return False
|
|
358
|
+
|
|
359
|
+
|
|
345
360
|
def _fit_and_score(
|
|
346
361
|
estimator,
|
|
347
362
|
X,
|
|
@@ -497,7 +512,10 @@ def _fit_and_score(
|
|
|
497
512
|
if y_train is None:
|
|
498
513
|
estimator.fit(X_train, **fit_params)
|
|
499
514
|
else:
|
|
500
|
-
if
|
|
515
|
+
if is_catboost_estimator(estimator):
|
|
516
|
+
fit_params = fit_params.copy()
|
|
517
|
+
fit_params["eval_set"] = [(X_test, y_test)]
|
|
518
|
+
elif is_lightgbm_estimator(estimator):
|
|
501
519
|
fit_params = fit_params.copy()
|
|
502
520
|
fit_params["eval_set"] = [(X_test, y_test)]
|
|
503
521
|
estimator.fit(X_train, y_train, **fit_params)
|
upgini/utils/sort.py
CHANGED
|
@@ -87,7 +87,7 @@ def get_sort_columns_dict(
|
|
|
87
87
|
df_with_target = df_with_target.loc[~target.isna()]
|
|
88
88
|
df = df_with_target.iloc[:, :-1]
|
|
89
89
|
target = df_with_target.iloc[:, -1]
|
|
90
|
-
df = df.fillna(df.mean())
|
|
90
|
+
df = df.fillna(df.apply(lambda x: int(x.mean()) if pd.api.types.is_integer_dtype(x) else x.mean()))
|
|
91
91
|
omit_nan = False
|
|
92
92
|
hashes = [hash_series(df[col]) for col in columns_for_sort]
|
|
93
93
|
df = np.asarray(df, dtype=np.float32)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.71a3810.dev3
|
|
3
|
+
Version: 1.2.71a3832.dev4
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -22,14 +22,14 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
22
22
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
23
23
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
24
24
|
Requires-Python: <3.12,>=3.10
|
|
25
|
-
Requires-Dist: catboost>=1.0.3
|
|
26
25
|
Requires-Dist: fastparquet>=0.8.1
|
|
27
26
|
Requires-Dist: ipywidgets>=8.1.0
|
|
28
27
|
Requires-Dist: jarowinkler>=2.0.0
|
|
29
28
|
Requires-Dist: levenshtein>=0.25.1
|
|
30
|
-
Requires-Dist:
|
|
29
|
+
Requires-Dist: lightgbm>=4.6.0
|
|
30
|
+
Requires-Dist: numpy<3.0.0,>=1.19.0
|
|
31
31
|
Requires-Dist: pandas<3.0.0,>=1.1.0
|
|
32
|
-
Requires-Dist: psutil>=
|
|
32
|
+
Requires-Dist: psutil>=5.9.0
|
|
33
33
|
Requires-Dist: pydantic<3.0.0,>1.0.0
|
|
34
34
|
Requires-Dist: pyjwt>=2.8.0
|
|
35
35
|
Requires-Dist: python-bidi==0.4.2
|
|
@@ -38,6 +38,7 @@ Requires-Dist: python-json-logger>=3.3.0
|
|
|
38
38
|
Requires-Dist: requests>=2.8.0
|
|
39
39
|
Requires-Dist: scikit-learn>=1.3.0
|
|
40
40
|
Requires-Dist: scipy>=1.10.0
|
|
41
|
+
Requires-Dist: shap>=0.44.0
|
|
41
42
|
Requires-Dist: xhtml2pdf<0.3.0,>=0.2.11
|
|
42
43
|
Description-Content-Type: text/markdown
|
|
43
44
|
|
|
@@ -1,13 +1,12 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=xZJ4YiYa1ZXgmCQ3SYjASYcXSx3CrMdke97pR0PB16E,33
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
|
-
upgini/dataset.py,sha256=
|
|
4
|
+
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
7
|
-
upgini/http.py,sha256=
|
|
8
|
-
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
6
|
+
upgini/features_enricher.py,sha256=Z6RSjqcqneGwWflsq1Q5rjf83awPNYqKpAgHRh7jils,204680
|
|
7
|
+
upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
|
|
9
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
|
10
|
-
upgini/metrics.py,sha256=
|
|
9
|
+
upgini/metrics.py,sha256=LI0wwTUSnxX62lVSM7J8Pq_RSbruq93QUhbMXilWM30,38301
|
|
11
10
|
upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
|
|
12
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
13
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
|
@@ -20,12 +19,12 @@ upgini/autofe/date.py,sha256=C86F7sPiscUGq2a45UtQA9ADWBWg0kt54mePHHzjbLE,10633
|
|
|
20
19
|
upgini/autofe/feature.py,sha256=y1x3wijhTVBmloayQAHiscqKU9Ll8kLcGm1PdvS357I,14910
|
|
21
20
|
upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
|
|
22
21
|
upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
|
|
23
|
-
upgini/autofe/unary.py,sha256=
|
|
22
|
+
upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
|
|
24
23
|
upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
|
|
25
24
|
upgini/autofe/vector.py,sha256=l0KdKg-txlZxDSE4hPPfCtfGQofYbl7oaABPr830sPI,667
|
|
26
25
|
upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
|
|
27
|
-
upgini/autofe/timeseries/base.py,sha256=
|
|
28
|
-
upgini/autofe/timeseries/cross.py,sha256=
|
|
26
|
+
upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
|
|
27
|
+
upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
|
|
29
28
|
upgini/autofe/timeseries/delta.py,sha256=h0YhmI1TlPJnjwFpN_GQxLb6r59DQuucnG5tQAXSgjU,3520
|
|
30
29
|
upgini/autofe/timeseries/lag.py,sha256=LfQtg484vuqM0mgY4Wft1swHX_Srq7OKKgZswCXoiXI,1882
|
|
31
30
|
upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_aCHg,2794
|
|
@@ -39,7 +38,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
39
38
|
upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
|
|
40
39
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
|
41
40
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
|
42
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
41
|
+
upgini/resource_bundle/strings.properties,sha256=mwQrerdJj3adzT-fHqvs6Qjf-rqDccsUzELDIXJKAmY,27791
|
|
43
42
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
|
44
43
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
45
44
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
|
@@ -53,11 +52,11 @@ upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk
|
|
|
53
52
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
54
53
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
55
54
|
upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
|
|
56
|
-
upgini/utils/deduplicate_utils.py,sha256=
|
|
55
|
+
upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
|
|
57
56
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
58
57
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
|
59
58
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
60
|
-
upgini/utils/feature_info.py,sha256=
|
|
59
|
+
upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
|
|
61
60
|
upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
|
|
62
61
|
upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
|
|
63
62
|
upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
|
|
@@ -65,13 +64,13 @@ upgini/utils/mstats.py,sha256=u3gQVUtDRbyrOQK6V1UJ2Rx1QbkSNYGjXa6m3Z_dPVs,6286
|
|
|
65
64
|
upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
|
|
66
65
|
upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
|
|
67
66
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
|
68
|
-
upgini/utils/sklearn_ext.py,sha256=
|
|
69
|
-
upgini/utils/sort.py,sha256=
|
|
67
|
+
upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
|
|
68
|
+
upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
|
|
70
69
|
upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,16579
|
|
71
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
72
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
|
73
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
74
|
-
upgini-1.2.
|
|
75
|
-
upgini-1.2.
|
|
76
|
-
upgini-1.2.
|
|
77
|
-
upgini-1.2.
|
|
73
|
+
upgini-1.2.71a3832.dev4.dist-info/METADATA,sha256=XWxCzwoYpOeebCAtVb_H4-x-9VeHLDwYc7DkputGaAc,49101
|
|
74
|
+
upgini-1.2.71a3832.dev4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
75
|
+
upgini-1.2.71a3832.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
76
|
+
upgini-1.2.71a3832.dev4.dist-info/RECORD,,
|
upgini/lazy_import.py
DELETED
|
@@ -1,35 +0,0 @@
|
|
|
1
|
-
import importlib
|
|
2
|
-
import importlib.util
|
|
3
|
-
import importlib.machinery
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
class LazyImport:
|
|
7
|
-
def __init__(self, module_name, class_name):
|
|
8
|
-
self.module_name = module_name
|
|
9
|
-
self.class_name = class_name
|
|
10
|
-
self._module = None
|
|
11
|
-
self._class = None
|
|
12
|
-
|
|
13
|
-
def _load(self):
|
|
14
|
-
if self._module is None:
|
|
15
|
-
# Load module and save link to it
|
|
16
|
-
spec = importlib.util.find_spec(self.module_name)
|
|
17
|
-
if spec is None:
|
|
18
|
-
raise ImportError(f"Module {self.module_name} not found")
|
|
19
|
-
|
|
20
|
-
# Create module
|
|
21
|
-
self._module = importlib.util.module_from_spec(spec)
|
|
22
|
-
|
|
23
|
-
# Execute module
|
|
24
|
-
spec.loader.exec_module(self._module)
|
|
25
|
-
|
|
26
|
-
# Get class from module
|
|
27
|
-
self._class = getattr(self._module, self.class_name)
|
|
28
|
-
|
|
29
|
-
def __call__(self, *args, **kwargs):
|
|
30
|
-
self._load()
|
|
31
|
-
return self._class(*args, **kwargs)
|
|
32
|
-
|
|
33
|
-
def __getattr__(self, name):
|
|
34
|
-
self._load()
|
|
35
|
-
return getattr(self._class, name)
|
|
File without changes
|