upgini 1.1.285__tar.gz → 1.1.285a3418.post1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/PKG-INFO +1 -1
- upgini-1.1.285a3418.post1/src/upgini/__about__.py +1 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/all_operands.py +2 -1
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/date.py +85 -2
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/feature.py +22 -5
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/operand.py +4 -2
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/features_enricher.py +21 -17
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/metadata.py +3 -1
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/custom_loss_utils.py +36 -39
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/version_validator.py +1 -1
- upgini-1.1.285/src/upgini/__about__.py +0 -1
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/.gitignore +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/LICENSE +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/README.md +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/pyproject.toml +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/ads.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/dataset.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/errors.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/http.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/metrics.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/search_task.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/spinner.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/utils/warning_counter.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.285a3418-1"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
3
|
from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
4
|
-
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
|
|
4
|
+
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded, DatePercentile
|
|
5
5
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
6
6
|
from upgini.autofe.operand import Operand
|
|
7
7
|
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
|
|
@@ -49,6 +49,7 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
49
49
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=30, upper_bound=45),
|
|
50
50
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
51
51
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
52
|
+
DatePercentile(),
|
|
52
53
|
]
|
|
53
54
|
}
|
|
54
55
|
|
|
@@ -1,9 +1,9 @@
|
|
|
1
|
-
from typing import Any, Optional, Union
|
|
1
|
+
from typing import Any, Dict, List, Optional, Union
|
|
2
2
|
|
|
3
3
|
import numpy as np
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
6
|
-
from pydantic import BaseModel
|
|
6
|
+
from pydantic import BaseModel, validator
|
|
7
7
|
|
|
8
8
|
from upgini.autofe.operand import PandasOperand
|
|
9
9
|
|
|
@@ -27,6 +27,17 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
27
27
|
is_binary = True
|
|
28
28
|
has_symmetry_importance = True
|
|
29
29
|
|
|
30
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
31
|
+
res = super().get_params()
|
|
32
|
+
res.update(
|
|
33
|
+
{
|
|
34
|
+
"diff_unit": self.diff_unit,
|
|
35
|
+
"left_unit": self.left_unit,
|
|
36
|
+
"right_unit": self.right_unit,
|
|
37
|
+
}
|
|
38
|
+
)
|
|
39
|
+
return res
|
|
40
|
+
|
|
30
41
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
31
42
|
left = self._convert_to_date(left, self.left_unit)
|
|
32
43
|
right = self._convert_to_date(right, self.right_unit)
|
|
@@ -42,6 +53,17 @@ class DateDiffType2(PandasOperand, DateDiffMixin):
|
|
|
42
53
|
is_binary = True
|
|
43
54
|
has_symmetry_importance = True
|
|
44
55
|
|
|
56
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
57
|
+
res = super().get_params()
|
|
58
|
+
res.update(
|
|
59
|
+
{
|
|
60
|
+
"diff_unit": self.diff_unit,
|
|
61
|
+
"left_unit": self.left_unit,
|
|
62
|
+
"right_unit": self.right_unit,
|
|
63
|
+
}
|
|
64
|
+
)
|
|
65
|
+
return res
|
|
66
|
+
|
|
45
67
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
46
68
|
left = self._convert_to_date(left, self.left_unit)
|
|
47
69
|
right = self._convert_to_date(right, self.right_unit)
|
|
@@ -64,6 +86,15 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
64
86
|
has_symmetry_importance = True
|
|
65
87
|
aggregation: str
|
|
66
88
|
|
|
89
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
90
|
+
res = super().get_params()
|
|
91
|
+
res.update(
|
|
92
|
+
{
|
|
93
|
+
"aggregation": self.aggregation,
|
|
94
|
+
}
|
|
95
|
+
)
|
|
96
|
+
return res
|
|
97
|
+
|
|
67
98
|
def __init__(self, **data: Any) -> None:
|
|
68
99
|
if "name" not in data:
|
|
69
100
|
data["name"] = f"date_diff_{data.get('aggregation')}"
|
|
@@ -116,3 +147,55 @@ class DateListDiffBounded(DateListDiff):
|
|
|
116
147
|
def _agg(self, x):
|
|
117
148
|
x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
|
|
118
149
|
return super()._agg(x)
|
|
150
|
+
|
|
151
|
+
|
|
152
|
+
class DatePercentile(PandasOperand):
|
|
153
|
+
name = "date_per"
|
|
154
|
+
is_binary = True
|
|
155
|
+
output_type = "float"
|
|
156
|
+
|
|
157
|
+
date_unit: Optional[str] = None
|
|
158
|
+
zero_month: Optional[int]
|
|
159
|
+
zero_year: Optional[int]
|
|
160
|
+
zero_bounds: Optional[List[float]]
|
|
161
|
+
step: int = 30
|
|
162
|
+
|
|
163
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
164
|
+
res = super().get_params()
|
|
165
|
+
res.update(
|
|
166
|
+
{
|
|
167
|
+
"date_unit": self.date_unit,
|
|
168
|
+
"zero_month": self.zero_month,
|
|
169
|
+
"zero_year": self.zero_year,
|
|
170
|
+
"zero_bounds": self.zero_bounds,
|
|
171
|
+
"step": self.step,
|
|
172
|
+
}
|
|
173
|
+
)
|
|
174
|
+
return res
|
|
175
|
+
|
|
176
|
+
@validator("zero_bounds", pre=True)
|
|
177
|
+
def validate_bounds(cls, value):
|
|
178
|
+
if value is None or isinstance(value, list):
|
|
179
|
+
return value
|
|
180
|
+
elif isinstance(value, str):
|
|
181
|
+
return value[1:-1].split(", ")
|
|
182
|
+
|
|
183
|
+
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
184
|
+
# Assuming that left is a date column, right is a feature column
|
|
185
|
+
left = pd.to_datetime(left, unit=self.date_unit)
|
|
186
|
+
months = left.dt.month
|
|
187
|
+
years = left.dt.year
|
|
188
|
+
|
|
189
|
+
month_diffs = 12 * (years - (self.zero_year or 0)) + (months - (self.zero_month or 0))
|
|
190
|
+
bounds = month_diffs.apply(
|
|
191
|
+
lambda d: np.array(self.zero_bounds if self.zero_bounds is not None else []) + d * 30
|
|
192
|
+
)
|
|
193
|
+
|
|
194
|
+
return right.index.to_series().apply(lambda i: self.__perc(right[i], bounds[i]))
|
|
195
|
+
|
|
196
|
+
def __perc(self, f, bounds):
|
|
197
|
+
hit = np.where(f >= bounds)[0]
|
|
198
|
+
if hit.size > 0:
|
|
199
|
+
return np.max(hit) + 1
|
|
200
|
+
else:
|
|
201
|
+
return np.nan
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import itertools
|
|
3
|
-
from typing import Dict, List, Optional, Tuple, Union
|
|
3
|
+
from typing import Dict, List, Optional, Set, Tuple, Union
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
@@ -16,6 +16,12 @@ class Column:
|
|
|
16
16
|
self.data = data
|
|
17
17
|
self.calculate_all = calculate_all
|
|
18
18
|
|
|
19
|
+
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
20
|
+
return self.name
|
|
21
|
+
|
|
22
|
+
def set_op_params(self, params: Dict[str, str]) -> "Column":
|
|
23
|
+
return self
|
|
24
|
+
|
|
19
25
|
def rename_columns(self, mapping: Dict[str, str]) -> "Column":
|
|
20
26
|
self.name = self._unhash(mapping.get(self.name) or self.name)
|
|
21
27
|
return self
|
|
@@ -69,19 +75,30 @@ class Feature:
|
|
|
69
75
|
self.cached_display_name = cached_display_name
|
|
70
76
|
self.alias = alias
|
|
71
77
|
|
|
72
|
-
def set_op_params(self, params: Dict[str, str]) -> "Feature":
|
|
78
|
+
def set_op_params(self, params: Optional[Dict[str, str]]) -> "Feature":
|
|
79
|
+
obj_dict = self.op.dict().copy()
|
|
80
|
+
obj_dict.update(params or {})
|
|
81
|
+
self.op = self.op.__class__.parse_obj(obj_dict)
|
|
73
82
|
self.op.set_params(params)
|
|
83
|
+
|
|
84
|
+
for child in self.children:
|
|
85
|
+
child.set_op_params(params)
|
|
74
86
|
return self
|
|
75
87
|
|
|
76
88
|
def get_hash(self) -> str:
|
|
77
|
-
return hashlib.sha256(
|
|
78
|
-
|
|
79
|
-
]
|
|
89
|
+
return hashlib.sha256(
|
|
90
|
+
"_".join([self.op.name] + [ch.get_display_name() for ch in self.children]).encode("utf-8")
|
|
91
|
+
).hexdigest()[:8]
|
|
80
92
|
|
|
81
93
|
def set_alias(self, alias: str) -> "Feature":
|
|
82
94
|
self.alias = alias
|
|
83
95
|
return self
|
|
84
96
|
|
|
97
|
+
def get_all_operand_names(self) -> Set[str]:
|
|
98
|
+
return {self.op.name}.union(
|
|
99
|
+
{n for f in self.children if isinstance(f, Feature) for n in f.get_all_operand_names()}
|
|
100
|
+
)
|
|
101
|
+
|
|
85
102
|
def rename_columns(self, mapping: Dict[str, str]) -> "Feature":
|
|
86
103
|
for child in self.children:
|
|
87
104
|
child.rename_columns(mapping)
|
|
@@ -25,8 +25,10 @@ class Operand(BaseModel):
|
|
|
25
25
|
self.params = params
|
|
26
26
|
return self
|
|
27
27
|
|
|
28
|
-
def get_params(self) -> Dict[str, str]:
|
|
29
|
-
|
|
28
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
|
29
|
+
res = {"alias": self.alias}
|
|
30
|
+
res.update(self.params or {})
|
|
31
|
+
return res
|
|
30
32
|
|
|
31
33
|
|
|
32
34
|
MAIN_COLUMN = "main_column"
|
|
@@ -423,7 +423,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
423
423
|
|
|
424
424
|
self.logger.info("Start fit")
|
|
425
425
|
|
|
426
|
-
self.__validate_search_keys(self.search_keys)
|
|
426
|
+
self.__validate_search_keys(self.search_keys, self.search_id)
|
|
427
427
|
|
|
428
428
|
# Validate client estimator params
|
|
429
429
|
self._get_client_cat_features(estimator, X, self.search_keys)
|
|
@@ -557,7 +557,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
557
557
|
|
|
558
558
|
self.logger.info("Start fit_transform")
|
|
559
559
|
|
|
560
|
-
self.__validate_search_keys(self.search_keys)
|
|
560
|
+
self.__validate_search_keys(self.search_keys, self.search_id)
|
|
561
561
|
|
|
562
562
|
search_progress = SearchProgress(0.0, ProgressStage.START_FIT)
|
|
563
563
|
if progress_callback is not None:
|
|
@@ -972,8 +972,6 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
972
972
|
|
|
973
973
|
# 2 Fit and predict with KFold estimator on enriched tds
|
|
974
974
|
# and calculate final metric (and uplift)
|
|
975
|
-
enriched_metric = None
|
|
976
|
-
uplift = None
|
|
977
975
|
enriched_estimator = None
|
|
978
976
|
if set(fitting_X.columns) != set(fitting_enriched_X.columns):
|
|
979
977
|
self.logger.info(
|
|
@@ -994,15 +992,18 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
994
992
|
has_date=has_date,
|
|
995
993
|
)
|
|
996
994
|
enriched_metric = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
|
|
997
|
-
if enriched_metric is None:
|
|
995
|
+
if etalon_metric is None:
|
|
998
996
|
self.logger.warning(
|
|
999
997
|
f"Enriched {metric} on train combined features is None (maybe all features was removed)"
|
|
1000
998
|
)
|
|
1001
999
|
enriched_estimator = None
|
|
1000
|
+
uplift = None
|
|
1002
1001
|
else:
|
|
1003
1002
|
self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
|
|
1004
|
-
if etalon_metric is not None and enriched_metric is not None:
|
|
1005
1003
|
uplift = (enriched_metric - etalon_metric) * multiplier
|
|
1004
|
+
else:
|
|
1005
|
+
enriched_metric = None
|
|
1006
|
+
uplift = None
|
|
1006
1007
|
|
|
1007
1008
|
train_metrics = {
|
|
1008
1009
|
self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
|
|
@@ -1451,15 +1452,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1451
1452
|
if len(decimal_columns_to_fix) > 0:
|
|
1452
1453
|
for col in decimal_columns_to_fix:
|
|
1453
1454
|
fitting_eval_X[col] = (
|
|
1454
|
-
fitting_eval_X[col]
|
|
1455
|
-
.astype("string").str
|
|
1456
|
-
.replace(",", ".", regex=False)
|
|
1457
|
-
.astype(np.float64)
|
|
1455
|
+
fitting_eval_X[col].astype("string").str.replace(",", ".", regex=False).astype(np.float64)
|
|
1458
1456
|
)
|
|
1459
1457
|
fitting_enriched_eval_X[col] = (
|
|
1460
1458
|
fitting_enriched_eval_X[col]
|
|
1461
|
-
.astype("string")
|
|
1462
|
-
.replace(",", ".", regex=False)
|
|
1459
|
+
.astype("string")
|
|
1460
|
+
.str.replace(",", ".", regex=False)
|
|
1463
1461
|
.astype(np.float64)
|
|
1464
1462
|
)
|
|
1465
1463
|
|
|
@@ -2149,7 +2147,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2149
2147
|
]
|
|
2150
2148
|
return excluded_features[feature_name_header].values.tolist()
|
|
2151
2149
|
|
|
2152
|
-
def __validate_search_keys(self, search_keys: Dict[str, SearchKey], search_id: Optional[str] = None):
|
|
2150
|
+
def __validate_search_keys(self, search_keys: Dict[str, SearchKey], search_id: Optional[str]):
|
|
2153
2151
|
if (search_keys is None or len(search_keys) == 0) and self.country_code is None:
|
|
2154
2152
|
if search_id:
|
|
2155
2153
|
self.logger.debug(f"search_id {search_id} provided without search_keys")
|
|
@@ -3278,10 +3276,16 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3278
3276
|
|
|
3279
3277
|
descriptions = []
|
|
3280
3278
|
for m in autofe_meta:
|
|
3281
|
-
autofe_feature = Feature.from_formula(m.formula)
|
|
3282
3279
|
orig_to_hashed = {base_column.original_name: base_column.hashed_name for base_column in m.base_columns}
|
|
3283
|
-
|
|
3284
|
-
autofe_feature
|
|
3280
|
+
|
|
3281
|
+
autofe_feature = (
|
|
3282
|
+
Feature.from_formula(m.formula)
|
|
3283
|
+
.set_display_index(m.display_index)
|
|
3284
|
+
.set_alias(m.alias)
|
|
3285
|
+
.set_op_params(m.operator_params or {})
|
|
3286
|
+
.rename_columns(orig_to_hashed)
|
|
3287
|
+
)
|
|
3288
|
+
|
|
3285
3289
|
if autofe_feature.op.is_vector:
|
|
3286
3290
|
continue
|
|
3287
3291
|
|
|
@@ -3302,7 +3306,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3302
3306
|
description[f"Feature {feature_idx}"] = bc.hashed_name
|
|
3303
3307
|
feature_idx += 1
|
|
3304
3308
|
|
|
3305
|
-
description["Function"] = autofe_feature.op.name
|
|
3309
|
+
description["Function"] = ",".join(sorted(autofe_feature.get_all_operand_names()))
|
|
3306
3310
|
|
|
3307
3311
|
descriptions.append(description)
|
|
3308
3312
|
|
|
@@ -256,9 +256,11 @@ class BaseColumnMetadata(BaseModel):
|
|
|
256
256
|
|
|
257
257
|
|
|
258
258
|
class GeneratedFeatureMetadata(BaseModel):
|
|
259
|
-
|
|
259
|
+
alias: Optional[str]
|
|
260
|
+
formula: str
|
|
260
261
|
display_index: str
|
|
261
262
|
base_columns: List[BaseColumnMetadata]
|
|
263
|
+
operator_params: Optional[Dict[str, str]]
|
|
262
264
|
|
|
263
265
|
|
|
264
266
|
class ProviderTaskMetadataV2(BaseModel):
|
|
@@ -11,49 +11,46 @@ def get_runtime_params_custom_loss(
|
|
|
11
11
|
runtime_parameters: RuntimeParameters,
|
|
12
12
|
logger: Optional[logging.Logger] = None,
|
|
13
13
|
) -> RuntimeParameters:
|
|
14
|
-
if not loss:
|
|
15
|
-
return runtime_parameters
|
|
16
|
-
|
|
17
14
|
if logger is None:
|
|
18
15
|
logger = logging.getLogger()
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
16
|
+
if loss is not None:
|
|
17
|
+
selection_loss_reg = [
|
|
18
|
+
"regression",
|
|
19
|
+
"regression_l1",
|
|
20
|
+
"huber",
|
|
21
|
+
"poisson",
|
|
22
|
+
"quantile",
|
|
23
|
+
"mape",
|
|
24
|
+
"mean_absolute_percentage_error",
|
|
25
|
+
"gamma",
|
|
26
|
+
"tweedie",
|
|
27
|
+
]
|
|
28
|
+
selection_loss_binary = ["binary"]
|
|
29
|
+
selection_loss_multi_clf = ["multiclass", "multiclassova", "multiclass_ova", "ova", "ovr"]
|
|
30
|
+
use_custom_loss = (
|
|
31
|
+
True
|
|
32
|
+
if (
|
|
33
|
+
(model_task_type == ModelTaskType.REGRESSION)
|
|
34
|
+
and (loss in selection_loss_reg)
|
|
35
|
+
or (model_task_type == ModelTaskType.BINARY)
|
|
36
|
+
and (loss in selection_loss_binary)
|
|
37
|
+
or (model_task_type == ModelTaskType.MULTICLASS)
|
|
38
|
+
and (loss in selection_loss_multi_clf)
|
|
39
|
+
)
|
|
40
|
+
else False
|
|
42
41
|
)
|
|
43
|
-
else False
|
|
44
|
-
)
|
|
45
42
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
43
|
+
if use_custom_loss:
|
|
44
|
+
runtime_parameters.properties["lightgbm_params_preselection.objective"] = loss
|
|
45
|
+
runtime_parameters.properties["lightgbm_params_base.objective"] = loss
|
|
46
|
+
runtime_parameters.properties["lightgbm_params_segment.objective"] = loss
|
|
47
|
+
msg = bundle.get("loss_selection_info").format(loss)
|
|
48
|
+
logger.info(msg)
|
|
49
|
+
print(msg)
|
|
50
|
+
else:
|
|
51
|
+
msg = bundle.get("loss_selection_warn").format(loss, model_task_type)
|
|
52
|
+
logger.warning(msg)
|
|
53
|
+
print(msg)
|
|
57
54
|
|
|
58
55
|
return runtime_parameters
|
|
59
56
|
|
|
@@ -35,7 +35,7 @@ def validate_version(logger: logging.Logger):
|
|
|
35
35
|
try:
|
|
36
36
|
current_version = parse(__version__)
|
|
37
37
|
latest_version = get_version("upgini")
|
|
38
|
-
if current_version < latest_version:
|
|
38
|
+
if current_version < latest_version: # type: ignore
|
|
39
39
|
msg = bundle.get("version_warning").format(current_version, latest_version)
|
|
40
40
|
logger.warning(msg)
|
|
41
41
|
print(msg)
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.285"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.285 → upgini-1.1.285a3418.post1}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|