upgini 1.1.282a3418.post2__tar.gz → 1.1.283__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/PKG-INFO +2 -2
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/README.md +1 -1
- upgini-1.1.283/src/upgini/__about__.py +1 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/all_operands.py +1 -2
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/date.py +2 -86
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/feature.py +5 -22
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/operand.py +2 -4
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/features_enricher.py +24 -11
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/metrics.py +1 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/sklearn_ext.py +28 -19
- upgini-1.1.282a3418.post2/src/upgini/__about__.py +0 -1
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/.gitignore +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/LICENSE +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/pyproject.toml +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/ads.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/dataset.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/errors.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/http.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/mdc/context.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/metadata.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/normalizer/phone_normalizer.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/sampler/base.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/search_task.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/spinner.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/format.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/version_validator.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.1.
|
|
3
|
+
Version: 1.1.283
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -839,4 +839,4 @@ Some convenient ways to start contributing are:
|
|
|
839
839
|
- [More perks for registered users](https://profile.upgini.com)
|
|
840
840
|
|
|
841
841
|
<sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
842
|
-
Please report it here
|
|
842
|
+
Please report it here</a></sup>
|
|
@@ -799,4 +799,4 @@ Some convenient ways to start contributing are:
|
|
|
799
799
|
- [More perks for registered users](https://profile.upgini.com)
|
|
800
800
|
|
|
801
801
|
<sup>😔 Found mistype or a bug in code snippet? Our bad! <a href="https://github.com/upgini/upgini/issues/new?assignees=&title=readme%2Fbug">
|
|
802
|
-
Please report it here
|
|
802
|
+
Please report it here</a></sup>
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.1.283"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
from typing import Dict
|
|
2
2
|
|
|
3
3
|
from upgini.autofe.binary import Add, Divide, Max, Min, Multiply, Sim, Subtract
|
|
4
|
-
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
|
|
4
|
+
from upgini.autofe.date import DateDiff, DateDiffType2, DateListDiff, DateListDiffBounded
|
|
5
5
|
from upgini.autofe.groupby import GroupByThenAgg, GroupByThenRank
|
|
6
6
|
from upgini.autofe.operand import Operand
|
|
7
7
|
from upgini.autofe.unary import Abs, Floor, Freq, Log, Residual, Sigmoid, Sqrt, Square
|
|
@@ -49,7 +49,6 @@ ALL_OPERANDS: Dict[str, Operand] = {
|
|
|
49
49
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=30, upper_bound=45),
|
|
50
50
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=45, upper_bound=60),
|
|
51
51
|
DateListDiffBounded(diff_unit="Y", aggregation="count", lower_bound=60),
|
|
52
|
-
DatePercentile(),
|
|
53
52
|
]
|
|
54
53
|
}
|
|
55
54
|
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
from
|
|
2
|
-
from typing import Any, Dict, List, Optional, Union
|
|
1
|
+
from typing import Any, Optional, Union
|
|
3
2
|
|
|
4
3
|
import numpy as np
|
|
5
4
|
import pandas as pd
|
|
6
5
|
from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
7
|
-
from pydantic import BaseModel
|
|
6
|
+
from pydantic import BaseModel
|
|
8
7
|
|
|
9
8
|
from upgini.autofe.operand import PandasOperand
|
|
10
9
|
|
|
@@ -28,17 +27,6 @@ class DateDiff(PandasOperand, DateDiffMixin):
|
|
|
28
27
|
is_binary = True
|
|
29
28
|
has_symmetry_importance = True
|
|
30
29
|
|
|
31
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
|
32
|
-
res = super().get_params()
|
|
33
|
-
res.update(
|
|
34
|
-
{
|
|
35
|
-
"diff_unit": self.diff_unit,
|
|
36
|
-
"left_unit": self.left_unit,
|
|
37
|
-
"right_unit": self.right_unit,
|
|
38
|
-
}
|
|
39
|
-
)
|
|
40
|
-
return res
|
|
41
|
-
|
|
42
30
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
43
31
|
left = self._convert_to_date(left, self.left_unit)
|
|
44
32
|
right = self._convert_to_date(right, self.right_unit)
|
|
@@ -54,17 +42,6 @@ class DateDiffType2(PandasOperand, DateDiffMixin):
|
|
|
54
42
|
is_binary = True
|
|
55
43
|
has_symmetry_importance = True
|
|
56
44
|
|
|
57
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
|
58
|
-
res = super().get_params()
|
|
59
|
-
res.update(
|
|
60
|
-
{
|
|
61
|
-
"diff_unit": self.diff_unit,
|
|
62
|
-
"left_unit": self.left_unit,
|
|
63
|
-
"right_unit": self.right_unit,
|
|
64
|
-
}
|
|
65
|
-
)
|
|
66
|
-
return res
|
|
67
|
-
|
|
68
45
|
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
69
46
|
left = self._convert_to_date(left, self.left_unit)
|
|
70
47
|
right = self._convert_to_date(right, self.right_unit)
|
|
@@ -87,15 +64,6 @@ class DateListDiff(PandasOperand, DateDiffMixin):
|
|
|
87
64
|
has_symmetry_importance = True
|
|
88
65
|
aggregation: str
|
|
89
66
|
|
|
90
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
|
91
|
-
res = super().get_params()
|
|
92
|
-
res.update(
|
|
93
|
-
{
|
|
94
|
-
"aggregation": self.aggregation,
|
|
95
|
-
}
|
|
96
|
-
)
|
|
97
|
-
return res
|
|
98
|
-
|
|
99
67
|
def __init__(self, **data: Any) -> None:
|
|
100
68
|
if "name" not in data:
|
|
101
69
|
data["name"] = f"date_diff_{data.get('aggregation')}"
|
|
@@ -148,55 +116,3 @@ class DateListDiffBounded(DateListDiff):
|
|
|
148
116
|
def _agg(self, x):
|
|
149
117
|
x = x[(x >= (self.lower_bound or -np.inf)) & (x < (self.upper_bound or np.inf))]
|
|
150
118
|
return super()._agg(x)
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
class DatePercentile(PandasOperand):
|
|
154
|
-
name = "date_per"
|
|
155
|
-
is_binary = True
|
|
156
|
-
output_type = "float"
|
|
157
|
-
|
|
158
|
-
date_unit: Optional[str] = None
|
|
159
|
-
zero_month: Optional[int]
|
|
160
|
-
zero_year: Optional[int]
|
|
161
|
-
zero_bounds: Optional[List[float]]
|
|
162
|
-
step: int = 30
|
|
163
|
-
|
|
164
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
|
165
|
-
res = super().get_params()
|
|
166
|
-
res.update(
|
|
167
|
-
{
|
|
168
|
-
"date_unit": self.date_unit,
|
|
169
|
-
"zero_month": self.zero_month,
|
|
170
|
-
"zero_year": self.zero_year,
|
|
171
|
-
"zero_bounds": self.zero_bounds,
|
|
172
|
-
"step": self.step,
|
|
173
|
-
}
|
|
174
|
-
)
|
|
175
|
-
return res
|
|
176
|
-
|
|
177
|
-
@validator("zero_bounds", pre=True)
|
|
178
|
-
def validate_bounds(cls, value):
|
|
179
|
-
if value is None or isinstance(value, list):
|
|
180
|
-
return value
|
|
181
|
-
elif isinstance(value, str):
|
|
182
|
-
return value[1:-1].split(", ")
|
|
183
|
-
|
|
184
|
-
def calculate_binary(self, left: pd.Series, right: pd.Series) -> pd.Series:
|
|
185
|
-
# Assuming that left is a date column, right is a feature column
|
|
186
|
-
left = pd.to_datetime(left, unit=self.date_unit)
|
|
187
|
-
months = left.dt.month
|
|
188
|
-
years = left.dt.year
|
|
189
|
-
|
|
190
|
-
month_diffs = 12 * (years - (self.zero_year or 0)) + (months - (self.zero_month or 0))
|
|
191
|
-
bounds = month_diffs.apply(
|
|
192
|
-
lambda d: np.array(self.zero_bounds if self.zero_bounds is not None else []) + d * 30
|
|
193
|
-
)
|
|
194
|
-
|
|
195
|
-
return right.index.to_series().apply(lambda i: self.__perc(right[i], bounds[i]))
|
|
196
|
-
|
|
197
|
-
def __perc(self, f, bounds):
|
|
198
|
-
hit = np.where(f >= bounds)[0]
|
|
199
|
-
if hit.size > 0:
|
|
200
|
-
return np.max(hit) * 10
|
|
201
|
-
else:
|
|
202
|
-
return np.nan
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import hashlib
|
|
2
2
|
import itertools
|
|
3
|
-
from typing import Dict, List, Optional,
|
|
3
|
+
from typing import Dict, List, Optional, Tuple, Union
|
|
4
4
|
|
|
5
5
|
import numpy as np
|
|
6
6
|
import pandas as pd
|
|
@@ -16,12 +16,6 @@ class Column:
|
|
|
16
16
|
self.data = data
|
|
17
17
|
self.calculate_all = calculate_all
|
|
18
18
|
|
|
19
|
-
def get_display_name(self, cache: bool = True, shorten: bool = False, **kwargs) -> str:
|
|
20
|
-
return self.name
|
|
21
|
-
|
|
22
|
-
def set_op_params(self, params: Dict[str, str]) -> "Column":
|
|
23
|
-
return self
|
|
24
|
-
|
|
25
19
|
def rename_columns(self, mapping: Dict[str, str]) -> "Column":
|
|
26
20
|
self.name = self._unhash(mapping.get(self.name) or self.name)
|
|
27
21
|
return self
|
|
@@ -75,30 +69,19 @@ class Feature:
|
|
|
75
69
|
self.cached_display_name = cached_display_name
|
|
76
70
|
self.alias = alias
|
|
77
71
|
|
|
78
|
-
def set_op_params(self, params:
|
|
79
|
-
obj_dict = self.op.dict().copy()
|
|
80
|
-
obj_dict.update(params or {})
|
|
81
|
-
self.op = self.op.__class__.parse_obj(obj_dict)
|
|
72
|
+
def set_op_params(self, params: Dict[str, str]) -> "Feature":
|
|
82
73
|
self.op.set_params(params)
|
|
83
|
-
|
|
84
|
-
for child in self.children:
|
|
85
|
-
child.set_op_params(params)
|
|
86
74
|
return self
|
|
87
75
|
|
|
88
76
|
def get_hash(self) -> str:
|
|
89
|
-
return hashlib.sha256(
|
|
90
|
-
|
|
91
|
-
|
|
77
|
+
return hashlib.sha256("_".join([self.op.name] + [ch.name for ch in self.children]).encode("utf-8")).hexdigest()[
|
|
78
|
+
:8
|
|
79
|
+
]
|
|
92
80
|
|
|
93
81
|
def set_alias(self, alias: str) -> "Feature":
|
|
94
82
|
self.alias = alias
|
|
95
83
|
return self
|
|
96
84
|
|
|
97
|
-
def get_all_operand_names(self) -> Set[str]:
|
|
98
|
-
return {self.op.name}.union(
|
|
99
|
-
{n for f in self.children if isinstance(f, Feature) for n in f.get_all_operand_names()}
|
|
100
|
-
)
|
|
101
|
-
|
|
102
85
|
def rename_columns(self, mapping: Dict[str, str]) -> "Feature":
|
|
103
86
|
for child in self.children:
|
|
104
87
|
child.rename_columns(mapping)
|
|
@@ -25,10 +25,8 @@ class Operand(BaseModel):
|
|
|
25
25
|
self.params = params
|
|
26
26
|
return self
|
|
27
27
|
|
|
28
|
-
def get_params(self) -> Dict[str,
|
|
29
|
-
|
|
30
|
-
res.update(self.params or {})
|
|
31
|
-
return res
|
|
28
|
+
def get_params(self) -> Dict[str, str]:
|
|
29
|
+
return self.params
|
|
32
30
|
|
|
33
31
|
|
|
34
32
|
MAIN_COLUMN = "main_column"
|
|
@@ -935,7 +935,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
935
935
|
metric = wrapper.metric_name
|
|
936
936
|
multiplier = wrapper.multiplier
|
|
937
937
|
|
|
938
|
-
# 1 If client features are presented - fit and predict with KFold
|
|
938
|
+
# 1 If client features are presented - fit and predict with KFold estimator
|
|
939
939
|
# on etalon features and calculate baseline metric
|
|
940
940
|
etalon_metric = None
|
|
941
941
|
baseline_estimator = None
|
|
@@ -962,9 +962,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
962
962
|
etalon_metric = baseline_estimator.cross_val_predict(
|
|
963
963
|
fitting_X, y_sorted, self.baseline_score_column
|
|
964
964
|
)
|
|
965
|
-
|
|
965
|
+
if etalon_metric is None:
|
|
966
|
+
self.logger.info(
|
|
967
|
+
f"Baseline {metric} on train client features is None (maybe all features was removed)"
|
|
968
|
+
)
|
|
969
|
+
baseline_estimator = None
|
|
970
|
+
else:
|
|
971
|
+
self.logger.info(f"Baseline {metric} on train client features: {etalon_metric}")
|
|
966
972
|
|
|
967
|
-
# 2 Fit and predict with KFold
|
|
973
|
+
# 2 Fit and predict with KFold estimator on enriched tds
|
|
968
974
|
# and calculate final metric (and uplift)
|
|
969
975
|
enriched_estimator = None
|
|
970
976
|
if set(fitting_X.columns) != set(fitting_enriched_X.columns):
|
|
@@ -986,11 +992,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
986
992
|
has_date=has_date,
|
|
987
993
|
)
|
|
988
994
|
enriched_metric = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
995
|
+
if etalon_metric is None:
|
|
996
|
+
self.logger.warning(
|
|
997
|
+
f"Enriched {metric} on train combined features is None (maybe all features was removed)"
|
|
998
|
+
)
|
|
999
|
+
enriched_estimator = None
|
|
993
1000
|
uplift = None
|
|
1001
|
+
else:
|
|
1002
|
+
self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
|
|
1003
|
+
uplift = (enriched_metric - etalon_metric) * multiplier
|
|
994
1004
|
else:
|
|
995
1005
|
enriched_metric = None
|
|
996
1006
|
uplift = None
|
|
@@ -1442,12 +1452,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1442
1452
|
if len(decimal_columns_to_fix) > 0:
|
|
1443
1453
|
for col in decimal_columns_to_fix:
|
|
1444
1454
|
fitting_eval_X[col] = (
|
|
1445
|
-
fitting_eval_X[col]
|
|
1455
|
+
fitting_eval_X[col]
|
|
1456
|
+
.astype("string").str
|
|
1457
|
+
.replace(",", ".", regex=False)
|
|
1458
|
+
.astype(np.float64)
|
|
1446
1459
|
)
|
|
1447
1460
|
fitting_enriched_eval_X[col] = (
|
|
1448
1461
|
fitting_enriched_eval_X[col]
|
|
1449
|
-
.astype("string")
|
|
1450
|
-
.
|
|
1462
|
+
.astype("string").str
|
|
1463
|
+
.replace(",", ".", regex=False)
|
|
1451
1464
|
.astype(np.float64)
|
|
1452
1465
|
)
|
|
1453
1466
|
|
|
@@ -3290,7 +3303,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
3290
3303
|
description[f"Feature {feature_idx}"] = bc.hashed_name
|
|
3291
3304
|
feature_idx += 1
|
|
3292
3305
|
|
|
3293
|
-
description["Function"] =
|
|
3306
|
+
description["Function"] = autofe_feature.op.name
|
|
3294
3307
|
|
|
3295
3308
|
descriptions.append(description)
|
|
3296
3309
|
|
|
@@ -298,6 +298,7 @@ class EstimatorWrapper:
|
|
|
298
298
|
scorer = check_scoring(self.estimator, scoring=self.scorer)
|
|
299
299
|
|
|
300
300
|
if baseline_score_column is not None and self.metric_name == "GINI":
|
|
301
|
+
self.logger.info("Calculate baseline GINI on passed baseline_score_column and target")
|
|
301
302
|
metric = roc_auc_score(y, x[baseline_score_column])
|
|
302
303
|
else:
|
|
303
304
|
cv_results = cross_validate(
|
|
@@ -17,7 +17,7 @@ from sklearn.base import clone, is_classifier
|
|
|
17
17
|
from sklearn.exceptions import FitFailedWarning, NotFittedError
|
|
18
18
|
from sklearn.metrics import check_scoring
|
|
19
19
|
from sklearn.metrics._scorer import _MultimetricScorer
|
|
20
|
-
from sklearn.model_selection import check_cv
|
|
20
|
+
from sklearn.model_selection import StratifiedKFold, check_cv
|
|
21
21
|
from sklearn.utils.fixes import np_version, parse_version
|
|
22
22
|
from sklearn.utils.validation import indexable
|
|
23
23
|
|
|
@@ -312,25 +312,34 @@ def cross_validate(
|
|
|
312
312
|
ret[key] = train_scores_dict[name]
|
|
313
313
|
|
|
314
314
|
return ret
|
|
315
|
-
except
|
|
315
|
+
except ValueError as e:
|
|
316
316
|
# logging.exception("Failed to execute overriden cross_validate. Fallback to original")
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
317
|
+
if hasattr(e, "args") and len(e.args) > 0 and "Only one class present in y_true" in e.args[0]:
|
|
318
|
+
# Try change CV to StratifiedKFold and retry
|
|
319
|
+
if hasattr(cv, "shuffle"):
|
|
320
|
+
shuffle = cv.shuffle
|
|
321
|
+
else:
|
|
322
|
+
shuffle = False
|
|
323
|
+
if hasattr(cv, "random_state"):
|
|
324
|
+
random_state = cv.random_state
|
|
325
|
+
else:
|
|
326
|
+
random_state = None
|
|
327
|
+
return cross_validate(
|
|
328
|
+
estimator,
|
|
329
|
+
x,
|
|
330
|
+
y,
|
|
331
|
+
groups=groups,
|
|
332
|
+
scoring=scoring,
|
|
333
|
+
cv=StratifiedKFold(n_splits=cv.get_n_splits(), shuffle=shuffle, random_state=random_state),
|
|
334
|
+
n_jobs=n_jobs,
|
|
335
|
+
verbose=verbose,
|
|
336
|
+
fit_params=fit_params,
|
|
337
|
+
pre_dispatch=pre_dispatch,
|
|
338
|
+
return_train_score=return_train_score,
|
|
339
|
+
return_estimator=return_estimator,
|
|
340
|
+
error_score=error_score,
|
|
341
|
+
)
|
|
342
|
+
raise e
|
|
334
343
|
|
|
335
344
|
|
|
336
345
|
def _fit_and_score(
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.1.282a3418-2"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/data_source/data_source_publisher.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.1.282a3418.post2 → upgini-1.1.283}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|