upgini 1.2.155.dev1__tar.gz → 1.2.156.dev1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/PKG-INFO +1 -1
- upgini-1.2.156.dev1/src/upgini/__about__.py +1 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/date.py +30 -21
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/delta.py +2 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/trend.py +5 -3
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/volatility.py +4 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/unary.py +6 -9
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/utils.py +43 -1
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/features_enricher.py +19 -3
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/feature_info.py +2 -1
- upgini-1.2.155.dev1/src/upgini/__about__.py +0 -1
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/.gitignore +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/LICENSE +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/README.md +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/pyproject.toml +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/ads.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/all_operators.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/operator.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/base.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/cross.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/lag.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/roll.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/dataset.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/errors.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/http.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/metadata.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/metrics.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/search_task.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/spinner.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/config.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/hash_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/ip_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/mstats.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/one_hot_encoder.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/psi.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/sample_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/sort.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/ts_utils.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.156.dev1"
|
|
@@ -8,7 +8,7 @@ from pandas.core.arrays.timedeltas import TimedeltaArray
|
|
|
8
8
|
from pydantic import BaseModel, __version__ as pydantic_version
|
|
9
9
|
|
|
10
10
|
from upgini.autofe.operator import PandasOperator, ParametrizedOperator
|
|
11
|
-
from upgini.autofe.utils import pydantic_validator
|
|
11
|
+
from upgini.autofe.utils import bin_index, bin_index_many, bin_index_vectorized, pydantic_validator
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
def get_pydantic_version():
|
|
@@ -254,26 +254,33 @@ class DatePercentileBase(PandasOperator, abc.ABC):
|
|
|
254
254
|
left = pd.to_datetime(left, unit=self.date_unit)
|
|
255
255
|
|
|
256
256
|
bounds = self._get_bounds(left)
|
|
257
|
+
values = pd.to_numeric(right, errors="coerce").to_numpy(dtype=np.float64, copy=False)
|
|
258
|
+
bounds_list = bounds.tolist()
|
|
259
|
+
result = np.full(len(values), np.nan)
|
|
260
|
+
|
|
261
|
+
if not bounds_list:
|
|
262
|
+
return pd.Series(result, index=right.index).astype(pd.Int64Dtype()).astype("category")
|
|
263
|
+
|
|
264
|
+
bounds_lengths = {len(b) for b in bounds_list if isinstance(b, (list, np.ndarray))}
|
|
265
|
+
if len(bounds_lengths) == 1 and all(isinstance(b, (list, np.ndarray)) for b in bounds_list):
|
|
266
|
+
bounds_2d = np.asarray(bounds_list, dtype=np.float64)
|
|
267
|
+
if bounds_2d.ndim == 1:
|
|
268
|
+
result = bin_index_vectorized(values, bounds_2d)
|
|
269
|
+
else:
|
|
270
|
+
result = bin_index_many(values, bounds_2d)
|
|
271
|
+
else:
|
|
272
|
+
for i, row_bounds in enumerate(bounds_list):
|
|
273
|
+
if isinstance(row_bounds, (list, np.ndarray)) and len(row_bounds) > 0:
|
|
274
|
+
result[i] = bin_index(values[i], row_bounds)
|
|
257
275
|
|
|
258
|
-
return (
|
|
259
|
-
right.index.to_series()
|
|
260
|
-
.apply(lambda i: self._perc(right[i], bounds[i]))
|
|
261
|
-
.astype(pd.Int64Dtype())
|
|
262
|
-
.astype("category")
|
|
263
|
-
)
|
|
276
|
+
return pd.Series(result, index=right.index).astype(pd.Int64Dtype()).astype("category")
|
|
264
277
|
|
|
265
278
|
@abc.abstractmethod
|
|
266
279
|
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
267
280
|
pass
|
|
268
281
|
|
|
269
282
|
def _perc(self, f, bounds):
|
|
270
|
-
|
|
271
|
-
return np.nan
|
|
272
|
-
hit = np.where(f >= np.array(bounds))[0]
|
|
273
|
-
if hit.size > 0:
|
|
274
|
-
return np.max(hit) + 1
|
|
275
|
-
else:
|
|
276
|
-
return np.nan
|
|
283
|
+
return bin_index(f, bounds)
|
|
277
284
|
|
|
278
285
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
279
286
|
res = super().get_params()
|
|
@@ -313,13 +320,15 @@ class DatePercentile(DatePercentileBase):
|
|
|
313
320
|
return value
|
|
314
321
|
|
|
315
322
|
def _get_bounds(self, date_col: pd.Series) -> pd.Series:
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
)
|
|
323
|
+
zero_bounds = self.zero_bounds if self.zero_bounds is not None else []
|
|
324
|
+
if not zero_bounds:
|
|
325
|
+
return pd.Series([[] for _ in range(len(date_col))], index=date_col.index)
|
|
326
|
+
|
|
327
|
+
month_diffs = (
|
|
328
|
+
12 * (date_col.dt.year - (self.zero_year or 0)) + (date_col.dt.month - (self.zero_month or 0))
|
|
329
|
+
).to_numpy()
|
|
330
|
+
bounds_2d = np.asarray(zero_bounds, dtype=np.float64) + month_diffs[:, None] * self.step
|
|
331
|
+
return pd.Series(list(bounds_2d), index=date_col.index)
|
|
323
332
|
|
|
324
333
|
|
|
325
334
|
class DatePercentileMethod2(DatePercentileBase):
|
|
@@ -23,6 +23,8 @@ class DeltaBase(TimeSeriesBase):
|
|
|
23
23
|
def _calculate_delta(self, x: Union[pd.DataFrame, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
|
|
24
24
|
return_series = isinstance(x, pd.Series)
|
|
25
25
|
x = pd.DataFrame(x)
|
|
26
|
+
value_col = x.columns[-1]
|
|
27
|
+
x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
|
|
26
28
|
lag = Lag(lag_size=self.delta_size, lag_unit=self.delta_unit)
|
|
27
29
|
x.iloc[:, -1] = x.iloc[:, -1] - lag._aggregate(x.iloc[:, -1])
|
|
28
30
|
return x.iloc[:, -1] if return_series else x
|
|
@@ -54,12 +54,14 @@ class TrendCoefficient(TimeSeriesBase, ParametrizedOperator):
|
|
|
54
54
|
return_series = isinstance(x, pd.Series)
|
|
55
55
|
x = pd.DataFrame(x)
|
|
56
56
|
resampled = (
|
|
57
|
-
x.iloc[:, -1].resample(f"{self.step_size}{self.step_unit}").
|
|
57
|
+
x.iloc[:, -1].resample(f"{self.step_size}{self.step_unit}").ffill().bfill()
|
|
58
58
|
)
|
|
59
59
|
idx = np.arange(len(resampled))
|
|
60
|
+
value_col = x.columns[-1]
|
|
61
|
+
x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
|
|
60
62
|
try:
|
|
61
63
|
coeffs = np.polyfit(idx, resampled, 1)
|
|
62
|
-
x.iloc[:, -1] = coeffs[0]
|
|
64
|
+
x.iloc[:, -1] = float(coeffs[0])
|
|
63
65
|
except np.linalg.LinAlgError:
|
|
64
|
-
x.iloc[:, -1] = 0
|
|
66
|
+
x.iloc[:, -1] = 0.0
|
|
65
67
|
return x.iloc[:, -1] if return_series else x
|
|
@@ -65,6 +65,8 @@ class EWMAVolatility(VolatilityBase, ParametrizedOperator):
|
|
|
65
65
|
def _ewma_vol(self, x):
|
|
66
66
|
return_series = isinstance(x, pd.Series)
|
|
67
67
|
x = pd.DataFrame(x)
|
|
68
|
+
value_col = x.columns[-1]
|
|
69
|
+
x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
|
|
68
70
|
returns = self._get_returns(x.iloc[:, -1], f"{self.step_size}{self.step_unit}")
|
|
69
71
|
x.iloc[:, -1] = returns.ewm(span=self.window_size).std()
|
|
70
72
|
return x.iloc[:, -1] if return_series else x
|
|
@@ -93,6 +95,8 @@ class RollingVolBase(VolatilityBase):
|
|
|
93
95
|
) -> Union[pd.DataFrame, pd.Series]:
|
|
94
96
|
return_series = isinstance(x, pd.Series)
|
|
95
97
|
x = pd.DataFrame(x)
|
|
98
|
+
value_col = x.columns[-1]
|
|
99
|
+
x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
|
|
96
100
|
returns = self._get_returns(x.iloc[:, -1], f"{self.step_size}{self.step_unit}")
|
|
97
101
|
if abs_returns:
|
|
98
102
|
returns = returns.abs()
|
|
@@ -4,7 +4,7 @@ import numpy as np
|
|
|
4
4
|
import pandas as pd
|
|
5
5
|
|
|
6
6
|
from upgini.autofe.operator import PandasOperator, ParametrizedOperator, VectorizableMixin
|
|
7
|
-
from upgini.autofe.utils import pydantic_validator
|
|
7
|
+
from upgini.autofe.utils import bin_index, bin_index_vectorized, pydantic_validator
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
class Abs(PandasOperator, VectorizableMixin):
|
|
@@ -163,16 +163,13 @@ class Bin(PandasOperator):
|
|
|
163
163
|
is_categorical: bool = True
|
|
164
164
|
|
|
165
165
|
def calculate_unary(self, data: pd.Series) -> pd.Series:
|
|
166
|
-
|
|
166
|
+
bounds_arr = np.asarray(self.bin_bounds, dtype=np.float64)
|
|
167
|
+
values = pd.to_numeric(data, errors="coerce").to_numpy(dtype=np.float64, copy=False)
|
|
168
|
+
result = bin_index_vectorized(values, bounds_arr)
|
|
169
|
+
return pd.Series(result, index=data.index).fillna(-1).astype(int).astype("category")
|
|
167
170
|
|
|
168
171
|
def _bin(self, f, bounds):
|
|
169
|
-
|
|
170
|
-
return np.nan
|
|
171
|
-
hit = np.where(f >= np.array(bounds))[0]
|
|
172
|
-
if hit.size > 0:
|
|
173
|
-
return np.max(hit) + 1
|
|
174
|
-
else:
|
|
175
|
-
return np.nan
|
|
172
|
+
return bin_index(f, bounds)
|
|
176
173
|
|
|
177
174
|
def get_params(self) -> Dict[str, Optional[str]]:
|
|
178
175
|
res = super().get_params()
|
|
@@ -3,8 +3,9 @@ Utility functions for autofe module.
|
|
|
3
3
|
"""
|
|
4
4
|
|
|
5
5
|
import functools
|
|
6
|
-
from typing import Callable
|
|
6
|
+
from typing import Callable, Union
|
|
7
7
|
|
|
8
|
+
import numpy as np
|
|
8
9
|
from pydantic import BaseModel
|
|
9
10
|
|
|
10
11
|
|
|
@@ -111,3 +112,44 @@ def pydantic_copy_method(obj):
|
|
|
111
112
|
return obj.model_copy
|
|
112
113
|
else:
|
|
113
114
|
return obj.copy
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def bin_index(value: Union[float, int, None], bounds) -> float:
|
|
118
|
+
if value is None or (isinstance(value, float) and np.isnan(value)):
|
|
119
|
+
return np.nan
|
|
120
|
+
bounds_arr = np.asarray(bounds, dtype=np.float64)
|
|
121
|
+
if bounds_arr.size == 0 or value < bounds_arr[0]:
|
|
122
|
+
return np.nan
|
|
123
|
+
return np.searchsorted(bounds_arr, value, side="right")
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
def bin_index_vectorized(values: np.ndarray, bounds: np.ndarray) -> np.ndarray:
|
|
127
|
+
n = len(values)
|
|
128
|
+
result = np.full(n, np.nan)
|
|
129
|
+
bounds_arr = np.asarray(bounds, dtype=np.float64)
|
|
130
|
+
if bounds_arr.size == 0:
|
|
131
|
+
return result
|
|
132
|
+
valid = ~np.isnan(values)
|
|
133
|
+
if not valid.any():
|
|
134
|
+
return result
|
|
135
|
+
valid_values = values[valid]
|
|
136
|
+
idx = np.searchsorted(bounds_arr, valid_values, side="right").astype(np.float64)
|
|
137
|
+
below = valid_values < bounds_arr[0]
|
|
138
|
+
if below.any():
|
|
139
|
+
idx[below] = np.nan
|
|
140
|
+
result[valid] = idx
|
|
141
|
+
return result
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
def bin_index_many(values: np.ndarray, bounds_2d: np.ndarray) -> np.ndarray:
|
|
145
|
+
n = len(values)
|
|
146
|
+
result = np.full(n, np.nan)
|
|
147
|
+
for i in range(n):
|
|
148
|
+
v = values[i]
|
|
149
|
+
if np.isnan(v):
|
|
150
|
+
continue
|
|
151
|
+
bounds_row = bounds_2d[i]
|
|
152
|
+
if bounds_row.size == 0 or v < bounds_row[0]:
|
|
153
|
+
continue
|
|
154
|
+
result[i] = np.searchsorted(bounds_row, v, side="right")
|
|
155
|
+
return result
|
|
@@ -1537,10 +1537,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1537
1537
|
|
|
1538
1538
|
checking_eval_set_df[date_column] = date_converter.to_date_ms(eval_set_dates[selected_eval_set_idx].to_frame())
|
|
1539
1539
|
|
|
1540
|
-
|
|
1540
|
+
baseline_score_column = self._get_renamed_baseline_score_column()
|
|
1541
|
+
psi_df = checking_eval_set_df
|
|
1542
|
+
if baseline_score_column and baseline_score_column in psi_df.columns:
|
|
1543
|
+
psi_df = psi_df.drop(columns=[baseline_score_column])
|
|
1544
|
+
|
|
1545
|
+
cat_features = [c for c in cat_features if c in psi_df.columns]
|
|
1541
1546
|
|
|
1542
1547
|
psi_values_sparse = calculate_sparsity_psi(
|
|
1543
|
-
|
|
1548
|
+
psi_df, cat_features, date_column, self.logger, model_task_type
|
|
1544
1549
|
)
|
|
1545
1550
|
|
|
1546
1551
|
self.logger.info(f"PSI values by sparsity: {psi_values_sparse}")
|
|
@@ -1550,7 +1555,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1550
1555
|
self.logger.info(f"Unstable by sparsity features ({stability_threshold}): {sorted(unstable_by_sparsity)}")
|
|
1551
1556
|
|
|
1552
1557
|
psi_values = calculate_features_psi(
|
|
1553
|
-
|
|
1558
|
+
psi_df, cat_features, date_column, self.logger, model_task_type, stability_agg_func
|
|
1554
1559
|
)
|
|
1555
1560
|
|
|
1556
1561
|
self.logger.info(f"PSI values by value: {psi_values}")
|
|
@@ -1564,6 +1569,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1564
1569
|
}
|
|
1565
1570
|
|
|
1566
1571
|
total_unstable_features = sorted(set(unstable_by_sparsity + unstable_by_value))
|
|
1572
|
+
if baseline_score_column:
|
|
1573
|
+
total_unstable_features = [f for f in total_unstable_features if f != baseline_score_column]
|
|
1567
1574
|
|
|
1568
1575
|
return total_unstable_features
|
|
1569
1576
|
|
|
@@ -1753,6 +1760,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1753
1760
|
raise ValidationError(self.bundle.get("cat_feature_search_key").format(cat_feature))
|
|
1754
1761
|
return cat_features, search_keys_for_metrics
|
|
1755
1762
|
|
|
1763
|
+
def _get_renamed_baseline_score_column(self, columns_renaming: dict[str, str] | None = None) -> str | None:
|
|
1764
|
+
if self.baseline_score_column is None:
|
|
1765
|
+
return None
|
|
1766
|
+
if columns_renaming:
|
|
1767
|
+
return columns_renaming.get(self.baseline_score_column, self.baseline_score_column)
|
|
1768
|
+
if self.fit_columns_renaming:
|
|
1769
|
+
return self.fit_columns_renaming.get(self.baseline_score_column, self.baseline_score_column)
|
|
1770
|
+
return self.baseline_score_column
|
|
1771
|
+
|
|
1756
1772
|
def _get_cat_features_for_psi(
|
|
1757
1773
|
self,
|
|
1758
1774
|
client_cat_features: list[str] | None,
|
|
@@ -104,7 +104,8 @@ def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.Data
|
|
|
104
104
|
if data is not None and len(data) > 0 and feature_meta.name in data.columns:
|
|
105
105
|
if len(data) > 3:
|
|
106
106
|
rand = np.random.RandomState(42)
|
|
107
|
-
|
|
107
|
+
unique_values = sorted(data[feature_meta.name].dropna().unique(), key=str)
|
|
108
|
+
feature_sample = rand.choice(unique_values, 3, replace=False).tolist()
|
|
108
109
|
else:
|
|
109
110
|
feature_sample = data[feature_meta.name].dropna().unique().tolist()
|
|
110
111
|
if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.155.dev1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/strings_widget.properties
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|