upgini 1.2.155.dev1__tar.gz → 1.2.156.dev1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (83) hide show
  1. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/PKG-INFO +1 -1
  2. upgini-1.2.156.dev1/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/date.py +30 -21
  4. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/delta.py +2 -0
  5. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/trend.py +5 -3
  6. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/volatility.py +4 -0
  7. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/unary.py +6 -9
  8. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/utils.py +43 -1
  9. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/features_enricher.py +19 -3
  10. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/feature_info.py +2 -1
  11. upgini-1.2.155.dev1/src/upgini/__about__.py +0 -1
  12. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/.gitignore +0 -0
  13. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/LICENSE +0 -0
  14. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/README.md +0 -0
  15. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/pyproject.toml +0 -0
  16. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/__init__.py +0 -0
  17. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/ads.py +0 -0
  18. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/ads_management/__init__.py +0 -0
  19. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/ads_management/ads_manager.py +0 -0
  20. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/__init__.py +0 -0
  21. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/all_operators.py +0 -0
  22. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/binary.py +0 -0
  23. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/feature.py +0 -0
  24. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/groupby.py +0 -0
  25. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/operator.py +0 -0
  26. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/__init__.py +0 -0
  27. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/base.py +0 -0
  28. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/cross.py +0 -0
  29. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/lag.py +0 -0
  30. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/timeseries/roll.py +0 -0
  31. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/autofe/vector.py +0 -0
  32. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/data_source/__init__.py +0 -0
  33. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/data_source/data_source_publisher.py +0 -0
  34. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/dataset.py +0 -0
  35. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/errors.py +0 -0
  36. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/http.py +0 -0
  37. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/metadata.py +0 -0
  40. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/metrics.py +0 -0
  41. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/normalizer/normalize_utils.py +0 -0
  43. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/__init__.py +0 -0
  44. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/exceptions.py +0 -0
  45. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/strings.properties +0 -0
  46. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  47. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/__init__.py +0 -0
  48. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/base.py +0 -0
  49. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/random_under_sampler.py +0 -0
  50. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/sampler/utils.py +0 -0
  51. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/search_task.py +0 -0
  52. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/spinner.py +0 -0
  53. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  54. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/__init__.py +0 -0
  55. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/base_search_key_detector.py +0 -0
  56. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/blocked_time_series.py +0 -0
  57. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/config.py +0 -0
  58. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/country_utils.py +0 -0
  59. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/custom_loss_utils.py +0 -0
  60. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/cv_utils.py +0 -0
  61. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/datetime_utils.py +0 -0
  62. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/deduplicate_utils.py +0 -0
  63. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/display_utils.py +0 -0
  64. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/email_utils.py +0 -0
  65. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  66. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/one_hot_encoder.py +0 -0
  72. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/phone_utils.py +0 -0
  73. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/postal_code_utils.py +0 -0
  74. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/progress_bar.py +0 -0
  75. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/psi.py +0 -0
  76. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/sample_utils.py +0 -0
  77. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/sklearn_ext.py +0 -0
  78. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/sort.py +0 -0
  79. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/target_utils.py +0 -0
  80. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/track_info.py +0 -0
  81. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/ts_utils.py +0 -0
  82. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/utils/warning_counter.py +0 -0
  83. {upgini-1.2.155.dev1 → upgini-1.2.156.dev1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.155.dev1
3
+ Version: 1.2.156.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.156.dev1"
@@ -8,7 +8,7 @@ from pandas.core.arrays.timedeltas import TimedeltaArray
8
8
  from pydantic import BaseModel, __version__ as pydantic_version
9
9
 
10
10
  from upgini.autofe.operator import PandasOperator, ParametrizedOperator
11
- from upgini.autofe.utils import pydantic_validator
11
+ from upgini.autofe.utils import bin_index, bin_index_many, bin_index_vectorized, pydantic_validator
12
12
 
13
13
 
14
14
  def get_pydantic_version():
@@ -254,26 +254,33 @@ class DatePercentileBase(PandasOperator, abc.ABC):
254
254
  left = pd.to_datetime(left, unit=self.date_unit)
255
255
 
256
256
  bounds = self._get_bounds(left)
257
+ values = pd.to_numeric(right, errors="coerce").to_numpy(dtype=np.float64, copy=False)
258
+ bounds_list = bounds.tolist()
259
+ result = np.full(len(values), np.nan)
260
+
261
+ if not bounds_list:
262
+ return pd.Series(result, index=right.index).astype(pd.Int64Dtype()).astype("category")
263
+
264
+ bounds_lengths = {len(b) for b in bounds_list if isinstance(b, (list, np.ndarray))}
265
+ if len(bounds_lengths) == 1 and all(isinstance(b, (list, np.ndarray)) for b in bounds_list):
266
+ bounds_2d = np.asarray(bounds_list, dtype=np.float64)
267
+ if bounds_2d.ndim == 1:
268
+ result = bin_index_vectorized(values, bounds_2d)
269
+ else:
270
+ result = bin_index_many(values, bounds_2d)
271
+ else:
272
+ for i, row_bounds in enumerate(bounds_list):
273
+ if isinstance(row_bounds, (list, np.ndarray)) and len(row_bounds) > 0:
274
+ result[i] = bin_index(values[i], row_bounds)
257
275
 
258
- return (
259
- right.index.to_series()
260
- .apply(lambda i: self._perc(right[i], bounds[i]))
261
- .astype(pd.Int64Dtype())
262
- .astype("category")
263
- )
276
+ return pd.Series(result, index=right.index).astype(pd.Int64Dtype()).astype("category")
264
277
 
265
278
  @abc.abstractmethod
266
279
  def _get_bounds(self, date_col: pd.Series) -> pd.Series:
267
280
  pass
268
281
 
269
282
  def _perc(self, f, bounds):
270
- if f is None or np.isnan(f):
271
- return np.nan
272
- hit = np.where(f >= np.array(bounds))[0]
273
- if hit.size > 0:
274
- return np.max(hit) + 1
275
- else:
276
- return np.nan
283
+ return bin_index(f, bounds)
277
284
 
278
285
  def get_params(self) -> Dict[str, Optional[str]]:
279
286
  res = super().get_params()
@@ -313,13 +320,15 @@ class DatePercentile(DatePercentileBase):
313
320
  return value
314
321
 
315
322
  def _get_bounds(self, date_col: pd.Series) -> pd.Series:
316
- months = date_col.dt.month
317
- years = date_col.dt.year
318
-
319
- month_diffs = 12 * (years - (self.zero_year or 0)) + (months - (self.zero_month or 0))
320
- return month_diffs.apply(
321
- lambda d: np.array(self.zero_bounds if self.zero_bounds is not None else []) + d * self.step
322
- )
323
+ zero_bounds = self.zero_bounds if self.zero_bounds is not None else []
324
+ if not zero_bounds:
325
+ return pd.Series([[] for _ in range(len(date_col))], index=date_col.index)
326
+
327
+ month_diffs = (
328
+ 12 * (date_col.dt.year - (self.zero_year or 0)) + (date_col.dt.month - (self.zero_month or 0))
329
+ ).to_numpy()
330
+ bounds_2d = np.asarray(zero_bounds, dtype=np.float64) + month_diffs[:, None] * self.step
331
+ return pd.Series(list(bounds_2d), index=date_col.index)
323
332
 
324
333
 
325
334
  class DatePercentileMethod2(DatePercentileBase):
@@ -23,6 +23,8 @@ class DeltaBase(TimeSeriesBase):
23
23
  def _calculate_delta(self, x: Union[pd.DataFrame, pd.Series]) -> Union[pd.DataFrame, pd.Series]:
24
24
  return_series = isinstance(x, pd.Series)
25
25
  x = pd.DataFrame(x)
26
+ value_col = x.columns[-1]
27
+ x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
26
28
  lag = Lag(lag_size=self.delta_size, lag_unit=self.delta_unit)
27
29
  x.iloc[:, -1] = x.iloc[:, -1] - lag._aggregate(x.iloc[:, -1])
28
30
  return x.iloc[:, -1] if return_series else x
@@ -54,12 +54,14 @@ class TrendCoefficient(TimeSeriesBase, ParametrizedOperator):
54
54
  return_series = isinstance(x, pd.Series)
55
55
  x = pd.DataFrame(x)
56
56
  resampled = (
57
- x.iloc[:, -1].resample(f"{self.step_size}{self.step_unit}").fillna(method="ffill").fillna(method="bfill")
57
+ x.iloc[:, -1].resample(f"{self.step_size}{self.step_unit}").ffill().bfill()
58
58
  )
59
59
  idx = np.arange(len(resampled))
60
+ value_col = x.columns[-1]
61
+ x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
60
62
  try:
61
63
  coeffs = np.polyfit(idx, resampled, 1)
62
- x.iloc[:, -1] = coeffs[0]
64
+ x.iloc[:, -1] = float(coeffs[0])
63
65
  except np.linalg.LinAlgError:
64
- x.iloc[:, -1] = 0
66
+ x.iloc[:, -1] = 0.0
65
67
  return x.iloc[:, -1] if return_series else x
@@ -65,6 +65,8 @@ class EWMAVolatility(VolatilityBase, ParametrizedOperator):
65
65
  def _ewma_vol(self, x):
66
66
  return_series = isinstance(x, pd.Series)
67
67
  x = pd.DataFrame(x)
68
+ value_col = x.columns[-1]
69
+ x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
68
70
  returns = self._get_returns(x.iloc[:, -1], f"{self.step_size}{self.step_unit}")
69
71
  x.iloc[:, -1] = returns.ewm(span=self.window_size).std()
70
72
  return x.iloc[:, -1] if return_series else x
@@ -93,6 +95,8 @@ class RollingVolBase(VolatilityBase):
93
95
  ) -> Union[pd.DataFrame, pd.Series]:
94
96
  return_series = isinstance(x, pd.Series)
95
97
  x = pd.DataFrame(x)
98
+ value_col = x.columns[-1]
99
+ x[value_col] = pd.to_numeric(x[value_col], errors="coerce").astype("float64")
96
100
  returns = self._get_returns(x.iloc[:, -1], f"{self.step_size}{self.step_unit}")
97
101
  if abs_returns:
98
102
  returns = returns.abs()
@@ -4,7 +4,7 @@ import numpy as np
4
4
  import pandas as pd
5
5
 
6
6
  from upgini.autofe.operator import PandasOperator, ParametrizedOperator, VectorizableMixin
7
- from upgini.autofe.utils import pydantic_validator
7
+ from upgini.autofe.utils import bin_index, bin_index_vectorized, pydantic_validator
8
8
 
9
9
 
10
10
  class Abs(PandasOperator, VectorizableMixin):
@@ -163,16 +163,13 @@ class Bin(PandasOperator):
163
163
  is_categorical: bool = True
164
164
 
165
165
  def calculate_unary(self, data: pd.Series) -> pd.Series:
166
- return data.apply(self._bin, bounds=self.bin_bounds).fillna(-1).astype(int).astype("category")
166
+ bounds_arr = np.asarray(self.bin_bounds, dtype=np.float64)
167
+ values = pd.to_numeric(data, errors="coerce").to_numpy(dtype=np.float64, copy=False)
168
+ result = bin_index_vectorized(values, bounds_arr)
169
+ return pd.Series(result, index=data.index).fillna(-1).astype(int).astype("category")
167
170
 
168
171
  def _bin(self, f, bounds):
169
- if f is None or np.isnan(f):
170
- return np.nan
171
- hit = np.where(f >= np.array(bounds))[0]
172
- if hit.size > 0:
173
- return np.max(hit) + 1
174
- else:
175
- return np.nan
172
+ return bin_index(f, bounds)
176
173
 
177
174
  def get_params(self) -> Dict[str, Optional[str]]:
178
175
  res = super().get_params()
@@ -3,8 +3,9 @@ Utility functions for autofe module.
3
3
  """
4
4
 
5
5
  import functools
6
- from typing import Callable
6
+ from typing import Callable, Union
7
7
 
8
+ import numpy as np
8
9
  from pydantic import BaseModel
9
10
 
10
11
 
@@ -111,3 +112,44 @@ def pydantic_copy_method(obj):
111
112
  return obj.model_copy
112
113
  else:
113
114
  return obj.copy
115
+
116
+
117
+ def bin_index(value: Union[float, int, None], bounds) -> float:
118
+ if value is None or (isinstance(value, float) and np.isnan(value)):
119
+ return np.nan
120
+ bounds_arr = np.asarray(bounds, dtype=np.float64)
121
+ if bounds_arr.size == 0 or value < bounds_arr[0]:
122
+ return np.nan
123
+ return np.searchsorted(bounds_arr, value, side="right")
124
+
125
+
126
+ def bin_index_vectorized(values: np.ndarray, bounds: np.ndarray) -> np.ndarray:
127
+ n = len(values)
128
+ result = np.full(n, np.nan)
129
+ bounds_arr = np.asarray(bounds, dtype=np.float64)
130
+ if bounds_arr.size == 0:
131
+ return result
132
+ valid = ~np.isnan(values)
133
+ if not valid.any():
134
+ return result
135
+ valid_values = values[valid]
136
+ idx = np.searchsorted(bounds_arr, valid_values, side="right").astype(np.float64)
137
+ below = valid_values < bounds_arr[0]
138
+ if below.any():
139
+ idx[below] = np.nan
140
+ result[valid] = idx
141
+ return result
142
+
143
+
144
+ def bin_index_many(values: np.ndarray, bounds_2d: np.ndarray) -> np.ndarray:
145
+ n = len(values)
146
+ result = np.full(n, np.nan)
147
+ for i in range(n):
148
+ v = values[i]
149
+ if np.isnan(v):
150
+ continue
151
+ bounds_row = bounds_2d[i]
152
+ if bounds_row.size == 0 or v < bounds_row[0]:
153
+ continue
154
+ result[i] = np.searchsorted(bounds_row, v, side="right")
155
+ return result
@@ -1537,10 +1537,15 @@ class FeaturesEnricher(TransformerMixin):
1537
1537
 
1538
1538
  checking_eval_set_df[date_column] = date_converter.to_date_ms(eval_set_dates[selected_eval_set_idx].to_frame())
1539
1539
 
1540
- cat_features = [c for c in cat_features if c in checking_eval_set_df.columns]
1540
+ baseline_score_column = self._get_renamed_baseline_score_column()
1541
+ psi_df = checking_eval_set_df
1542
+ if baseline_score_column and baseline_score_column in psi_df.columns:
1543
+ psi_df = psi_df.drop(columns=[baseline_score_column])
1544
+
1545
+ cat_features = [c for c in cat_features if c in psi_df.columns]
1541
1546
 
1542
1547
  psi_values_sparse = calculate_sparsity_psi(
1543
- checking_eval_set_df, cat_features, date_column, self.logger, model_task_type
1548
+ psi_df, cat_features, date_column, self.logger, model_task_type
1544
1549
  )
1545
1550
 
1546
1551
  self.logger.info(f"PSI values by sparsity: {psi_values_sparse}")
@@ -1550,7 +1555,7 @@ class FeaturesEnricher(TransformerMixin):
1550
1555
  self.logger.info(f"Unstable by sparsity features ({stability_threshold}): {sorted(unstable_by_sparsity)}")
1551
1556
 
1552
1557
  psi_values = calculate_features_psi(
1553
- checking_eval_set_df, cat_features, date_column, self.logger, model_task_type, stability_agg_func
1558
+ psi_df, cat_features, date_column, self.logger, model_task_type, stability_agg_func
1554
1559
  )
1555
1560
 
1556
1561
  self.logger.info(f"PSI values by value: {psi_values}")
@@ -1564,6 +1569,8 @@ class FeaturesEnricher(TransformerMixin):
1564
1569
  }
1565
1570
 
1566
1571
  total_unstable_features = sorted(set(unstable_by_sparsity + unstable_by_value))
1572
+ if baseline_score_column:
1573
+ total_unstable_features = [f for f in total_unstable_features if f != baseline_score_column]
1567
1574
 
1568
1575
  return total_unstable_features
1569
1576
 
@@ -1753,6 +1760,15 @@ class FeaturesEnricher(TransformerMixin):
1753
1760
  raise ValidationError(self.bundle.get("cat_feature_search_key").format(cat_feature))
1754
1761
  return cat_features, search_keys_for_metrics
1755
1762
 
1763
+ def _get_renamed_baseline_score_column(self, columns_renaming: dict[str, str] | None = None) -> str | None:
1764
+ if self.baseline_score_column is None:
1765
+ return None
1766
+ if columns_renaming:
1767
+ return columns_renaming.get(self.baseline_score_column, self.baseline_score_column)
1768
+ if self.fit_columns_renaming:
1769
+ return self.fit_columns_renaming.get(self.baseline_score_column, self.baseline_score_column)
1770
+ return self.baseline_score_column
1771
+
1756
1772
  def _get_cat_features_for_psi(
1757
1773
  self,
1758
1774
  client_cat_features: list[str] | None,
@@ -104,7 +104,8 @@ def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.Data
104
104
  if data is not None and len(data) > 0 and feature_meta.name in data.columns:
105
105
  if len(data) > 3:
106
106
  rand = np.random.RandomState(42)
107
- feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
107
+ unique_values = sorted(data[feature_meta.name].dropna().unique(), key=str)
108
+ feature_sample = rand.choice(unique_values, 3, replace=False).tolist()
108
109
  else:
109
110
  feature_sample = data[feature_meta.name].dropna().unique().tolist()
110
111
  if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
@@ -1 +0,0 @@
1
- __version__ = "1.2.155.dev1"
File without changes
File without changes
File without changes