upgini 1.2.59a3818.dev1-py3-none-any.whl → 1.2.60a3792.dev1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

upgini/__about__.py CHANGED
@@ -1 +1 @@
-__version__ = "1.2.59a3818.dev1"
+__version__ = "1.2.60a3792.dev1"
upgini/autofe/vector.py CHANGED
@@ -55,7 +55,7 @@ class TimeSeriesBase(PandasOperand, abc.ABC):
         ts.set_index(date.name, inplace=True)
         ts = ts[ts.index.notna()].sort_index()
         ts = (
-            ts.groupby([c.name for c in data[1:-1]], group_keys=True)
+            ts.groupby([c.name for c in data[1:-1]])
             .apply(self._shift)[data[-1].name]
             .to_frame()
             .reset_index()
upgini/data_source/data_source_publisher.py CHANGED
@@ -386,6 +386,7 @@ class DataSourcePublisher:
         search_keys = [k.value.value for k in search_keys] if search_keys else None
         request = {"bqTableId": bq_table_id, "searchKeys": search_keys}
         task_id = self._rest_client.upload_online(request, trace_id)
+        print(f"Uploading online task created. task_id={task_id}")
         with Spinner():
             status_response = self._rest_client.poll_ads_management_task_status(task_id, trace_id)
             while status_response["status"] not in self.FINAL_STATUSES:
upgini/dataset.py CHANGED
@@ -41,6 +41,7 @@ from upgini.utils.target_utils import (
     balance_undersample,
     balance_undersample_forced,
     balance_undersample_time_series,
+    balance_undersample_time_series_trunc,
 )
 
 try:
@@ -58,6 +59,8 @@ class Dataset:  # (pd.DataFrame):
     FIT_SAMPLE_THRESHOLD = 200_000
     FIT_SAMPLE_WITH_EVAL_SET_ROWS = 200_000
     FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 200_000
+    FIT_SAMPLE_THRESHOLD_TS = 54_000
+    FIT_SAMPLE_ROWS_TS = 54_000
     BINARY_MIN_SAMPLE_THRESHOLD = 5_000
     MULTICLASS_MIN_SAMPLE_THRESHOLD = 25_000
     IMBALANCE_THESHOLD = 0.6
@@ -304,6 +307,9 @@ class Dataset:  # (pd.DataFrame):
         if not self.imbalanced and EVAL_SET_INDEX in self.data.columns:
             sample_threshold = self.FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD
             sample_rows = self.FIT_SAMPLE_WITH_EVAL_SET_ROWS
+        elif self.cv_type is not None and self.cv_type.is_time_series():
+            sample_threshold = self.FIT_SAMPLE_THRESHOLD_TS
+            sample_rows = self.FIT_SAMPLE_ROWS_TS
         else:
             sample_threshold = self.FIT_SAMPLE_THRESHOLD
             sample_rows = self.FIT_SAMPLE_ROWS
@@ -314,7 +320,7 @@ class Dataset:  # (pd.DataFrame):
                 f"and will be downsampled to {sample_rows}"
             )
             if self.cv_type is not None and self.cv_type.is_time_series():
-                resampled_data = balance_undersample_time_series(
+                resampled_data = balance_undersample_time_series_trunc(
                     df=self.data,
                     id_columns=self.id_columns,
                     date_column=next(
@@ -584,19 +590,31 @@ class Dataset:  # (pd.DataFrame):
         return search_customization
 
     def _rename_generate_features(self, runtime_parameters: Optional[RuntimeParameters]) -> Optional[RuntimeParameters]:
-        if (
-            runtime_parameters is not None
-            and runtime_parameters.properties is not None
-            and "generate_features" in runtime_parameters.properties
-        ):
-            generate_features = runtime_parameters.properties["generate_features"].split(",")
-            renamed_generate_features = []
-            for f in generate_features:
-                for new_column, orig_column in self.columns_renaming.items():
-                    if f == orig_column:
-                        renamed_generate_features.append(new_column)
-            runtime_parameters.properties["generate_features"] = ",".join(renamed_generate_features)
+        if runtime_parameters is not None and runtime_parameters.properties is not None:
+            if "generate_features" in runtime_parameters.properties:
+                generate_features = runtime_parameters.properties["generate_features"].split(",")
+                renamed_generate_features = []
+                for f in generate_features:
+                    for new_column, orig_column in self.columns_renaming.items():
+                        if f == orig_column:
+                            renamed_generate_features.append(new_column)
+                runtime_parameters.properties["generate_features"] = ",".join(renamed_generate_features)
+            if "columns_for_online_api" in runtime_parameters.properties:
+                columns_for_online_api = runtime_parameters.properties["columns_for_online_api"].split(",")
+                renamed_columns_for_online_api = []
+                for f in columns_for_online_api:
+                    for new_column, orig_column in self.columns_renaming.items():
+                        if f == orig_column:
+                            renamed_columns_for_online_api.append(new_column)
+                runtime_parameters.properties["columns_for_online_api"] = ",".join(renamed_columns_for_online_api)
+
+        return runtime_parameters
 
+    def _set_sample_size(self, runtime_parameters: Optional[RuntimeParameters]) -> Optional[RuntimeParameters]:
+        if runtime_parameters is not None and runtime_parameters.properties is not None:
+            if self.cv_type is not None and self.cv_type.is_time_series():
+                runtime_parameters.properties["sample_size"] = self.FIT_SAMPLE_ROWS_TS
+                runtime_parameters.properties["iter0_sample_size"] = self.FIT_SAMPLE_ROWS_TS
         return runtime_parameters
 
     def _clean_generate_features(self, runtime_parameters: Optional[RuntimeParameters]) -> Optional[RuntimeParameters]:
@@ -630,6 +648,7 @@ class Dataset:  # (pd.DataFrame):
             file_metrics = FileMetrics()
 
         runtime_parameters = self._rename_generate_features(runtime_parameters)
+        runtime_parameters = self._set_sample_size(runtime_parameters)
 
         file_metadata = self.__construct_metadata(exclude_features_sources)
         search_customization = self.__construct_search_customization(
upgini/features_enricher.py CHANGED
@@ -222,6 +222,7 @@ class FeaturesEnricher(TransformerMixin):
         loss: Optional[str] = None,
         detect_missing_search_keys: bool = True,
         generate_features: Optional[List[str]] = None,
+        columns_for_online_api: Optional[List[str]] = None,
         round_embeddings: Optional[int] = None,
         logs_enabled: bool = True,
         raise_validation_error: bool = True,
@@ -345,6 +346,9 @@ class FeaturesEnricher(TransformerMixin):
                 self.logger.error(msg)
                 raise ValidationError(msg)
             self.runtime_parameters.properties["round_embeddings"] = round_embeddings
+        self.columns_for_online_api = columns_for_online_api
+        if columns_for_online_api is not None:
+            self.runtime_parameters.properties["columns_for_online_api"] = ",".join(columns_for_online_api)
         maybe_downsampling_limit = self.runtime_parameters.properties.get("downsampling_limit")
         if maybe_downsampling_limit is not None:
             Dataset.FIT_SAMPLE_THRESHOLD = int(maybe_downsampling_limit)
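Taken together, the two hunks above thread a new `columns_for_online_api` constructor argument through FeaturesEnricher and forward it to the backend via runtime parameters. A minimal usage sketch, based only on this diff (the file and column names are hypothetical; per the fit-path hunks below, passing the argument forces downsampling to Dataset.FORCE_SAMPLE_SIZE on large datasets unless disable_force_downsampling is set, and the listed columns must be present in X or a ValidationError is raised):

import pandas as pd
from upgini import FeaturesEnricher, SearchKey

train = pd.read_csv("train.csv")  # hypothetical training file
enricher = FeaturesEnricher(
    search_keys={"date": SearchKey.DATE, "phone": SearchKey.PHONE},
    # New in this release: columns that must stay available for online (API)
    # feature generation.
    columns_for_online_api=["phone"],
)
enricher.fit(train.drop(columns=["target"]), train["target"])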
@@ -1873,13 +1877,9 @@ class FeaturesEnricher(TransformerMixin):
 
         # downsample if need to eval_set threshold
         num_samples = _num_samples(df)
-        phone_column = self._get_phone_column(self.search_keys)
         force_downsampling = (
             not self.disable_force_downsampling
-            and self.generate_features is not None
-            and phone_column is not None
-            and self.fit_columns_renaming is not None
-            and self.fit_columns_renaming.get(phone_column) in self.generate_features
+            and self.columns_for_online_api is not None
             and num_samples > Dataset.FORCE_SAMPLE_SIZE
         )
         if force_downsampling:
@@ -1948,7 +1948,27 @@ class FeaturesEnricher(TransformerMixin):
         df, _ = clean_full_duplicates(df, logger=self.logger, bundle=self.bundle)
 
         num_samples = _num_samples(df)
-        if num_samples > Dataset.FIT_SAMPLE_THRESHOLD:
+        force_downsampling = (
+            not self.disable_force_downsampling
+            and self.columns_for_online_api is not None
+            and num_samples > Dataset.FORCE_SAMPLE_SIZE
+        )
+        if force_downsampling:
+            self.logger.info(f"Force downsampling from {num_samples} to {Dataset.FORCE_SAMPLE_SIZE}")
+            df = balance_undersample_forced(
+                df=df,
+                target_column=TARGET,
+                id_columns=self.id_columns,
+                date_column=self._get_date_column(self.search_keys),
+                task_type=self.model_task_type,
+                cv_type=self.cv,
+                random_state=self.random_state,
+                sample_size=Dataset.FORCE_SAMPLE_SIZE,
+                logger=self.logger,
+                bundle=self.bundle,
+                warning_callback=self.__log_warning,
+            )
+        elif num_samples > Dataset.FIT_SAMPLE_THRESHOLD:
             self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_ROWS}")
             df = df.sample(n=Dataset.FIT_SAMPLE_ROWS, random_state=self.random_state)
 
@@ -2620,17 +2640,18 @@ if response.status_code == 200:
             checked_generate_features = []
             for gen_feature in self.generate_features:
                 if gen_feature not in x_columns:
-                    if gen_feature == self._get_phone_column(self.search_keys):
-                        raise ValidationError(
-                            self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
-                        )
-                    else:
-                        self.__log_warning(self.bundle.get("missing_generate_feature").format(gen_feature, x_columns))
+                    msg = self.bundle.get("missing_generate_feature").format(gen_feature, x_columns)
+                    self.__log_warning(msg)
                 else:
                     checked_generate_features.append(gen_feature)
             self.generate_features = checked_generate_features
             self.runtime_parameters.properties["generate_features"] = ",".join(self.generate_features)
 
+        if self.columns_for_online_api is not None and len(self.columns_for_online_api) > 0:
+            for column in self.columns_for_online_api:
+                if column not in validated_X.columns:
+                    raise ValidationError(self.bundle.get("missing_column_for_online_api").format(column))
+
         if self.id_columns is not None:
             for id_column in self.id_columns:
                 if id_column not in validated_X.columns:
@@ -2852,9 +2873,7 @@ class FeaturesEnricher(TransformerMixin):
         # Force downsampling to 7000 for API features generation
         force_downsampling = (
             not self.disable_force_downsampling
-            and self.generate_features is not None
-            and phone_column is not None
-            and self.fit_columns_renaming[phone_column] in self.generate_features
+            and self.columns_for_online_api is not None
             and len(df) > Dataset.FORCE_SAMPLE_SIZE
         )
         if force_downsampling:
upgini/resource_bundle/strings.properties CHANGED
@@ -111,6 +111,7 @@ x_is_empty=X is empty
 y_is_empty=y is empty
 x_contains_reserved_column_name=Column name {} is reserved. Please rename column and try again
 missing_generate_feature=Feature {} specified in `generate_features` is not present in input columns: {}
+missing_column_for_online_api=Column {} specified in `columns_for_online_api` is not present in input columns: {}
 x_unstable_by_date=Your training sample is unstable in number of rows per date. It is recommended to redesign the training sample
 train_unstable_target=Your training sample contains an unstable target event, PSI = {}. This will lead to unstable scoring on deferred samples. It is recommended to redesign the training sample
 eval_unstable_target=Your training and evaluation samples have a difference in target distribution. PSI = {}. The results will be unstable. It is recommended to redesign the training and evaluation samples
upgini/utils/email_utils.py CHANGED
@@ -116,17 +116,17 @@ class EmailSearchKeyConverter:
         else:
             df[self.hem_column] = df[self.hem_column].astype("string").str.lower()
 
-        del self.search_keys[self.email_column]
-        if self.email_column in self.unnest_search_keys:
-            self.unnest_search_keys.remove(self.email_column)
+        # del self.search_keys[self.email_column]
+        # if self.email_column in self.unnest_search_keys:
+        #     self.unnest_search_keys.remove(self.email_column)
 
         one_domain_name = self.email_column + self.ONE_DOMAIN_SUFFIX
         df[one_domain_name] = df[self.email_column].apply(self._email_to_one_domain)
         self.columns_renaming[one_domain_name] = original_email_column
         self.search_keys[one_domain_name] = SearchKey.EMAIL_ONE_DOMAIN
 
-        if self.email_converted_to_hem:
-            df = df.drop(columns=self.email_column)
-            del self.columns_renaming[self.email_column]
+        # if self.email_converted_to_hem:
+        #     df = df.drop(columns=self.email_column)
+        #     del self.columns_renaming[self.email_column]
 
         return df
upgini/utils/target_utils.py CHANGED
@@ -10,6 +10,7 @@ from upgini.errors import ValidationError
 from upgini.metadata import SYSTEM_RECORD_ID, CVType, ModelTaskType
 from upgini.resource_bundle import ResourceBundle, bundle, get_custom_bundle
 from upgini.sampler.random_under_sampler import RandomUnderSampler
+from upgini.utils.ts_utils import get_most_frequent_time_unit, trunc_datetime
 
 TS_MIN_DIFFERENT_IDS_RATIO = 0.2
 
@@ -241,7 +242,7 @@ def balance_undersample_forced(
     df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
     if cv_type is not None and cv_type.is_time_series():
         logger.warning(f"Sampling time series dataset from {len(df)} to {sample_size}")
-        resampled_data = balance_undersample_time_series(
+        resampled_data = balance_undersample_time_series_trunc(
             df,
             id_columns=id_columns,
             date_column=date_column,
@@ -280,6 +281,58 @@ def balance_undersample_forced(
     return resampled_data
 
 
+DEFAULT_HIGH_FREQ_TRUNC_LENGTHS = [pd.DateOffset(years=2, months=6), pd.DateOffset(years=2, days=7)]
+DEFAULT_LOW_FREQ_TRUNC_LENGTHS = [pd.DateOffset(years=7), pd.DateOffset(years=5)]
+DEFAULT_TIME_UNIT_THRESHOLD = pd.Timedelta(weeks=4)
+
+
+def balance_undersample_time_series_trunc(
+    df: pd.DataFrame,
+    id_columns: List[str],
+    date_column: str,
+    sample_size: int,
+    random_state: int = 42,
+    logger: Optional[logging.Logger] = None,
+    highfreq_trunc_lengths: List[pd.DateOffset] = DEFAULT_HIGH_FREQ_TRUNC_LENGTHS,
+    lowfreq_trunc_lengths: List[pd.DateOffset] = DEFAULT_LOW_FREQ_TRUNC_LENGTHS,
+    time_unit_threshold: pd.Timedelta = DEFAULT_TIME_UNIT_THRESHOLD,
+    **kwargs,
+):
+    # Convert date column to datetime
+    dates_df = df[id_columns + [date_column]].copy()
+    dates_df[date_column] = pd.to_datetime(dates_df[date_column], unit="ms")
+
+    time_unit = get_most_frequent_time_unit(dates_df, id_columns, date_column)
+    if logger is not None:
+        logger.info(f"Time unit: {time_unit}")
+
+    if time_unit is None:
+        if logger is not None:
+            logger.info("Cannot detect time unit, returning original dataset")
+        return df
+
+    if time_unit < time_unit_threshold:
+        for trunc_length in highfreq_trunc_lengths:
+            sampled_df = trunc_datetime(dates_df, id_columns, date_column, trunc_length)
+            if len(sampled_df) <= sample_size:
+                break
+        if len(sampled_df) > sample_size:
+            sampled_df = balance_undersample_time_series(
+                sampled_df, id_columns, date_column, sample_size, random_state, logger=logger, **kwargs
+            )
+    else:
+        for trunc_length in lowfreq_trunc_lengths:
+            sampled_df = trunc_datetime(dates_df, id_columns, date_column, trunc_length)
+            if len(sampled_df) <= sample_size:
+                break
+        if len(sampled_df) > sample_size:
+            sampled_df = balance_undersample_time_series(
+                sampled_df, id_columns, date_column, sample_size, random_state, logger=logger, **kwargs
+            )
+
+    return df.loc[sampled_df.index]
+
+
 def balance_undersample_time_series(
     df: pd.DataFrame,
     id_columns: List[str],
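Illustrative call of the new truncation-based sampler above, assuming dates are stored as epoch milliseconds (implied by the pd.to_datetime(..., unit="ms") conversion); the column names and data are made up:

import logging
import numpy as np
import pandas as pd
from upgini.utils.target_utils import balance_undersample_time_series_trunc

dates = pd.date_range("2015-01-01", periods=3650, freq="D")  # ten years, daily
df = pd.DataFrame({
    "store_id": np.repeat(["a", "b"], len(dates)),
    "date": np.tile(dates.astype("int64") // 10**6, 2),  # epoch milliseconds
    "value": np.random.default_rng(0).normal(size=2 * len(dates)),
})
# Daily data falls below the 4-week threshold, so the high-frequency truncation
# lengths apply: each id is first cut to its most recent 2.5 years, then to
# 2 years + 7 days, before falling back to balance_undersample_time_series.
sampled = balance_undersample_time_series_trunc(
    df, id_columns=["store_id"], date_column="date", sample_size=1500,
    logger=logging.getLogger("upgini"),
)
print(len(sampled))  # ~1476: each id truncated to its last 2 years + 7 days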
upgini/utils/ts_utils.py ADDED
@@ -0,0 +1,47 @@
+import logging
+from typing import List, Optional
+import pandas as pd
+
+
+def get_most_frequent_time_unit(df: pd.DataFrame, id_columns: List[str], date_column: str) -> Optional[pd.DateOffset]:
+
+    def closest_unit(diff):
+        return pd.tseries.frequencies.to_offset(pd.Timedelta(diff, unit="s"))
+
+    # Calculate differences for each ID group
+    all_diffs = []
+    groups = df.groupby(id_columns) if id_columns else [(None, df)]
+    for _, group in groups:
+        # Get sorted dates for this group
+        group_dates = group[date_column].sort_values().unique()
+        if len(group_dates) > 1:
+            # Calculate time differences between consecutive dates
+            diff_series = pd.Series(group_dates[1:] - group_dates[:-1])
+            # Convert to seconds
+            diff_ns = diff_series.dt.total_seconds()
+            all_diffs.extend(diff_ns)
+
+    # Convert to series for easier processing
+    all_diffs = pd.Series(all_diffs)
+
+    # Get most common time unit across all groups
+    most_frequent_unit = all_diffs.apply(closest_unit).mode().min()
+
+    return most_frequent_unit if isinstance(most_frequent_unit, pd.DateOffset) else None
+
+
+def trunc_datetime(
+    df: pd.DataFrame,
+    id_columns: List[str],
+    date_column: str,
+    length: pd.DateOffset,
+    logger: Optional[logging.Logger] = None,
+) -> pd.DataFrame:
+    if logger is not None:
+        logger.info(f"Truncating time series dataset to {length}")
+
+    if id_columns:
+        min_datetime = df.groupby(id_columns)[date_column].transform(lambda group: group.max() - length)
+    else:
+        min_datetime = df[date_column].max() - length
+    return df[df[date_column] > min_datetime]
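A quick synthetic check of the two helpers in this new module (toy data, illustrative only):

import pandas as pd
from upgini.utils.ts_utils import get_most_frequent_time_unit, trunc_datetime

df = pd.DataFrame({
    "id": ["a"] * 10 + ["b"] * 10,
    "date": list(pd.date_range("2024-01-01", periods=10, freq="D")) * 2,
})
unit = get_most_frequent_time_unit(df, ["id"], "date")
print(unit)  # <Day> — the modal gap between consecutive dates per id

# Keep, per id, only rows strictly newer than (max date - 3 days):
recent = trunc_datetime(df, ["id"], "date", pd.DateOffset(days=3))
print(len(recent))  # 6 — the last 3 rows of each id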
{upgini-1.2.59a3818.dev1.dist-info → upgini-1.2.60a3792.dev1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: upgini
-Version: 1.2.59a3818.dev1
+Version: 1.2.60a3792.dev1
 Summary: Intelligent data search & enrichment for Machine Learning
 Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
 Project-URL: Homepage, https://upgini.com/
{upgini-1.2.59a3818.dev1.dist-info → upgini-1.2.60a3792.dev1.dist-info}/RECORD RENAMED
@@ -1,9 +1,9 @@
-upgini/__about__.py,sha256=z4el4nWucz8yDnQ3Kw0cOsmIM0He3P64K0v3ZqFnhaI,33
+upgini/__about__.py,sha256=yYjoHiqKj96yFzYqXlsnJPzF_FcgZvyGwKBQjTVsNi4,33
 upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
 upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
-upgini/dataset.py,sha256=vT4JyHmafLNbj54SySXr93f5hNS6-t94aFslbBy-7No,33535
+upgini/dataset.py,sha256=iSZX4KiDJlJFukNnAzBgkuT3UqbS-pyOyJlVXwTyaU0,34993
 upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
-upgini/features_enricher.py,sha256=FkAKQV_XOXTobwOXpdy9BPfRkL4fkgoNa2B6NniiCrs,201554
+upgini/features_enricher.py,sha256=IXU6ahvQqMGLdZsrHCjOGEia1pBAgixfld3pNVPcGEM,202468
 upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
 upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
 upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
@@ -21,16 +21,16 @@ upgini/autofe/feature.py,sha256=zvRdlxCkaOsX0XiragNvh0tAPyOWut0MQTq5JGU5HtY,1474
 upgini/autofe/groupby.py,sha256=G48_sQZw016eGx3cOy8YQrEIOp95puWqYUpFWd-gdeM,3595
 upgini/autofe/operand.py,sha256=8Ttrfxv_H91dMbS7J55zxluzAJHfGXU_Y2xCh4OHwb8,4774
 upgini/autofe/unary.py,sha256=T3E7F3dA_7o_rkdCFq7JV6nHLzcoHLHQTcxO7y5Opa4,4646
-upgini/autofe/vector.py,sha256=_ZHgAnVG0O86HBGBoJxTPbn5VoFMdsim-eaFImXjXCM,7127
+upgini/autofe/vector.py,sha256=udkg4pP7IIeLjt0Cg6rzEKUmGaubOnqsEz3bz9R6E44,7110
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/data_source/data_source_publisher.py,sha256=X-8aGtVgzGmxyXkMVBoBLIGDMb4lYQaGZbxDnOd4A3Q,22516
+upgini/data_source/data_source_publisher.py,sha256=0vaYz5v3KclJnA6jAWiTUiMQO5mbBTBINWV9jr2F5xM,22591
 upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
 upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
 upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
 upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
-upgini/resource_bundle/strings.properties,sha256=0_KAExIi1u48N1CQ13LKJS3bgDlRs-MPOyU3VxcE-qY,27350
+upgini/resource_bundle/strings.properties,sha256=UXMiaFP3p-WdiXyZJN3O_OZstb-F33BWVDxDiofyxd4,27464
 upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
 upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -46,7 +46,7 @@ upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
 upgini/utils/datetime_utils.py,sha256=RVAk4_rakK8X9zjybK3-rj0to0e3elye8tnBuA4wTWU,13491
 upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
 upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
-upgini/utils/email_utils.py,sha256=GbnhHJn1nhUBytmK6PophYqaoq4t7Lp6i0-O0Gd3RV8,5265
+upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
 upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
 upgini/utils/feature_info.py,sha256=0rOXSyCj-sw-8migWP0ge8qrOzGU50dQvH0JUJUrDfQ,6766
 upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
@@ -56,10 +56,11 @@ upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,1
 upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
 upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
 upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
-upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
+upgini/utils/target_utils.py,sha256=a7Ck7WgQeUhDrnluOdFXvOdX6zDL-4Wiqt_f4jZxHag,16543
 upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
+upgini/utils/ts_utils.py,sha256=_YbNVE144vtEPlvLpvPGguDNzrnUM9IIjdX2VQz4T7E,1671
 upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
-upgini-1.2.59a3818.dev1.dist-info/METADATA,sha256=mMXI6ZakQLfL9anKmHIvuX8X3ud-XD83uRLOaEQV9PA,49065
-upgini-1.2.59a3818.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
-upgini-1.2.59a3818.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.2.59a3818.dev1.dist-info/RECORD,,
+upgini-1.2.60a3792.dev1.dist-info/METADATA,sha256=4k4LdGfGvuhNHhpT83pomgnfvZr8x2fKQDQbFCEAyPA,49065
+upgini-1.2.60a3792.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
+upgini-1.2.60a3792.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.2.60a3792.dev1.dist-info/RECORD,,