upgini-1.1.255a3233.post3-py3-none-any.whl → upgini-1.1.255a3233.post4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


upgini/data_source/data_source_publisher.py CHANGED
@@ -79,6 +79,12 @@ class DataSourcePublisher:
                     f"Invalid update frequency: {update_frequency}. "
                     f"Available values: {self.ACCEPTABLE_UPDATE_FREQUENCIES}"
                 )
+            if (
+                set(search_keys.values()) == {SearchKey.IP_RANGE_FROM, SearchKey.IP_RANGE_TO}
+                or set(search_keys.values()) == {SearchKey.IPV6_RANGE_FROM, SearchKey.IPV6_RANGE_TO}
+                or set(search_keys.values()) == {SearchKey.MSISDN_RANGE_FROM, SearchKey.MSISDN_RANGE_TO}
+            ) and sort_column is None:
+                raise ValidationError("Sort column is required for passed search keys")
 
             request = {
                 "dataTableUri": data_table_uri,
upgini/dataset.py CHANGED
@@ -225,11 +225,11 @@ class Dataset:  # (pd.DataFrame):
            self.data[col] = self.data[col].astype("str").str.slice(stop=self.MAX_STRING_FEATURE_LENGTH)
 
     def __convert_bools(self):
-        """Convert bool columns True -> 1, False -> 0"""
+        """Convert bool columns to string"""
         # self.logger.info("Converting bool to int")
         for col in self.data.columns:
             if is_bool(self.data[col]):
-                self.data[col] = self.data[col].astype("Int64")
+                self.data[col] = self.data[col].astype("str")
 
     def __convert_float16(self):
         """Convert float16 to float"""
@@ -309,13 +309,12 @@ class Dataset:  # (pd.DataFrame):
             if self.data[ip].isnull().all():
                 raise ValidationError(self.bundle.get("invalid_ip").format(ip))
 
-            if self.data[ip].apply(self._is_ipv4).any():
-                ipv4 = ip + "_v4"
-                self.data[ipv4] = self.data[ip].apply(self._to_ipv4).apply(self._ip_to_int).astype("Int64")
-                self.meaning_types[ipv4] = FileColumnMeaningType.IP_ADDRESS
-                self.etalon_def[FileColumnMeaningType.IP_ADDRESS.value] = ipv4
-                search_keys.add(ipv4)
-                self.columns_renaming[ipv4] = original_ip
+            ipv4 = ip + "_v4"
+            self.data[ipv4] = self.data[ip].apply(self._to_ipv4).apply(self._ip_to_int).astype("Int64")
+            self.meaning_types[ipv4] = FileColumnMeaningType.IP_ADDRESS
+            self.etalon_def[FileColumnMeaningType.IP_ADDRESS.value] = ipv4
+            search_keys.add(ipv4)
+            self.columns_renaming[ipv4] = original_ip
 
             ipv6 = ip + "_v6"
             self.data[ipv6] = (
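The change above always materializes the `_v4` column instead of gating it on `_is_ipv4`. As an illustration of the presumed mechanics behind `_to_ipv4`/`_ip_to_int` (an assumption: they reduce to the stdlib `ipaddress` conversion), an IPv4 address maps to its 32-bit integer value:

```python
import ipaddress

import pandas as pd

# Assumed equivalent of the internal helpers: address string -> integer.
ips = pd.Series(["192.168.0.1", "10.0.0.1", None])
as_int = ips.apply(lambda v: int(ipaddress.ip_address(v)) if v else None).astype("Int64")
print(as_int.tolist())  # [3232235521, 167772161, <NA>]
```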
@@ -687,8 +686,10 @@ class Dataset:  # (pd.DataFrame):
                     + "".join("<tr>" + "".join(map(map_color, row[1:])) + "</tr>" for row in df_stats.itertuples())
                     + "</table>"
                 )
+                print()
                 display(HTML(html_stats))
             except (ImportError, NameError):
+                print()
                 print(df_stats)
 
         if len(self.data) == 0:
upgini/features_enricher.py CHANGED
@@ -27,7 +27,6 @@ from scipy.stats import ks_2samp
 from sklearn.base import TransformerMixin
 from sklearn.exceptions import NotFittedError
 from sklearn.model_selection import BaseCrossValidator
-from sklearn.model_selection._split import GroupsConsumerMixin
 
 from upgini.autofe.feature import Feature
 from upgini.data_source.data_source_publisher import CommercialSchema
@@ -1255,8 +1254,18 @@ class FeaturesEnricher(TransformerMixin):
            _cv, groups = CVConfig(
                _cv, date_series, self.random_state, self._search_task.get_shuffle_kfold(), group_columns=group_columns
            ).get_cv_and_groups(X)
-        elif isinstance(_cv, GroupsConsumerMixin):
-            groups = get_groups(X, group_columns)
+        else:
+            from sklearn import __version__ as sklearn_version
+            try:
+                from sklearn.model_selection._split import GroupsConsumerMixin
+
+                if isinstance(_cv, GroupsConsumerMixin):
+                    groups = get_groups(X, group_columns)
+            except ImportError:
+                print(f"WARNING: Unsupported scikit-learn version {sklearn_version}. Restart kernel and try again")
+                self.logger.exception(
+                    f"Failed to import GroupsConsumerMixin to check CV. Version of sklearn: {sklearn_version}"
+                )
 
         return _cv, groups
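The same guarded-import pattern in isolation: `GroupsConsumerMixin` is a private scikit-learn class, so the new code probes for it at call time instead of failing at module import. A sketch, logging through `print` instead of `self.logger`:

```python
from sklearn import __version__ as sklearn_version
from sklearn.model_selection import GroupKFold

cv = GroupKFold(n_splits=3)
try:
    from sklearn.model_selection._split import GroupsConsumerMixin

    needs_groups = isinstance(cv, GroupsConsumerMixin)  # True for group-aware splitters
except ImportError:
    # The private class is absent in this scikit-learn version.
    print(f"WARNING: Unsupported scikit-learn version {sklearn_version}. Restart kernel and try again")
    needs_groups = False
```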
 
@@ -1329,18 +1338,17 @@ class FeaturesEnricher(TransformerMixin):
            fitting_X = X_sorted[client_features].copy()
            fitting_enriched_X = enriched_X_sorted[client_features + existing_filtered_enriched_features].copy()
 
-            # Don't do this because one hot encoded client features will be removed
-            # # Detect and drop high cardinality columns in train
-            # columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
-            # columns_with_high_cardinality = [
-            #     c for c in columns_with_high_cardinality if c not in (self.generate_features or [])
-            # ]
-            # if len(columns_with_high_cardinality) > 0:
-            #     self.logger.warning(
-            #         f"High cardinality columns {columns_with_high_cardinality} will be dropped for metrics calculation"
-            #     )
-            #     fitting_X = fitting_X.drop(columns=columns_with_high_cardinality, errors="ignore")
-            #     fitting_enriched_X = fitting_enriched_X.drop(columns=columns_with_high_cardinality, errors="ignore")
+            # Detect and drop high cardinality columns in train
+            columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
+            columns_with_high_cardinality = [
+                c for c in columns_with_high_cardinality if c not in (self.generate_features or [])
+            ]
+            if len(columns_with_high_cardinality) > 0:
+                self.logger.warning(
+                    f"High cardinality columns {columns_with_high_cardinality} will be dropped for metrics calculation"
+                )
+                fitting_X = fitting_X.drop(columns=columns_with_high_cardinality, errors="ignore")
+                fitting_enriched_X = fitting_enriched_X.drop(columns=columns_with_high_cardinality, errors="ignore")
 
            # Detect and drop constant columns
            constant_columns = FeaturesValidator.find_constant_features(fitting_X)
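The re-enabled block leans on pandas' `errors="ignore"`, so a column list computed on the train frame can be dropped from any frame whether or not every column is present:

```python
import pandas as pd

df = pd.DataFrame({"a": [1, 2], "b": [3, 4]})

# Missing columns are silently skipped instead of raising KeyError.
print(df.drop(columns=["b", "not_there"], errors="ignore").columns.tolist())  # ['a']
```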
@@ -1389,11 +1397,11 @@ class FeaturesEnricher(TransformerMixin):
                ].copy()
 
                # # Drop high cardinality features in eval set
-                # if len(columns_with_high_cardinality) > 0:
-                #     fitting_eval_X = fitting_eval_X.drop(columns=columns_with_high_cardinality, errors="ignore")
-                #     fitting_enriched_eval_X = fitting_enriched_eval_X.drop(
-                #         columns=columns_with_high_cardinality, errors="ignore"
-                #     )
+                if len(columns_with_high_cardinality) > 0:
+                    fitting_eval_X = fitting_eval_X.drop(columns=columns_with_high_cardinality, errors="ignore")
+                    fitting_enriched_eval_X = fitting_enriched_eval_X.drop(
+                        columns=columns_with_high_cardinality, errors="ignore"
+                    )
                # Drop constant features in eval_set
                if len(constant_columns) > 0:
                    fitting_eval_X = fitting_eval_X.drop(columns=constant_columns, errors="ignore")
@@ -1673,7 +1681,7 @@ class FeaturesEnricher(TransformerMixin):
                eval_set_sampled_dict[idx] = (eval_x_sampled, enriched_eval_x, eval_y_sampled)
        else:
            self.logger.info("Transform without eval_set")
-            df = self.X.copy()
+            df = validated_X.copy()
 
            df[TARGET] = validated_y
            num_samples = _num_samples(df)
@@ -1850,7 +1858,7 @@ class FeaturesEnricher(TransformerMixin):
            msg = self.bundle.get("transform_usage_info").format(
                transform_usage.limit, transform_usage.transformed_rows
            )
-            self.logger.info("transform_usage_warning")
+            self.logger.info(msg)
            print(msg)
 
        validated_X = self._validate_X(X, is_transform=True)
@@ -2276,7 +2284,9 @@ class FeaturesEnricher(TransformerMixin):
 
        features_columns = [c for c in df.columns if c not in non_feature_columns]
 
-        features_to_drop = FeaturesValidator(self.logger).validate(df, features_columns, self.warning_counter)
+        features_to_drop = FeaturesValidator(self.logger).validate(
+            df, features_columns, self.generate_features, self.warning_counter
+        )
        self.fit_dropped_features.update(features_to_drop)
        df = df.drop(columns=features_to_drop)
 
upgini/metrics.py CHANGED
@@ -6,6 +6,7 @@ from typing import Any, Callable, Dict, List, Optional, Tuple, Union
 import numpy as np
 import pandas as pd
 from catboost import CatBoostClassifier, CatBoostRegressor
+import catboost
 from lightgbm import LGBMClassifier, LGBMRegressor
 from numpy import log1p
 from pandas.api.types import is_numeric_dtype
@@ -424,24 +425,35 @@ class CatBoostWrapper(EstimatorWrapper):
        X, y, groups, params = super()._prepare_to_fit(X, y)
 
        # Find embeddings
-        emb_pattern = r"(.+)_emb\d+"
-        self.emb_features = [c for c in X.columns if re.match(emb_pattern, c) and is_numeric_dtype(X[c])]
-        embedding_features = []
-        if len(self.emb_features) > 3:  # There is no reason to reduce embeddings dimension with less than 4
-            self.logger.info(
-                f"Embedding features count more than 3, so group them into one vector for CatBoost: {self.emb_features}"
-            )
-            X, embedding_features = self.group_embeddings(X)
-            params["embedding_features"] = embedding_features
+        if hasattr(CatBoostClassifier, "get_embedding_feature_indices"):
+            emb_pattern = r"(.+)_emb\d+"
+            self.emb_features = [c for c in X.columns if re.match(emb_pattern, c) and is_numeric_dtype(X[c])]
+            embedding_features = []
+            if len(self.emb_features) > 3:  # There is no reason to reduce embeddings dimension with less than 4
+                self.logger.info(
+                    "Embedding features count more than 3, so group them into one vector for CatBoost: "
+                    f"{self.emb_features}"
+                )
+                X, embedding_features = self.group_embeddings(X)
+                params["embedding_features"] = embedding_features
+            else:
+                self.logger.info(
+                    f"Embedding features count less than 3, so use them separately: {self.emb_features}"
+                )
+                self.emb_features = []
        else:
-            self.emb_features = []
+            self.logger.warning(f"Embedding features are not supported by Catboost version {catboost.__version__}")
 
        # Find text features from passed in generate_features
-        if self.text_features is not None:
-            self.logger.info(f"Passed text features for CatBoost: {self.text_features}")
-            self.text_features = [f for f in self.text_features if f in X.columns and not is_numeric_dtype(X[f])]
-            self.logger.info(f"Rest text features after checks: {self.text_features}")
-            params["text_features"] = self.text_features
+        if hasattr(CatBoostClassifier, "get_text_feature_indices"):
+            if self.text_features is not None:
+                self.logger.info(f"Passed text features for CatBoost: {self.text_features}")
+                self.text_features = [f for f in self.text_features if f in X.columns and not is_numeric_dtype(X[f])]
+                self.logger.info(f"Rest text features after checks: {self.text_features}")
+                params["text_features"] = self.text_features
+        else:
+            self.text_features = None
+            self.logger.warning(f"Text features are not supported by this Catboost version {catboost.__version__}")
 
        # Find rest categorical features
        self.cat_features = _get_cat_features(X, self.text_features, embedding_features)
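Both branches use feature detection rather than version comparison: the two accessor methods only exist on CatBoost builds that support embedding and text features. A quick probe (assumes catboost is installed):

```python
import catboost
from catboost import CatBoostClassifier

supports_embeddings = hasattr(CatBoostClassifier, "get_embedding_feature_indices")
supports_text = hasattr(CatBoostClassifier, "get_text_feature_indices")
print(catboost.__version__, supports_embeddings, supports_text)
```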
upgini/resource_bundle/strings.properties CHANGED
@@ -28,8 +28,8 @@ metrics_exclude_paid_features=\nWARNING: Metrics calculated after enrichment has
 metrics_no_important_free_features=\nWARNING: No important free features to calculate metrics
 metrics_no_important_features=\nWARNING: No important features to calculate metrics
 metrics_negative_uplift_without_cv=Please re-check that your task is not a time series prediction. If so, restart search with cv=CVType.time_series param for correct search results. See docs https://github.com/upgini/upgini#-time-series-prediction-support
-metrics_with_trial_features=The calculation of final accuracy metrics using Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
-transform_with_trial_features=\nWARNING: Your search results contain Trial data sources. To enrich your dataframe using transform or fit_transform with features from these Trial data sources, please register for a Free API key at https://upgini.com and resubmit your request.
+# metrics_with_trial_features=The calculation of final accuracy metrics using Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
+# transform_with_trial_features=\nWARNING: Your search results contain Trial data sources. To enrich your dataframe using transform or fit_transform with features from these Trial data sources, please register for a Free API key at https://upgini.com and resubmit your request.
 # Enriching with Trial data is not available for unauthorized users.\nGet a free API key on https://upgini.com and repeat your request.
 metrics_with_paid_features=\nWARNING: The calculation of final accuracy metrics using Paid data is not available.\nContact Upgini support for the data access
 transform_with_paid_features=\nWARNING: Enriching with Paid data is not available.\nContact Upgini support for the data access
@@ -132,18 +132,17 @@ baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input
 baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and and retry
 # target validation
 empty_target=Target is empty in all rows
-non_numeric_target=Binary target should be numerical type
+# non_numeric_target=Binary target should be numerical type
 uneven_eval_target_distribution=\nWARNING: y distributions from the training sample and eval_set differ according to the Kolmogorov-Smirnov test,\nwhich makes metrics between the train and eval_set incomparable.
-target_outliers_warning=We detected {} outliers in your sample.\nExamples of outliers with maximum value of target:\n{}\nOutliers will {}be excluded during the metrics calculation.
+target_outliers_warning=\nWARNING: We detected {} outliers in your sample.\nExamples of outliers with maximum value of target:\n{}\nOutliers will {}be excluded during the metrics calculation.
 # features validation
-empty_or_contant_features=Columns {} has value with frequency more than 99%, removed from X
-high_cardinality_features=Columns {} has high cardinality (>90% unique values), removed from X
-one_hot_encoded_features=\nWARNING: One hot encoded features detected. Use int encoding for correct results of fit.\n{}
+empty_or_contant_features=\nWARNING: Columns {} has value with frequency more than 99%, removed from X
+high_cardinality_features=\nWARNING: Columns {} has high cardinality (>90% unique values), removed from X
+# one_hot_encoded_features=\nWARNING: One hot encoded features detected. Use int encoding for correct results of fit.\n{}
 # Dataset validation
 dataset_too_few_rows=X size should be at least {} rows after validation
 dataset_too_many_rows_registered=X rows limit for transform is {}. Please sample X
 dataset_empty_column_names=Some column names are empty. Add names please
-dataset_too_long_column_name=Column {} is too long: {} characters. Remove this column or trim length to 50 characters
 dataset_full_duplicates=\nWARNING: {:.5f}% of the rows are fully duplicated
 dataset_diff_target_duplicates=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates with different y values. These rows will be deleted as incorrect\nIncorrect row indexes: {}
 dataset_diff_target_duplicates_fintech=\nWARNING: {:.4f}% of rows ({}) in X and eval_set are duplicates, not taking into consideration dates, IP addresses and features from the training set, but have different y values. These rows have been removed to optimize search results.\nRemoved row indexes: {}
@@ -165,7 +164,7 @@ dataset_invalid_column_type=Unsupported data type of column {}: {}
 dataset_invalid_filter=Unknown field in filter_features. Should be {'min_importance', 'max_psi', 'max_count', 'selected_features'}.
 dataset_too_big_file=Too big size of dataframe X for processing. Please reduce number of rows or columns
 dataset_transform_diff_fit=You try to enrich dataset that column names are different from the train dataset column names that you used on the fit stage. Please make the column names the same as in the train dataset and restart.
-binary_small_dataset=The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class.
+binary_small_dataset=\nWARNING: The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class.
 all_search_keys_invalid=All search keys are invalid
 all_emails_invalid=\nWARNING: All values in column {} are invalid emails
 # Metrics validation
upgini/utils/datetime_utils.py CHANGED
@@ -11,7 +11,20 @@ from pandas.api.types import is_numeric_dtype, is_period_dtype, is_string_dtype
 from upgini.errors import ValidationError
 from upgini.resource_bundle import ResourceBundle, get_custom_bundle
 
-DATE_FORMATS = ["%Y-%m-%d", "%d.%m.%y", "%d.%m.%Y", "%m.%d.%y", "%m.%d.%Y", "%Y-%m-%dT%H:%M:%S.%f"]
+DATE_FORMATS = [
+    "%Y-%m-%d",
+    "%d.%m.%y",
+    "%d.%m.%Y",
+    "%m.%d.%y",
+    "%m.%d.%Y",
+    "%Y/%m/%d",
+    "%y/%m/%d",
+    "%d/%m/%Y",
+    "%d/%m/%y",
+    "%m/%d/%Y",
+    "%m/%d/%y",
+    "%Y-%m-%dT%H:%M:%S.%f",
+]
 
 DATETIME_PATTERN = r"^[\d\s\.\-:T]+$"
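The new slash-separated formats extend what the date parser can auto-detect. A sketch of how such a list is typically consumed (the actual helper in `datetime_utils` is not shown in this diff): try each format until one parses.

```python
from datetime import datetime

DATE_FORMATS = ["%Y-%m-%d", "%d.%m.%y", "%Y/%m/%d", "%d/%m/%Y"]  # abridged

def detect_format(value: str):
    # Return the first format that parses the sample value, else None.
    for fmt in DATE_FORMATS:
        try:
            datetime.strptime(value, fmt)
            return fmt
        except ValueError:
            continue
    return None

print(detect_format("2024/01/31"))  # '%Y/%m/%d' -- newly supported
```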
 
upgini/utils/features_validator.py CHANGED
@@ -3,7 +3,8 @@ from logging import Logger
 from typing import List, Optional
 
 import pandas as pd
-from pandas.api.types import is_object_dtype, is_integer_dtype, is_string_dtype
+from pandas.api.types import is_integer_dtype, is_object_dtype, is_string_dtype
+
 from upgini.resource_bundle import bundle
 from upgini.utils.warning_counter import WarningCounter
 
@@ -16,9 +17,16 @@ class FeaturesValidator:
            self.logger = logging.getLogger()
            self.logger.setLevel("FATAL")
 
-    def validate(self, df: pd.DataFrame, features: List[str], warning_counter: WarningCounter) -> List[str]:
+    def validate(
+        self,
+        df: pd.DataFrame,
+        features: List[str],
+        features_for_generate: Optional[List[str]],
+        warning_counter: WarningCounter,
+    ) -> List[str]:
        # one_hot_encoded_features = []
        empty_or_constant_features = []
+        high_cardinality_features = []
 
        for f in features:
            column = df[f]
@@ -51,23 +59,31 @@ class FeaturesValidator:
            msg = bundle.get("empty_or_contant_features").format(empty_or_constant_features)
            print(msg)
            self.logger.warning(msg)
+            warning_counter.increment()
+
+        high_cardinality_features = self.find_high_cardinality(df[features])
+        if features_for_generate:
+            high_cardinality_features = [f for f in high_cardinality_features if f not in features_for_generate]
+        if high_cardinality_features:
+            msg = bundle.get("high_cardinality_features").format(high_cardinality_features)
+            print(msg)
+            self.logger.warning(msg)
+            warning_counter.increment()
 
-        return empty_or_constant_features
+        return empty_or_constant_features + high_cardinality_features
 
    @staticmethod
    def find_high_cardinality(df: pd.DataFrame) -> List[str]:
        # Remove high cardinality columns
        row_count = df.shape[0]
+        if row_count < 100:  # For tests with small datasets
+            return []
        return [
            i
            for i in df
-            if (is_string_dtype(df[i]) or is_integer_dtype(df[i])) and (df[i].nunique() / row_count >= 0.9)
+            if (is_string_dtype(df[i]) or is_integer_dtype(df[i])) and (df[i].nunique(dropna=False) / row_count >= 0.95)
        ]
 
    @staticmethod
    def find_constant_features(df: pd.DataFrame) -> List[str]:
-        return [
-            i
-            for i in df
-            if df[i].nunique() == 1
-        ]
+        return [i for i in df if df[i].nunique() == 1]
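To see the tightened heuristic in action (the threshold moves from 90% to 95% of rows, NaN now counts as a distinct value via `dropna=False`, and frames under 100 rows are exempt), here is a self-contained replica of the post-change `find_high_cardinality`, illustrative rather than imported from upgini:

```python
from typing import List

import pandas as pd
from pandas.api.types import is_integer_dtype, is_string_dtype


def find_high_cardinality(df: pd.DataFrame) -> List[str]:
    row_count = df.shape[0]
    if row_count < 100:  # For tests with small datasets
        return []
    return [
        i
        for i in df
        if (is_string_dtype(df[i]) or is_integer_dtype(df[i]))
        and (df[i].nunique(dropna=False) / row_count >= 0.95)
    ]


df = pd.DataFrame({
    "user_id": [f"u{i}" for i in range(200)],  # 100% unique -> flagged
    "country": ["US", "DE"] * 100,             # 2 unique values -> kept
})
print(find_high_cardinality(df))  # ['user_id']
```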
upgini-1.1.255a3233.post3.dist-info/METADATA → upgini-1.1.255a3233.post4.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: upgini
-Version: 1.1.255a3233.post3
+Version: 1.1.255a3233.post4
 Summary: Intelligent data search & enrichment for Machine Learning
 Home-page: https://upgini.com/
 Author: Upgini Developers
upgini-1.1.255a3233.post3.dist-info/RECORD → upgini-1.1.255a3233.post4.dist-info/RECORD RENAMED
@@ -1,12 +1,12 @@
 upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
 upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
-upgini/dataset.py,sha256=sL1w8dVmXkuV5PWaBTUpz8X8joW6mIOLyPbSAlSbpR0,45576
+upgini/dataset.py,sha256=4LfrUwxhd__ZVqZkjPVxbC4SW3YLsk1sMMqnYPUaVpw,45529
 upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
-upgini/features_enricher.py,sha256=NZHH1Wf7QbvyFYgr-LJhG3d9HjUDQyfrZ6fFLsn9PdM,171774
+upgini/features_enricher.py,sha256=WbwnLvPVqn4m995b6jSamWkXyRVy18fnG7faBeuJbWI,172132
 upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
 upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
 upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
-upgini/metrics.py,sha256=LS2MgEKgmn9VEXsKzxv3pBZ-q71mTnpWu6vL8fYgpo4,26727
+upgini/metrics.py,sha256=3VvSZW1cCOIPHImXuqcnWzD3fWcpPzVa9k8eulLbUmY,27426
 upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
 upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
 upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -21,14 +21,14 @@ upgini/autofe/operand.py,sha256=Rhy7Ky3we-I1Su1--dS4xdsO3K8neV4rqM_Q4xYE4ug,2779
 upgini/autofe/unary.py,sha256=gyMkrx9bfa3o19zS-4JaRlScHrfeZGBsYe7d_6ePT-0,2853
 upgini/autofe/vector.py,sha256=Qk7VmdwURNwVw7fIMEspWEo7HTiyUWCYIqu3hcWQQio,507
 upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-upgini/data_source/data_source_publisher.py,sha256=iH26_cQiT1fm4VxsJD7WoAaOFyfmsEKKtiWa-JJFpeQ,15494
+upgini/data_source/data_source_publisher.py,sha256=QASEDhJ9SxJKcWxoN2vUPxrM_HTlwKQOPa92L7EQneA,15962
 upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
 upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
 upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
 upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
 upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
-upgini/resource_bundle/strings.properties,sha256=WoT1k7uJSVhECUsgJKX2R8I3aPQjTDIhms6HVexWaCA,25294
+upgini/resource_bundle/strings.properties,sha256=MGU_oBc15VAmbPZdThCpm3B4xERAKwbCIUTIG66dvUo,25228
 upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
 upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
@@ -40,12 +40,12 @@ upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6P
 upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
 upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
 upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
-upgini/utils/datetime_utils.py,sha256=b8pyNhrC8ni6apsLQivQOiKqu-37pU4EF3nNHPZqiN8,8713
+upgini/utils/datetime_utils.py,sha256=ol5Bgh98wU6KBY9z4QskNO0ja-L7HJL70HmTAjl7iRU,8836
 upgini/utils/deduplicate_utils.py,sha256=ckJrpU8Ruc_vcwIPTopbUjyJuNiseLHNAbQlLfhUCxo,5888
 upgini/utils/display_utils.py,sha256=BfPaJGUJAkGaijdAKPrdIfUqjXewFbBRrYqzzylB9t4,10667
 upgini/utils/email_utils.py,sha256=3CvHXTSzlgLyGsQOXfRYVfFhfPy6OXG4uXOBWRaLfHg,3479
 upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
-upgini/utils/features_validator.py,sha256=VexG-9p63ni66Hf9T7dgP4iUAhpXqwo3sgMwBK_eii8,2565
+upgini/utils/features_validator.py,sha256=P-dfjBLAMxgzOcUX1Jo1bhVp8-8WyTyF3Ef0YZ5nfRI,3269
 upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
 upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
 upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
@@ -55,8 +55,8 @@ upgini/utils/sklearn_ext.py,sha256=fvuTWJ5AnT3ED9KSaQu_yIgW2JR19hFlaGDoVP3k60g,4
 upgini/utils/target_utils.py,sha256=WVhhxpQVvnhsDV7ctlds51VFg7hz59S_MFUSoRZFszw,7204
 upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
 upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
-upgini-1.1.255a3233.post3.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
-upgini-1.1.255a3233.post3.dist-info/METADATA,sha256=EOPXhWBGtI0RgsqcVJmRUkI8yWppXlLts8TEYQ2Q8pY,48167
-upgini-1.1.255a3233.post3.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
-upgini-1.1.255a3233.post3.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
-upgini-1.1.255a3233.post3.dist-info/RECORD,,
+upgini-1.1.255a3233.post4.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+upgini-1.1.255a3233.post4.dist-info/METADATA,sha256=LISA1JiOQR8ZPKCt7QlF-sTEJyiban04m9Zfln5DVyA,48167
+upgini-1.1.255a3233.post4.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+upgini-1.1.255a3233.post4.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
+upgini-1.1.255a3233.post4.dist-info/RECORD,,
upgini-1.1.255a3233.post3.dist-info/WHEEL → upgini-1.1.255a3233.post4.dist-info/WHEEL RENAMED
@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.42.0)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any