upgini 1.1.252a5__py3-none-any.whl → 1.1.253__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of upgini might be problematic.

upgini/data_source/data_source_publisher.py CHANGED
@@ -40,7 +40,7 @@ class DataSourcePublisher:
  if logs_enabled:
  self.logger = LoggerFactory().get_logger(endpoint, api_key)
  else:
- self.logger = logging.getLogger("muted_logger")
+ self.logger = logging.getLogger()
  self.logger.setLevel("FATAL")

  def place(
upgini/dataset.py CHANGED
@@ -15,9 +15,9 @@ from pandas.api.types import (
  is_float_dtype,
  is_integer_dtype,
  is_numeric_dtype,
+ is_period_dtype,
  is_string_dtype,
  )
- from pandas.core.dtypes.common import is_period_dtype

  from upgini.errors import ValidationError
  from upgini.http import ProgressStage, SearchProgress, _RestClient
@@ -39,10 +39,10 @@ from upgini.metadata import (
  )
  from upgini.normalizer.phone_normalizer import PhoneNormalizer
  from upgini.resource_bundle import ResourceBundle, get_custom_bundle
+ from upgini.sampler.random_under_sampler import RandomUnderSampler
  from upgini.search_task import SearchTask
- from upgini.utils import combine_search_keys
+ from upgini.utils import combine_search_keys, find_numbers_with_decimal_comma
  from upgini.utils.email_utils import EmailSearchKeyConverter
- from upgini.utils.target_utils import balance_undersample

  try:
  from upgini.utils.progress_bar import CustomProgressBar as ProgressBar
@@ -61,8 +61,6 @@ class Dataset: # (pd.DataFrame):
  FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 200_000
  MIN_SAMPLE_THRESHOLD = 5_000
  IMBALANCE_THESHOLD = 0.4
- BINARY_BOOTSTRAP_LOOPS = 5
- MULTICLASS_BOOTSTRAP_LOOPS = 2
  MIN_TARGET_CLASS_ROWS = 100
  MAX_MULTICLASS_CLASS_COUNT = 100
  MIN_SUPPORTED_DATE_TS = 946684800000 # 2000-01-01
@@ -224,45 +222,6 @@ class Dataset: # (pd.DataFrame):
  if max_length > self.MAX_STRING_FEATURE_LENGTH:
  self.data[col] = self.data[col].astype("str").str.slice(stop=self.MAX_STRING_FEATURE_LENGTH)

- def __clean_duplicates(self, silent_mode: bool = False):
- """Clean DataSet from full duplicates."""
- # self.logger.info("Clean full duplicates")
- nrows = len(self.data)
- if nrows == 0:
- return
- # Remove absolute duplicates (exclude system_record_id)
- unique_columns = self.data.columns.tolist()
- unique_columns.remove(SYSTEM_RECORD_ID)
- self.logger.info(f"Dataset shape before clean duplicates: {self.data.shape}")
- self.data.drop_duplicates(subset=unique_columns, inplace=True)
- self.logger.info(f"Dataset shape after clean duplicates: {self.data.shape}")
- nrows_after_full_dedup = len(self.data)
- share_full_dedup = 100 * (1 - nrows_after_full_dedup / nrows)
- if share_full_dedup > 0:
- msg = self.bundle.get("dataset_full_duplicates").format(share_full_dedup)
- self.logger.warning(msg)
- # if not silent_mode:
- # print(msg)
- # self.warning_counter.increment()
- target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value)
- if target_column is not None:
- unique_columns.remove(target_column)
- marked_duplicates = self.data.duplicated(subset=unique_columns, keep=False)
- if marked_duplicates.sum() > 0:
- dups_indices = self.data[marked_duplicates].index.to_list()
- nrows_after_tgt_dedup = len(self.data.drop_duplicates(subset=unique_columns))
- num_dup_rows = nrows_after_full_dedup - nrows_after_tgt_dedup
- share_tgt_dedup = 100 * num_dup_rows / nrows_after_full_dedup
-
- msg = self.bundle.get("dataset_diff_target_duplicates").format(
- share_tgt_dedup, num_dup_rows, dups_indices
- )
- self.logger.warning(msg)
- if not silent_mode:
- print(msg)
- self.data.drop_duplicates(subset=unique_columns, keep=False, inplace=True)
- self.logger.info(f"Dataset shape after clean invalid target duplicates: {self.data.shape}")
-
  def __convert_bools(self):
  """Convert bool columns True -> 1, False -> 0"""
  # self.logger.info("Converting bool to int")
@@ -280,12 +239,10 @@ class Dataset: # (pd.DataFrame):
  def __correct_decimal_comma(self):
  """Check DataSet for decimal commas and fix them"""
  # self.logger.info("Correct decimal commas")
- tmp = self.data.head(10)
- # all columns with sep="," will have dtype == 'object', i.e string
- # sep="." will be casted to numeric automatically
- cls_to_check = [i for i in tmp.columns if is_string_dtype(tmp[i])]
- for col in cls_to_check:
- if tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any():
+ columns_to_fix = find_numbers_with_decimal_comma(self.data)
+ if len(columns_to_fix) > 0:
+ self.logger.warning(f"Convert strings with decimal comma to float: {columns_to_fix}")
+ for col in columns_to_fix:
  self.data[col] = self.data[col].astype("string").str.replace(",", ".").astype(np.float64)

  @staticmethod
@@ -504,8 +461,10 @@ class Dataset: # (pd.DataFrame):
  self.task_type == ModelTaskType.BINARY and len(train_segment) > self.MIN_SAMPLE_THRESHOLD
  ):
  count = len(train_segment)
- target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, TARGET)
- target = train_segment[target_column]
+ min_class_count = count
+ min_class_value = None
+ target_column = self.etalon_def_checked.get(FileColumnMeaningType.TARGET.value, "")
+ target = train_segment[target_column].copy()
  target_classes_count = target.nunique()

  if target_classes_count > self.MAX_MULTICLASS_CLASS_COUNT:
@@ -515,9 +474,12 @@ class Dataset: # (pd.DataFrame):
  self.logger.warning(msg)
  raise ValidationError(msg)

- vc = target.value_counts()
- min_class_value = vc.index[len(vc) - 1]
- min_class_count = vc[min_class_value]
+ unique_target = target.unique()
+ for v in list(unique_target): # type: ignore
+ current_class_count = len(train_segment.loc[target == v])
+ if current_class_count < min_class_count:
+ min_class_count = current_class_count
+ min_class_value = v

  if min_class_count < self.MIN_TARGET_CLASS_ROWS:
  msg = self.bundle.get("dataset_rarest_class_less_min").format(
@@ -530,19 +492,53 @@ class Dataset: # (pd.DataFrame):
  min_class_threshold = min_class_percent * count

  if min_class_count < min_class_threshold:
- self.imbalanced = True
- self.data = balance_undersample(
- df=train_segment,
- target_column=target_column,
- task_type=self.task_type,
- random_state=self.random_state,
- imbalance_threshold=self.IMBALANCE_THESHOLD,
- binary_bootstrap_loops=self.BINARY_BOOTSTRAP_LOOPS,
- multiclass_bootstrap_loops=self.MULTICLASS_BOOTSTRAP_LOOPS,
- logger=self.logger,
- bundle=self.bundle,
- warning_counter=self.warning_counter,
+ msg = self.bundle.get("dataset_rarest_class_less_threshold").format(
+ min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
  )
+ self.logger.warning(msg)
+ print(msg)
+ self.warning_counter.increment()
+
+ train_segment = train_segment.copy().sort_values(by=SYSTEM_RECORD_ID)
+ if self.task_type == ModelTaskType.MULTICLASS:
+ # Sort classes by rows count and find 25% quantile class
+ classes = target.value_counts().index
+ quantile25_idx = int(0.75 * len(classes))
+ quantile25_class = classes[quantile25_idx]
+ count_of_quantile25_class = len(target[target == quantile25_class])
+ msg = self.bundle.get("imbalance_multiclass").format(quantile25_class, count_of_quantile25_class)
+ self.logger.warning(msg)
+ print(msg)
+ # 25% and lower classes will stay as is. Higher classes will be downsampled
+ parts = []
+ for class_idx in range(quantile25_idx):
+ sampled = train_segment[train_segment[target_column] == classes[class_idx]].sample(
+ n=count_of_quantile25_class, random_state=self.random_state
+ )
+ parts.append(sampled)
+ for class_idx in range(quantile25_idx, len(classes)):
+ parts.append(train_segment[train_segment[target_column] == classes[class_idx]])
+ resampled_data = pd.concat(parts)
+ elif self.task_type == ModelTaskType.BINARY and min_class_count < self.MIN_SAMPLE_THRESHOLD / 2:
+ minority_class = train_segment[train_segment[target_column] == min_class_value]
+ majority_class = train_segment[train_segment[target_column] != min_class_value]
+ sampled_majority_class = majority_class.sample(
+ n=self.MIN_SAMPLE_THRESHOLD - min_class_count, random_state=self.random_state
+ )
+ resampled_data = train_segment[
+ (train_segment[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
+ | (train_segment[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
+ ]
+ else:
+ sampler = RandomUnderSampler(random_state=self.random_state)
+ X = train_segment[SYSTEM_RECORD_ID]
+ X = X.to_frame(SYSTEM_RECORD_ID)
+ new_x, _ = sampler.fit_resample(X, target) # type: ignore
+ resampled_data = train_segment[train_segment[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
+
+ self.data = resampled_data
+ self.logger.info(f"Shape after rebalance resampling: {self.data.shape}")
+ self.imbalanced = True

  # Resample over fit threshold
  if not self.imbalanced and EVAL_SET_INDEX in self.data.columns:
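
For orientation, the multiclass branch of the new in-place rebalancing above can be summarized with a minimal, self-contained sketch (not part of the diff; the function and column names are hypothetical stand-ins for the Dataset internals): classes larger than the 25%-quantile class are downsampled to its size, while the 25%-quantile class and rarer classes are kept as is.

import pandas as pd


def downsample_multiclass_sketch(df: pd.DataFrame, target_column: str, random_state: int = 42) -> pd.DataFrame:
    # value_counts() lists classes by frequency, largest first
    classes = df[target_column].value_counts().index
    quantile25_idx = int(0.75 * len(classes))
    quantile25_count = int((df[target_column] == classes[quantile25_idx]).sum())
    parts = []
    for class_idx, cls in enumerate(classes):
        class_rows = df[df[target_column] == cls]
        if class_idx < quantile25_idx:
            # larger classes are capped at the 25%-quantile class size
            parts.append(class_rows.sample(n=quantile25_count, random_state=random_state))
        else:
            # the 25%-quantile class and rarer classes stay untouched
            parts.append(class_rows)
    return pd.concat(parts)
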
upgini/features_enricher.py CHANGED
@@ -16,7 +16,13 @@ from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union

  import numpy as np
  import pandas as pd
- from pandas.api.types import is_numeric_dtype, is_string_dtype
+ from pandas.api.types import (
+ is_bool,
+ is_datetime64_any_dtype,
+ is_numeric_dtype,
+ is_period_dtype,
+ is_string_dtype,
+ )
  from scipy.stats import ks_2samp
  from sklearn.base import TransformerMixin
  from sklearn.exceptions import NotFittedError
@@ -54,7 +60,7 @@ from upgini.metrics import EstimatorWrapper, validate_scoring_argument
  from upgini.resource_bundle import ResourceBundle, bundle, get_custom_bundle
  from upgini.search_task import SearchTask
  from upgini.spinner import Spinner
- from upgini.utils import combine_search_keys
+ from upgini.utils import combine_search_keys, find_numbers_with_decimal_comma
  from upgini.utils.country_utils import CountrySearchKeyDetector
  from upgini.utils.custom_loss_utils import (
  get_additional_params_custom_loss,
@@ -215,7 +221,7 @@ class FeaturesEnricher(TransformerMixin):
  if logs_enabled:
  self.logger = LoggerFactory().get_logger(endpoint, self._api_key, client_ip, client_visitorid)
  else:
- self.logger = logging.getLogger("muted_logger")
+ self.logger = logging.getLogger()
  self.logger.setLevel("FATAL")

  if len(kwargs) > 0:
@@ -1323,16 +1329,52 @@ class FeaturesEnricher(TransformerMixin):
  fitting_X = X_sorted[client_features].copy()
  fitting_enriched_X = enriched_X_sorted[client_features + existing_filtered_enriched_features].copy()

- # Detect and drop high cardinality columns in train
- columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
- columns_with_high_cardinality = [
- c for c in columns_with_high_cardinality if c not in (self.generate_features or [])
+ # Don't do this because one hot encoded client features will be removed
+ # # Detect and drop high cardinality columns in train
+ # columns_with_high_cardinality = FeaturesValidator.find_high_cardinality(fitting_X)
+ # columns_with_high_cardinality = [
+ # c for c in columns_with_high_cardinality if c not in (self.generate_features or [])
+ # ]
+ # if len(columns_with_high_cardinality) > 0:
+ # self.logger.warning(
+ # f"High cardinality columns {columns_with_high_cardinality} will be dropped for metrics calculation"
+ # )
+ # fitting_X = fitting_X.drop(columns=columns_with_high_cardinality, errors="ignore")
+ # fitting_enriched_X = fitting_enriched_X.drop(columns=columns_with_high_cardinality, errors="ignore")
+
+ # Detect and drop constant columns
+ constant_columns = FeaturesValidator.find_constant_features(fitting_X)
+ if len(constant_columns) > 0:
+ self.logger.warning(f"Constant columns {constant_columns} will be dropped for metrics calculation")
+ fitting_X = fitting_X.drop(columns=constant_columns, errors="ignore")
+ fitting_enriched_X = fitting_enriched_X.drop(columns=constant_columns, errors="ignore")
+
+ # Remove datetime features
+ datetime_features = [
+ f for f in fitting_X.columns if is_datetime64_any_dtype(fitting_X[f]) or is_period_dtype(fitting_X[f])
  ]
- self.logger.info(
- f"Columns {columns_with_high_cardinality} will be dropped for metrics calculation due to high cardinality"
- )
- fitting_X = fitting_X.drop(columns=columns_with_high_cardinality, errors="ignore")
- fitting_enriched_X = fitting_enriched_X.drop(columns=columns_with_high_cardinality, errors="ignore")
+ if len(datetime_features) > 0:
+ self.logger.warning(self.bundle.get("dataset_date_features").format(datetime_features))
+ fitting_X = fitting_X.drop(columns=datetime_features, errors="ignore")
+ fitting_enriched_X = fitting_enriched_X.drop(columns=datetime_features, errors="ignore")
+
+ bool_columns = []
+ for col in fitting_X.columns:
+ if is_bool(fitting_X[col]):
+ bool_columns.append(col)
+ fitting_X[col] = fitting_X[col].astype(str)
+ fitting_enriched_X[col] = fitting_enriched_X[col].astype(str)
+ if len(bool_columns) > 0:
+ self.logger.warning(f"Bool columns {bool_columns} was converted to string for metrics calculation")
+
+ decimal_columns_to_fix = find_numbers_with_decimal_comma(fitting_X)
+ if len(decimal_columns_to_fix) > 0:
+ self.logger.warning(f"Convert strings with decimal comma to float: {decimal_columns_to_fix}")
+ for col in decimal_columns_to_fix:
+ fitting_X[col] = fitting_X[col].astype("string").str.replace(",", ".").astype(np.float64)
+ fitting_enriched_X[col] = (
+ fitting_enriched_X[col].astype("string").str.replace(",", ".").astype(np.float64)
+ )

  fitting_eval_set_dict = dict()
  for idx, eval_tuple in eval_set_sampled_dict.items():
@@ -1346,11 +1388,31 @@ class FeaturesEnricher(TransformerMixin):
  client_features + existing_filtered_enriched_features
  ].copy()

- # Drop high cardinality columns in eval set
- fitting_eval_X = fitting_eval_X.drop(columns=columns_with_high_cardinality, errors="ignore")
- fitting_enriched_eval_X = fitting_enriched_eval_X.drop(
- columns=columns_with_high_cardinality, errors="ignore"
- )
+ # # Drop high cardinality features in eval set
+ # if len(columns_with_high_cardinality) > 0:
+ # fitting_eval_X = fitting_eval_X.drop(columns=columns_with_high_cardinality, errors="ignore")
+ # fitting_enriched_eval_X = fitting_enriched_eval_X.drop(
+ # columns=columns_with_high_cardinality, errors="ignore"
+ # )
+ # Drop constant features in eval_set
+ if len(constant_columns) > 0:
+ fitting_eval_X = fitting_eval_X.drop(columns=constant_columns, errors="ignore")
+ fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=constant_columns, errors="ignore")
+ # Drop datetime features in eval_set
+ if len(datetime_features) > 0:
+ fitting_eval_X = fitting_eval_X.drop(columns=datetime_features, errors="ignore")
+ fitting_enriched_eval_X = fitting_enriched_eval_X.drop(columns=datetime_features, errors="ignore")
+ # Convert bool to string in eval_set
+ if len(bool_columns) > 0:
+ fitting_eval_X[col] = fitting_eval_X[col].astype(str)
+ fitting_enriched_eval_X[col] = fitting_enriched_eval_X[col].astype(str)
+ # Correct string features with decimal commas
+ if len(decimal_columns_to_fix) > 0:
+ for col in decimal_columns_to_fix:
+ fitting_eval_X[col] = fitting_eval_X[col].astype("string").str.replace(",", ".").astype(np.float64)
+ fitting_enriched_eval_X[col] = (
+ fitting_enriched_eval_X[col].astype("string").str.replace(",", ".").astype(np.float64)
+ )

  fitting_eval_set_dict[idx] = (
  fitting_eval_X,
@@ -1398,6 +1460,7 @@
  elif len(self.feature_importances_) == 0:
  self.logger.info("No external features selected. So use only input datasets for metrics calculation")
  return self.__sample_only_input(validated_X, validated_y, eval_set, is_demo_dataset)
+ # TODO save and check if dataset was deduplicated - use imbalance branch for such case
  elif not self.imbalanced and not exclude_features_sources and is_input_same_as_fit:
  self.logger.info("Dataset is not imbalanced, so use enriched_X from fit")
  return self.__sample_balanced(eval_set, trace_id, remove_outliers_calc_metrics)
@@ -1438,6 +1501,8 @@
  eval_xy[EVAL_SET_INDEX] = idx + 1
  df = pd.concat([df, eval_xy])

+ df = clean_full_duplicates(df, logger=self.logger, silent=True, bundle=self.bundle)
+
  num_samples = _num_samples(df)
  sample_threshold, sample_rows = (
  (Dataset.FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD, Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS)
@@ -1561,14 +1626,7 @@
  eval_df_with_index[EVAL_SET_INDEX] = idx + 1
  df = pd.concat([df, eval_df_with_index])

- _, df = remove_fintech_duplicates(
- df,
- self.search_keys,
- date_format=self.date_format,
- logger=self.logger,
- silent=True,
- bundle=self.bundle,
- )
+ df = clean_full_duplicates(df, logger=self.logger, silent=True, bundle=self.bundle)

  # downsample if need to eval_set threshold
  num_samples = _num_samples(df)
@@ -1653,9 +1711,7 @@

  self.__cached_sampled_datasets = (X_sampled, y_sampled, enriched_X, eval_set_sampled_dict, self.search_keys)

- return self.__mk_sampled_data_tuple(
- X_sampled, y_sampled, enriched_X, eval_set_sampled_dict, self.search_keys
- )
+ return self.__mk_sampled_data_tuple(X_sampled, y_sampled, enriched_X, eval_set_sampled_dict, self.search_keys)

  def __mk_sampled_data_tuple(
  self,
@@ -2179,11 +2235,10 @@

  df = self.__add_country_code(df, self.fit_search_keys)

- need_full_defuplication, df = remove_fintech_duplicates(
+ df = remove_fintech_duplicates(
  df, self.fit_search_keys, date_format=self.date_format, logger=self.logger, bundle=self.bundle
  )
- if need_full_defuplication:
- df = clean_full_duplicates(df, self.logger, bundle=self.bundle)
+ df = clean_full_duplicates(df, self.logger, bundle=self.bundle)

  date_column = self._get_date_column(self.fit_search_keys)
  self.__adjust_cv(df, date_column, model_task_type)
@@ -2806,8 +2861,9 @@
  # save original order or rows
  original_index_name = df.index.name
  index_name = df.index.name or DEFAULT_INDEX
- df = df.reset_index().reset_index(drop=True)
- df = df.rename(columns={index_name: ORIGINAL_INDEX})
+ original_order_name = "original_order"
+ df = df.reset_index().rename(columns={index_name: ORIGINAL_INDEX})
+ df = df.reset_index().rename(columns={DEFAULT_INDEX: original_order_name})

  # order by date and idempotent order by other keys
  if self.cv not in [CVType.time_series, CVType.blocked_time_series]:
@@ -2847,7 +2903,7 @@
  # return original order
  df = df.set_index(ORIGINAL_INDEX)
  df.index.name = original_index_name
- # df = df.sort_index()
+ df = df.sort_values(by=original_order_name).drop(columns=original_order_name)

  meaning_types[SYSTEM_RECORD_ID] = FileColumnMeaningType.SYSTEM_RECORD_ID
  return df
@@ -2966,6 +3022,7 @@
  return result_train, result_eval_sets

  def __prepare_feature_importances(self, trace_id: str, x_columns: List[str], silent=False):
+ llm_source = "LLM with external data augmentation"
  if self._search_task is None:
  raise NotFittedError(self.bundle.get("transform_unfitted_enricher"))
  features_meta = self._search_task.get_all_features_metadata_v2()
@@ -2990,6 +3047,20 @@
  def list_or_single(lst: List[str], single: str):
  return lst or ([single] if single else [])

+ def to_anchor(link: str, value: str) -> str:
+ if not value:
+ return ""
+ elif not link:
+ return value
+ elif value == llm_source:
+ return value
+ else:
+ return f"<a href='{link}' target='_blank' rel='noopener noreferrer'>{value}</a>"
+
+ def make_links(names: List[str], links: List[str]):
+ all_links = [to_anchor(link, name) for name, link in itertools.zip_longest(names, links)]
+ return ",".join(all_links)
+
  features_meta.sort(key=lambda m: (-m.shap_value, m.name))
  for feature_meta in features_meta:
  if feature_meta.name in original_names_dict.keys():
@@ -3015,18 +3086,6 @@
  if len(feature_sample) > 30:
  feature_sample = feature_sample[:30] + "..."

- def to_anchor(link: str, value: str) -> str:
- if not value:
- return ""
- elif not link:
- return value
- else:
- return f"<a href='{link}' target='_blank' rel='noopener noreferrer'>{value}</a>"
-
- def make_links(names: List[str], links: List[str]):
- all_links = [to_anchor(link, name) for name, link in itertools.zip_longest(names, links)]
- return ",".join(all_links)
-
  internal_provider = feature_meta.data_provider or "Upgini"
  providers = list_or_single(feature_meta.data_providers, feature_meta.data_provider)
  provider_links = list_or_single(feature_meta.data_provider_links, feature_meta.data_provider_link)
@@ -3036,7 +3095,7 @@
  provider = to_anchor("https://upgini.com", "Upgini")

  internal_source = feature_meta.data_source or (
- "LLM with external data augmentation"
+ llm_source
  if not feature_meta.name.endswith("_country") and not feature_meta.name.endswith("_postal_code")
  else ""
  )
upgini/search_task.py CHANGED
@@ -57,7 +57,7 @@ class SearchTask:
  if logger is not None:
  self.logger = logger
  else:
- self.logger = logging.getLogger("muted_logger")
+ self.logger = logging.getLogger()
  self.logger.setLevel("FATAL")
  self.provider_metadata_v2: Optional[List[ProviderTaskMetadataV2]] = None
  self.unused_features_for_generation: Optional[List[str]] = None
upgini/utils/__init__.py CHANGED
@@ -1,6 +1,9 @@
  import itertools
  from typing import List, Tuple

+ import pandas as pd
+ from pandas.api.types import is_string_dtype
+

  def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
  combined_search_keys = []
@@ -8,3 +11,14 @@ def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
  for subset in itertools.combinations(search_keys, L):
  combined_search_keys.append(subset)
  return combined_search_keys
+
+
+ def find_numbers_with_decimal_comma(df: pd.DataFrame) -> pd.DataFrame:
+ tmp = df.head(10)
+ # all columns with sep="," will have dtype == 'object', i.e string
+ # sep="." will be casted to numeric automatically
+ return [
+ col
+ for col in tmp.columns
+ if is_string_dtype(tmp[col]) and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
+ ]
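
As a usage illustration (not shown in the diff, and the example frame is made up), the new helper inspects the first 10 rows and flags string columns whose values look like "123,45"; callers such as Dataset.__correct_decimal_comma then convert those columns to float:

import numpy as np
import pandas as pd

from upgini.utils import find_numbers_with_decimal_comma

df = pd.DataFrame({"price": ["1,5", "2,75", "3,0"], "city": ["Rome", "Oslo", "Lima"]})
columns_to_fix = find_numbers_with_decimal_comma(df)  # ["price"]
for col in columns_to_fix:
    # the same conversion applied in dataset.py and features_enricher.py
    df[col] = df[col].astype("string").str.replace(",", ".").astype(np.float64)
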
upgini/utils/datetime_utils.py CHANGED
@@ -31,7 +31,7 @@ class DateTimeSearchKeyConverter:
  if logger is not None:
  self.logger = logger
  else:
- self.logger = logging.getLogger("muted_logger")
+ self.logger = logging.getLogger()
  self.logger.setLevel("FATAL")
  self.generated_features: List[str] = []
  self.bundle = bundle or get_custom_bundle()
upgini/utils/deduplicate_utils.py CHANGED
@@ -1,9 +1,9 @@
  from logging import Logger
- from typing import Dict, List, Optional, Tuple, Union
+ from typing import Dict, List, Optional, Union

  import pandas as pd

- from upgini.metadata import SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
+ from upgini.metadata import SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
  from upgini.resource_bundle import ResourceBundle
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
  from upgini.utils.target_utils import define_task
@@ -16,17 +16,15 @@ def remove_fintech_duplicates(
  logger: Optional[Logger] = None,
  silent=False,
  bundle: ResourceBundle = None,
- ) -> Tuple[bool, pd.DataFrame]:
+ ) -> pd.DataFrame:
  # Base checks
- need_full_deduplication = True
-
  date_col = _get_column_by_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME])
  if define_task(df[TARGET], date_col is not None, silent=True) != ModelTaskType.BINARY:
- return need_full_deduplication, df
+ return df

  date_col = _get_column_by_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME])
  if date_col is None:
- return need_full_deduplication, df
+ return df

  personal_cols = []
  phone_col = _get_column_by_key(search_keys, SearchKey.PHONE)
@@ -39,13 +37,13 @@ def remove_fintech_duplicates(
  if hem_col:
  personal_cols.append(hem_col)
  if len(personal_cols) == 0:
- return need_full_deduplication, df
+ return df

  sub_df = df[personal_cols + [date_col, TARGET]]

  # Fast check for duplicates by personal keys
  if not sub_df[personal_cols].duplicated().any():
- return need_full_deduplication, df
+ return df

  grouped_by_personal_cols = sub_df.groupby(personal_cols, group_keys=False)

@@ -54,21 +52,19 @@
  total = len(uniques)
  diff_dates = len(uniques[uniques > 1])
  if diff_dates / total >= 0.6:
- return need_full_deduplication, df
+ return df

  # Additional checks

- need_full_deduplication = False
-
  duplicates = sub_df.duplicated(personal_cols, keep=False)
  duplicate_rows = sub_df[duplicates]
  if len(duplicate_rows) == 0:
- return need_full_deduplication, df
+ return df

  # if there is no different target values in personal keys duplicate rows
  nonunique_target_groups = grouped_by_personal_cols[TARGET].nunique() > 1
  if nonunique_target_groups.sum() == 0:
- return need_full_deduplication, df
+ return df

  def has_diff_target_within_60_days(rows):
  rows = rows.sort_values(by=date_col)
@@ -96,7 +92,7 @@ def remove_fintech_duplicates(
  df = df[~df.index.isin(rows_to_remove.index)]
  logger.info(f"Dataset shape after clean fintech duplicates: {df.shape}")

- return need_full_deduplication, df
+ return df


  def clean_full_duplicates(
@@ -109,8 +105,8 @@ def clean_full_duplicates(
  unique_columns = df.columns.tolist()
  if SYSTEM_RECORD_ID in unique_columns:
  unique_columns.remove(SYSTEM_RECORD_ID)
- if "sort_id" in unique_columns:
- unique_columns.remove("sort_id")
+ if SORT_ID in unique_columns:
+ unique_columns.remove(SORT_ID)
  logger.info(f"Dataset shape before clean duplicates: {df.shape}")
  df = df.drop_duplicates(subset=unique_columns)
  logger.info(f"Dataset shape after clean duplicates: {df.shape}")
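
For context, a rough sketch of the deduplication idea behind clean_full_duplicates, which the enricher now calls directly (hypothetical function and argument names; the real implementation also logs and prints resource-bundle messages): exact duplicates are dropped first, and then rows that are identical except for a conflicting target value are removed entirely.

import pandas as pd


def clean_duplicates_sketch(df: pd.DataFrame, target_column: str, service_columns: list) -> pd.DataFrame:
    # 1) drop exact duplicates, ignoring service columns such as system_record_id / sort_id
    unique_columns = [c for c in df.columns if c not in service_columns]
    df = df.drop_duplicates(subset=unique_columns)
    # 2) rows still duplicated once the target is excluded must disagree on the target,
    #    so every row of such a group is dropped
    unique_columns.remove(target_column)
    return df.drop_duplicates(subset=unique_columns, keep=False)
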
upgini/utils/features_validator.py CHANGED
@@ -55,7 +55,7 @@ class FeaturesValidator:
  return empty_or_constant_features

  @staticmethod
- def find_high_cardinality(df: pd.DataFrame):
+ def find_high_cardinality(df: pd.DataFrame) -> List[str]:
  # Remove high cardinality columns
  row_count = df.shape[0]
  return [
@@ -63,3 +63,11 @@
  for i in df
  if (is_string_dtype(df[i]) or is_integer_dtype(df[i])) and (df[i].nunique() / row_count >= 0.9)
  ]
+
+ @staticmethod
+ def find_constant_features(df: pd.DataFrame) -> List[str]:
+ return [
+ i
+ for i in df
+ if df[i].nunique() == 1
+ ]
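
A small usage example of the new helper introduced above (the example frame is made up): it simply reports columns with a single unique value, which the enricher drops before metrics calculation.

import pandas as pd

from upgini.utils.features_validator import FeaturesValidator

df = pd.DataFrame({"a": [1, 1, 1], "b": [1, 2, 3]})
print(FeaturesValidator.find_constant_features(df))  # ['a']
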
upgini/utils/target_utils.py CHANGED
@@ -6,10 +6,8 @@ import pandas as pd
  from pandas.api.types import is_numeric_dtype

  from upgini.errors import ValidationError
- from upgini.metadata import SYSTEM_RECORD_ID, ModelTaskType
- from upgini.resource_bundle import ResourceBundle, bundle, get_custom_bundle
- from upgini.sampler.random_under_sampler import RandomUnderSampler
- from upgini.utils.warning_counter import WarningCounter
+ from upgini.metadata import ModelTaskType
+ from upgini.resource_bundle import bundle


  def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
@@ -74,110 +72,3 @@ def is_int_encoding(unique_values):
  return set(unique_values) == set(range(len(unique_values))) or set(unique_values) == set(
  range(1, len(unique_values) + 1)
  )
-
-
- def balance_undersample(
- df: pd.DataFrame,
- target_column: str,
- task_type: ModelTaskType,
- random_state: int,
- imbalance_threshold: int = 0.2,
- min_sample_threshold: int = 5000,
- binary_bootstrap_loops: int = 5,
- multiclass_bootstrap_loops: int = 2,
- logger: Optional[logging.Logger] = None,
- bundle: Optional[ResourceBundle] = None,
- warning_counter: Optional[WarningCounter] = None,
- ) -> pd.DataFrame:
- if logger is None:
- logger = logging.getLogger("muted_logger")
- logger.setLevel("FATAL")
- bundle = bundle or get_custom_bundle()
- if SYSTEM_RECORD_ID not in df.columns:
- raise Exception("System record id must be presented for undersampling")
-
- count = len(df)
- target = df[target_column].copy()
- target_classes_count = target.nunique()
-
- vc = target.value_counts()
- max_class_value = vc.index[0]
- min_class_value = vc.index[len(vc) - 1]
- max_class_count = vc[max_class_value]
- min_class_count = vc[min_class_value]
-
- min_class_percent = imbalance_threshold / target_classes_count
- min_class_threshold = min_class_percent * count
-
- resampled_data = df
- df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
- if task_type == ModelTaskType.MULTICLASS:
- # Sort classes by rows count and find 25% quantile class
- classes = vc.index
- quantile25_idx = int(0.75 * len(classes)) - 1
- quantile25_class = classes[quantile25_idx]
- quantile25_class_cnt = vc[quantile25_class]
-
- if max_class_count > (quantile25_class_cnt * multiclass_bootstrap_loops):
- msg = bundle.get("imbalance_multiclass").format(quantile25_class, quantile25_class_cnt)
- logger.warning(msg)
- print(msg)
- if warning_counter:
- warning_counter.increment()
-
- # 25% and lower classes will stay as is. Higher classes will be downsampled
- sample_strategy = dict()
- for class_idx in range(quantile25_idx):
- # compare class count with count_of_quantile25_class * 2
- class_value = classes[class_idx]
- class_count = vc[class_value]
- sample_strategy[class_value] = min(class_count, quantile25_class_cnt * multiclass_bootstrap_loops)
- sampler = RandomUnderSampler(
- sampling_strategy=sample_strategy, random_state=random_state
- )
- X = df[SYSTEM_RECORD_ID]
- X = X.to_frame(SYSTEM_RECORD_ID)
- new_x, _ = sampler.fit_resample(X, target) # type: ignore
-
- resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
- elif len(df) > min_sample_threshold and min_class_count < min_sample_threshold / 2:
- msg = bundle.get("dataset_rarest_class_less_threshold").format(
- min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
- )
- logger.warning(msg)
- print(msg)
- if warning_counter:
- warning_counter.increment()
-
- # fill up to min_sample_threshold by majority class
- minority_class = df[df[target_column] == min_class_value]
- majority_class = df[df[target_column] != min_class_value]
- sample_size = min(len(majority_class, min_sample_threshold - min_class_count))
- sampled_majority_class = majority_class.sample(
- n=sample_size, random_state=random_state
- )
- resampled_data = df[
- (df[SYSTEM_RECORD_ID].isin(minority_class[SYSTEM_RECORD_ID]))
- | (df[SYSTEM_RECORD_ID].isin(sampled_majority_class[SYSTEM_RECORD_ID]))
- ]
-
- elif max_class_count > min_class_count * binary_bootstrap_loops:
- msg = bundle.get("dataset_rarest_class_less_threshold").format(
- min_class_value, min_class_count, min_class_threshold, min_class_percent * 100
- )
- logger.warning(msg)
- print(msg)
- if warning_counter:
- warning_counter.increment()
-
- sampler = RandomUnderSampler(
- sampling_strategy={max_class_value: binary_bootstrap_loops * min_class_count}, random_state=random_state
- )
- X = df[SYSTEM_RECORD_ID]
- X = X.to_frame(SYSTEM_RECORD_ID)
- new_x, _ = sampler.fit_resample(X, target) # type: ignore
-
- resampled_data = df[df[SYSTEM_RECORD_ID].isin(new_x[SYSTEM_RECORD_ID])]
-
- logger.info(f"Shape after rebalance resampling: {resampled_data}")
- return resampled_data
upgini-1.1.253.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: upgini
- Version: 1.1.252a5
+ Version: 1.1.253
  Summary: Intelligent data search & enrichment for Machine Learning
  Home-page: https://upgini.com/
  Author: Upgini Developers
upgini-1.1.253.dist-info/RECORD CHANGED
@@ -1,13 +1,13 @@
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
  upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
- upgini/dataset.py,sha256=jNYcD86UhmC-3in51bnX51uoFFgXo0gfSvuHxag1VyE,47816
+ upgini/dataset.py,sha256=tLa0aEcT7XwVJz1AawXCIEj3vxsSBi-geKicuYpRIMw,48196
  upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
- upgini/features_enricher.py,sha256=VQUrzZw4uPNw0PiunE7GjOebELOlelvaOcYh0aL9yeY,168016
+ upgini/features_enricher.py,sha256=dP6Oyhi4erESEGlVFA_j67lqhNqNvbkfM4FGpE_WaTU,171760
  upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
  upgini/http.py,sha256=eSG4gOpmCGlXmB6KIPNzAG8tRZNUjyYpMeUeHw_2li4,42264
  upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
  upgini/metrics.py,sha256=LS2MgEKgmn9VEXsKzxv3pBZ-q71mTnpWu6vL8fYgpo4,26727
- upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
+ upgini/search_task.py,sha256=5n4qGJmtu48s0-FHAtF3L5qVLMd1JVW3FJlM8dFbh-s,17063
  upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
@@ -21,7 +21,7 @@ upgini/autofe/operand.py,sha256=Rhy7Ky3we-I1Su1--dS4xdsO3K8neV4rqM_Q4xYE4ug,2779
  upgini/autofe/unary.py,sha256=gyMkrx9bfa3o19zS-4JaRlScHrfeZGBsYe7d_6ePT-0,2853
  upgini/autofe/vector.py,sha256=Qk7VmdwURNwVw7fIMEspWEo7HTiyUWCYIqu3hcWQQio,507
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- upgini/data_source/data_source_publisher.py,sha256=LZ8iZHGrGVP1c7xIk57zio9OqN3VmBS_S3NW9Xs2pL4,15124
+ upgini/data_source/data_source_publisher.py,sha256=ZMNyh1x1S3QkXkA-PTtBQ-sbOiANtNioEQs8VoQ24Lk,15110
  upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
  upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -34,29 +34,29 @@ upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
  upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
  upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
- upgini/utils/__init__.py,sha256=XDL_YTIPiCbd8BPHwAx4HMDfLXP5xH8NFPEI0V9wOr4,336
+ upgini/utils/__init__.py,sha256=dQ4-s8-sZ5eOBZ-mH3gEwDHTdI0wI1bUAVgVqUKKPx4,786
  upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
  upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
  upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
  upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
  upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
- upgini/utils/datetime_utils.py,sha256=b8pyNhrC8ni6apsLQivQOiKqu-37pU4EF3nNHPZqiN8,8713
- upgini/utils/deduplicate_utils.py,sha256=GRPwD8bXZNspKvf19W3SrYjqg1qQMDlZD-BDkHnKYyo,6176
+ upgini/utils/datetime_utils.py,sha256=P5no4mFgYpEP6oY524ebTKvKc3TBMJzAYpWdj210_Fw,8699
+ upgini/utils/deduplicate_utils.py,sha256=ckJrpU8Ruc_vcwIPTopbUjyJuNiseLHNAbQlLfhUCxo,5888
  upgini/utils/display_utils.py,sha256=tiq5sFOfMwkKCjQ7OGdyK_twe0Qdr9F3mzkW1QXSDog,10664
  upgini/utils/email_utils.py,sha256=3CvHXTSzlgLyGsQOXfRYVfFhfPy6OXG4uXOBWRaLfHg,3479
  upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
- upgini/utils/features_validator.py,sha256=iP8muF3PUf_aP9m7O3i3LPMuJPTNbw8rCAWqgvDt_h8,2369
+ upgini/utils/features_validator.py,sha256=VexG-9p63ni66Hf9T7dgP4iUAhpXqwo3sgMwBK_eii8,2565
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
  upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
  upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
  upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
  upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
  upgini/utils/sklearn_ext.py,sha256=fvuTWJ5AnT3ED9KSaQu_yIgW2JR19hFlaGDoVP3k60g,44027
- upgini/utils/target_utils.py,sha256=OzW1dlhW0tQj5FBR-iIIjdpzqIGfGFRoYePppP8yRhw,7204
+ upgini/utils/target_utils.py,sha256=DH812qcZ7Pvf9WVVb33fbwQjb1W9h1hXRNCCiG7Y6tI,2563
  upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
- upgini-1.1.252a5.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
- upgini-1.1.252a5.dist-info/METADATA,sha256=xynsF_WVgmRp0hcYCqjqvGfF3PcaBXUzzwqKlGX0HbE,48158
- upgini-1.1.252a5.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
- upgini-1.1.252a5.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
- upgini-1.1.252a5.dist-info/RECORD,,
+ upgini-1.1.253.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
+ upgini-1.1.253.dist-info/METADATA,sha256=6FwSFP4xzkd9GTHCyToBORKRQEriGSZKJPs1O1ujbcI,48156
+ upgini-1.1.253.dist-info/WHEEL,sha256=oiQVh_5PnQM0E3gPdiz09WCNmwiHDMaGer_elqB3coM,92
+ upgini-1.1.253.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
+ upgini-1.1.253.dist-info/RECORD,,