upgini 1.2.23__py3-none-any.whl → 1.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.23"
1
+ __version__ = "1.2.24"
upgini/metrics.py CHANGED
@@ -273,6 +273,9 @@ class EstimatorWrapper:
273
273
  else:
274
274
  x, y = self._remove_empty_target_rows(x, y)
275
275
 
276
+ # Make order of columns idempotent
277
+ x = x[sorted(x.columns)]
278
+
276
279
  self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
277
280
  return x, y, groups
278
281
 
@@ -434,7 +437,8 @@ class EstimatorWrapper:
434
437
  f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
435
438
  )
436
439
  estimator_copy.set_params(
437
- cat_features=[x.columns.get_loc(cat_feature) for cat_feature in cat_features]
440
+ # cat_features=[x.columns.get_loc(cat_feature) for cat_feature in cat_features]
441
+ cat_features=cat_features
438
442
  )
439
443
  estimator = CatBoostWrapper(**kwargs)
440
444
  else:
upgini/resource_bundle/strings.properties CHANGED
@@ -171,7 +171,7 @@ dataset_invalid_column_type=Unsupported data type of column {}: {}
171
171
  dataset_invalid_filter=Unknown field in filter_features. Should be {'min_importance', 'max_psi', 'max_count', 'selected_features'}.
172
172
  dataset_too_big_file=Too big size of dataframe X for processing. Please reduce number of rows or columns
173
173
  dataset_transform_diff_fit=You try to enrich dataset that column names are different from the train dataset column names that you used on the fit stage. Please make the column names the same as in the train dataset and restart.
174
- binary_small_dataset=The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class.
174
+ binary_small_dataset=The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class.\n
175
175
  all_search_keys_invalid=All search keys are invalid
176
176
  all_emails_invalid=All values in column {} are invalid emails # Metrics validation
177
177
  metrics_msle_negative_target=Mean Squared Logarithmic Error cannot be used when y contain negative values
upgini/utils/deduplicate_utils.py CHANGED
@@ -24,7 +24,7 @@ def remove_fintech_duplicates(
24
24
  date_format: Optional[str] = None,
25
25
  logger: Optional[Logger] = None,
26
26
  bundle: ResourceBundle = None,
27
- ) -> tuple[pd.DataFrame, Optional[List[str]]]:
27
+ ) -> Tuple[pd.DataFrame, Optional[List[str]]]:
28
28
  # Initial checks for target type and date column
29
29
  bundle = bundle or get_custom_bundle()
30
30
  if logger is None:
@@ -60,7 +60,7 @@ def remove_fintech_duplicates(
60
60
 
61
61
  warning_messages = []
62
62
 
63
- def process_df(segment_df: pd.DataFrame, eval_index=0) -> tuple[pd.DataFrame, Optional[str]]:
63
+ def process_df(segment_df: pd.DataFrame, eval_index=0) -> Tuple[pd.DataFrame, Optional[str]]:
64
64
  """Process a subset of the dataset to remove duplicates based on personal keys."""
65
65
  # Fast check for duplicates based on personal keys
66
66
  if not segment_df[personal_cols].duplicated().any():
upgini-1.2.24.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.23
3
+ Version: 1.2.24
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
upgini-1.2.24.dist-info/RECORD CHANGED
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=y_Ev8AcJxzZe4ZJWlW3Wsver97OJUqU1nFSDqHzKBDw,23
1
+ upgini/__about__.py,sha256=rRcFnLqwG22zZ399qswskAE5L_if50hEsd_TKzGcrZ4,23
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
@@ -7,7 +7,7 @@ upgini/features_enricher.py,sha256=rctS3kRWwTJmU5X203t7sUZ_B40XYVBPeXy_0hPw2Ec,1
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
10
- upgini/metrics.py,sha256=10Cg_6cqIOsZyz2tO4GGbCxiBH7lGb35Vh-pR6IUzLg,34459
10
+ upgini/metrics.py,sha256=PoY1fq6XYAHNzn-rmnwRQZjCoVYP5bJNmKhR0ST2Txk,34588
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=h1GViOWzULy5vf6M4dpTJuIk-4V38UCrTY1sb9yLa5I,1594
@@ -30,7 +30,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
30
30
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
31
31
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
32
32
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
33
- upgini/resource_bundle/strings.properties,sha256=wxdxH13ncXjRion__GCM_ecllCDjGqOhOxZ41beFslg,26665
33
+ upgini/resource_bundle/strings.properties,sha256=ikL5KvPcJz9fGyVK-xOvvo6LyRfeOey8xXjoq5nnWqU,26667
34
34
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
35
35
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -43,7 +43,7 @@ upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk
43
43
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
44
44
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
45
45
  upgini/utils/datetime_utils.py,sha256=a8X4jX2y3-6E7ZNZIG5z61qfzCvsvaNEjR1Bi5KUqfM,11279
46
- upgini/utils/deduplicate_utils.py,sha256=kINO1KoH8kPRA3JSYogzv4jaUP1Ceguv5etBPtLcsSw,8855
46
+ upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
47
47
  upgini/utils/display_utils.py,sha256=NGhki1aGMsS8OeI69eLXEpmS_s41k8ojKHQxacJaXiU,11493
48
48
  upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
49
49
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=qHzZRmICFbLNCrmVqGkaBcjm91L2ERRZMppci36acV4,10085
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
60
- upgini-1.2.23.dist-info/METADATA,sha256=DGV0FR8F9_7casA5R9U3b22oSUhXuZeX0RfNGnMgnQ8,48578
61
- upgini-1.2.23.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.2.23.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.23.dist-info/RECORD,,
60
+ upgini-1.2.24.dist-info/METADATA,sha256=eRRiMIY75gP4H4Y20_D9dmut5jCgx_siV-TrG_VA_qg,48578
61
+ upgini-1.2.24.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.2.24.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.2.24.dist-info/RECORD,,