upgini 1.2.23__tar.gz → 1.2.24a2__tar.gz

This diff compares the contents of two publicly available versions of the package, each released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (65)
  1. {upgini-1.2.23 → upgini-1.2.24a2}/PKG-INFO +1 -1
  2. upgini-1.2.24a2/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/metrics.py +3 -0
  4. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/strings.properties +1 -1
  5. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/deduplicate_utils.py +2 -2
  6. upgini-1.2.23/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.23 → upgini-1.2.24a2}/.gitignore +0 -0
  8. {upgini-1.2.23 → upgini-1.2.24a2}/LICENSE +0 -0
  9. {upgini-1.2.23 → upgini-1.2.24a2}/README.md +0 -0
  10. {upgini-1.2.23 → upgini-1.2.24a2}/pyproject.toml +0 -0
  11. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/all_operands.py +0 -0
  17. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/operand.py +0 -0
  22. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/unary.py +0 -0
  23. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/autofe/vector.py +0 -0
  24. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/data_source/__init__.py +0 -0
  25. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/data_source/data_source_publisher.py +0 -0
  26. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/dataset.py +0 -0
  27. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/errors.py +0 -0
  28. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/features_enricher.py +0 -0
  29. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/http.py +0 -0
  30. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/lazy_import.py +0 -0
  31. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/mdc/__init__.py +0 -0
  32. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/mdc/context.py +0 -0
  33. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/metadata.py +0 -0
  34. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/normalizer/__init__.py +0 -0
  35. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/normalizer/normalize_utils.py +0 -0
  36. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/__init__.py +0 -0
  37. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/exceptions.py +0 -0
  38. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/__init__.py +0 -0
  46. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/base_search_key_detector.py +0 -0
  47. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/blocked_time_series.py +0 -0
  48. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/country_utils.py +0 -0
  49. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/custom_loss_utils.py +0 -0
  50. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/cv_utils.py +0 -0
  51. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/datetime_utils.py +0 -0
  52. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/display_utils.py +0 -0
  53. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/email_utils.py +0 -0
  54. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/fallback_progress_bar.py +0 -0
  55. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/features_validator.py +0 -0
  56. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/format.py +0 -0
  57. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/ip_utils.py +0 -0
  58. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/phone_utils.py +0 -0
  59. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/postal_code_utils.py +0 -0
  60. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/progress_bar.py +0 -0
  61. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/sklearn_ext.py +0 -0
  62. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/target_utils.py +0 -0
  63. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/track_info.py +0 -0
  64. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/utils/warning_counter.py +0 -0
  65. {upgini-1.2.23 → upgini-1.2.24a2}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.23
3
+ Version: 1.2.24a2
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.24a2"
@@ -273,6 +273,9 @@ class EstimatorWrapper:
273
273
  else:
274
274
  x, y = self._remove_empty_target_rows(x, y)
275
275
 
276
+ # Make order of columns idempotent
277
+ x = x[sorted(x.columns)]
278
+
276
279
  self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
277
280
  return x, y, groups
278
281
 
@@ -171,7 +171,7 @@ dataset_invalid_column_type=Unsupported data type of column {}: {}
171
171
  dataset_invalid_filter=Unknown field in filter_features. Should be {'min_importance', 'max_psi', 'max_count', 'selected_features'}.
172
172
  dataset_too_big_file=Too big size of dataframe X for processing. Please reduce number of rows or columns
173
173
  dataset_transform_diff_fit=You try to enrich dataset that column names are different from the train dataset column names that you used on the fit stage. Please make the column names the same as in the train dataset and restart.
174
- binary_small_dataset=The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class.
174
+ binary_small_dataset=The least populated class in Target contains less than 1000 rows.\nSmall numbers of observations may negatively affect the number of selected features and quality of your ML model.\nUpgini recommends you increase the number of observations in the least populated class.\n
175
175
  all_search_keys_invalid=All search keys are invalid
176
176
  all_emails_invalid=All values in column {} are invalid emails # Metrics validation
177
177
  metrics_msle_negative_target=Mean Squared Logarithmic Error cannot be used when y contain negative values
@@ -24,7 +24,7 @@ def remove_fintech_duplicates(
24
24
  date_format: Optional[str] = None,
25
25
  logger: Optional[Logger] = None,
26
26
  bundle: ResourceBundle = None,
27
- ) -> tuple[pd.DataFrame, Optional[List[str]]]:
27
+ ) -> Tuple[pd.DataFrame, Optional[List[str]]]:
28
28
  # Initial checks for target type and date column
29
29
  bundle = bundle or get_custom_bundle()
30
30
  if logger is None:
@@ -60,7 +60,7 @@ def remove_fintech_duplicates(
60
60
 
61
61
  warning_messages = []
62
62
 
63
- def process_df(segment_df: pd.DataFrame, eval_index=0) -> tuple[pd.DataFrame, Optional[str]]:
63
+ def process_df(segment_df: pd.DataFrame, eval_index=0) -> Tuple[pd.DataFrame, Optional[str]]:
64
64
  """Process a subset of the dataset to remove duplicates based on personal keys."""
65
65
  # Fast check for duplicates based on personal keys
66
66
  if not segment_df[personal_cols].duplicated().any():
@@ -1 +0,0 @@
1
- __version__ = "1.2.23"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes