upgini 1.2.121a1__tar.gz → 1.2.121a3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. {upgini-1.2.121a1 → upgini-1.2.121a3}/PKG-INFO +1 -1
  2. upgini-1.2.121a3/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/resource_bundle/strings.properties +1 -1
  4. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/features_validator.py +13 -9
  5. upgini-1.2.121a1/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.121a1 → upgini-1.2.121a3}/.gitignore +0 -0
  7. {upgini-1.2.121a1 → upgini-1.2.121a3}/LICENSE +0 -0
  8. {upgini-1.2.121a1 → upgini-1.2.121a3}/README.md +0 -0
  9. {upgini-1.2.121a1 → upgini-1.2.121a3}/pyproject.toml +0 -0
  10. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/all_operators.py +0 -0
  16. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/operator.py +0 -0
  21. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/__init__.py +0 -0
  22. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/base.py +0 -0
  23. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/cross.py +0 -0
  24. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/delta.py +0 -0
  25. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/lag.py +0 -0
  26. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/roll.py +0 -0
  27. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/trend.py +0 -0
  28. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/timeseries/volatility.py +0 -0
  29. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/unary.py +0 -0
  30. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/utils.py +0 -0
  31. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/autofe/vector.py +0 -0
  32. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/data_source/__init__.py +0 -0
  33. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/data_source/data_source_publisher.py +0 -0
  34. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/dataset.py +0 -0
  35. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/errors.py +0 -0
  36. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/features_enricher.py +0 -0
  37. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/http.py +0 -0
  38. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/mdc/__init__.py +0 -0
  39. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/mdc/context.py +0 -0
  40. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/metadata.py +0 -0
  41. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/metrics.py +0 -0
  42. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/normalizer/__init__.py +0 -0
  43. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/normalizer/normalize_utils.py +0 -0
  44. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/resource_bundle/__init__.py +0 -0
  45. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/resource_bundle/exceptions.py +0 -0
  46. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  47. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/sampler/__init__.py +0 -0
  48. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/sampler/base.py +0 -0
  49. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/sampler/random_under_sampler.py +0 -0
  50. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/sampler/utils.py +0 -0
  51. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/search_task.py +0 -0
  52. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/spinner.py +0 -0
  53. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  54. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/__init__.py +0 -0
  55. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/base_search_key_detector.py +0 -0
  56. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/blocked_time_series.py +0 -0
  57. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/config.py +0 -0
  58. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/country_utils.py +0 -0
  59. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/custom_loss_utils.py +0 -0
  60. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/cv_utils.py +0 -0
  61. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/datetime_utils.py +0 -0
  62. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/deduplicate_utils.py +0 -0
  63. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/display_utils.py +0 -0
  64. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/email_utils.py +0 -0
  65. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/fallback_progress_bar.py +0 -0
  66. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/feature_info.py +0 -0
  67. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/hash_utils.py +0 -0
  69. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/ip_utils.py +0 -0
  70. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/mstats.py +0 -0
  71. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/phone_utils.py +0 -0
  72. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/postal_code_utils.py +0 -0
  73. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/progress_bar.py +0 -0
  74. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/psi.py +0 -0
  75. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/sample_utils.py +0 -0
  76. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/sklearn_ext.py +0 -0
  77. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/sort.py +0 -0
  78. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/target_utils.py +0 -0
  79. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/track_info.py +0 -0
  80. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/ts_utils.py +0 -0
  81. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/utils/warning_counter.py +0 -0
  82. {upgini-1.2.121a1 → upgini-1.2.121a3}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.121a1
3
+ Version: 1.2.121a3
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.121a3"
@@ -155,7 +155,7 @@ target_outliers_warning=We detected {} outliers in your sample.\nExamples of out
155
155
  # features validation
156
156
  empty_or_contant_features=Columns {} has value with frequency more than 99%, removed from X
157
157
  high_cardinality_features=Columns {} has high cardinality (>90% unique values), removed from X
158
- # one_hot_encoded_features=One hot encoded features detected. Use int encoding for correct results of fit.\n{}
158
+ one_hot_encoded_features=One hot encoded features detected: {}
159
159
 
160
160
  # Dataset validation
161
161
  dataset_too_few_rows=X size should be at least {} rows after validation
@@ -36,9 +36,6 @@ class FeaturesValidator:
36
36
  value_counts = column.value_counts(dropna=False, normalize=True)
37
37
  most_frequent_percent = value_counts.iloc[0]
38
38
 
39
- if most_frequent_percent >= 0.99:
40
- empty_or_constant_features.append(f)
41
-
42
39
  if len(value_counts) == 1:
43
40
  empty_or_constant_features.append(f)
44
41
  elif most_frequent_percent >= 0.99:
@@ -103,18 +100,25 @@ class FeaturesValidator:
103
100
  @staticmethod
104
101
  def is_one_hot_encoded(series: pd.Series) -> bool:
105
102
  try:
106
- # Column contains only 0 and 1 (as strings or numbers)
107
- series = series.astype(float)
108
- if set(series.unique()) != {0.0, 1.0}:
103
+ # First, handle string representations of True/False
104
+ series_copy = series.copy()
105
+ if series_copy.dtype == "object" or series_copy.dtype == "string":
106
+ # Convert string representations of boolean values to numeric
107
+ series_copy = series_copy.astype(str).str.strip().str.lower()
108
+ series_copy = series_copy.replace({"true": "1", "false": "0"})
109
+
110
+ # Column contains only 0 and 1 (as strings or numbers or booleans)
111
+ series_copy = series_copy.astype(float)
112
+ if set(series_copy.unique()) != {0.0, 1.0}:
109
113
  return False
110
114
 
111
- series = series.astype(int)
115
+ series_copy = series_copy.astype(int)
112
116
 
113
117
  # Column doesn't contain any NaN, np.NaN, space, null, etc.
114
- if not (series.isin([0, 1])).all():
118
+ if not (series_copy.isin([0, 1])).all():
115
119
  return False
116
120
 
117
- vc = series.value_counts()
121
+ vc = series_copy.value_counts()
118
122
  # Column should contain both 0 and 1
119
123
  if len(vc) != 2:
120
124
  return False
@@ -1 +0,0 @@
1
- __version__ = "1.2.121a1"
File without changes
File without changes
File without changes
File without changes
File without changes