upgini 1.2.96a3906.dev1__tar.gz → 1.2.96a3906.dev2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/PKG-INFO +1 -1
  2. upgini-1.2.96a3906.dev2/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/normalizer/normalize_utils.py +25 -23
  4. upgini-1.2.96a3906.dev1/src/upgini/__about__.py +0 -1
  5. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/.gitignore +0 -0
  6. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/LICENSE +0 -0
  7. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/README.md +0 -0
  8. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/pyproject.toml +0 -0
  9. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/__init__.py +0 -0
  10. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/ads.py +0 -0
  11. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/__init__.py +0 -0
  14. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/all_operators.py +0 -0
  15. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/binary.py +0 -0
  16. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/operator.py +0 -0
  20. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/__init__.py +0 -0
  21. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/base.py +0 -0
  22. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/cross.py +0 -0
  23. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/delta.py +0 -0
  24. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/lag.py +0 -0
  25. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/roll.py +0 -0
  26. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/trend.py +0 -0
  27. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/timeseries/volatility.py +0 -0
  28. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/unary.py +0 -0
  29. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/utils.py +0 -0
  30. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/autofe/vector.py +0 -0
  31. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/data_source/__init__.py +0 -0
  32. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/data_source/data_source_publisher.py +0 -0
  33. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/dataset.py +0 -0
  34. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/errors.py +0 -0
  35. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/features_enricher.py +0 -0
  36. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/http.py +0 -0
  37. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/metadata.py +0 -0
  40. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/metrics.py +0 -0
  41. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/resource_bundle/__init__.py +0 -0
  43. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/resource_bundle/exceptions.py +0 -0
  44. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/resource_bundle/strings.properties +0 -0
  45. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  46. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/sampler/__init__.py +0 -0
  47. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/sampler/base.py +0 -0
  48. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/sampler/random_under_sampler.py +0 -0
  49. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/sampler/utils.py +0 -0
  50. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/search_task.py +0 -0
  51. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/spinner.py +0 -0
  52. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  53. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/__init__.py +0 -0
  54. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/base_search_key_detector.py +0 -0
  55. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/blocked_time_series.py +0 -0
  56. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/country_utils.py +0 -0
  57. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/custom_loss_utils.py +0 -0
  58. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/cv_utils.py +0 -0
  59. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/datetime_utils.py +0 -0
  60. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/deduplicate_utils.py +0 -0
  61. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/display_utils.py +0 -0
  62. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/email_utils.py +0 -0
  63. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/fallback_progress_bar.py +0 -0
  64. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/feature_info.py +0 -0
  65. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/features_validator.py +0 -0
  66. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/format.py +0 -0
  67. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/ip_utils.py +0 -0
  68. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/mstats.py +0 -0
  69. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/phone_utils.py +0 -0
  70. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/postal_code_utils.py +0 -0
  71. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/progress_bar.py +0 -0
  72. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/sample_utils.py +0 -0
  73. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/sklearn_ext.py +0 -0
  74. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/sort.py +0 -0
  75. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/target_utils.py +0 -0
  76. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/track_info.py +0 -0
  77. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/ts_utils.py +0 -0
  78. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/utils/warning_counter.py +0 -0
  79. {upgini-1.2.96a3906.dev1 → upgini-1.2.96a3906.dev2}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.96a3906.dev1
3
+ Version: 1.2.96a3906.dev2
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.96a3906.dev2"
@@ -74,33 +74,19 @@ class Normalizer:
74
74
  new_columns = []
75
75
  dup_counter = 0
76
76
  for column in df.columns:
77
- if (
78
- column
79
- in [
80
- TARGET,
81
- EVAL_SET_INDEX,
82
- SYSTEM_RECORD_ID,
83
- ENTITY_SYSTEM_RECORD_ID,
84
- SEARCH_KEY_UNNEST,
85
- DateTimeSearchKeyConverter.DATETIME_COL,
86
- ]
87
- ):
77
+ if column in [
78
+ TARGET,
79
+ EVAL_SET_INDEX,
80
+ SYSTEM_RECORD_ID,
81
+ ENTITY_SYSTEM_RECORD_ID,
82
+ SEARCH_KEY_UNNEST,
83
+ DateTimeSearchKeyConverter.DATETIME_COL,
84
+ ]:
88
85
  self.columns_renaming[column] = column
89
86
  new_columns.append(column)
90
87
  continue
91
88
 
92
- new_column = str(column)
93
- suffix = hashlib.sha256(new_column.encode()).hexdigest()[:6]
94
- if len(new_column) == 0:
95
- raise ValidationError(self.bundle.get("dataset_empty_column_names"))
96
- # db limit for column length
97
- if len(new_column) > 250:
98
- new_column = new_column[:250]
99
-
100
- # make column name unique relative to server features
101
- new_column = f"{new_column}_{suffix}"
102
-
103
- new_column = new_column.lower()
89
+ new_column = add_hash_suffix(column, self.bundle)
104
90
 
105
91
  # if column starts with non alphabetic symbol then add "a" to the beginning of string
106
92
  if ord(new_column[0]) not in range(ord("a"), ord("z") + 1):
@@ -198,3 +184,19 @@ class Normalizer:
198
184
  if not is_numeric_dtype(df[f]):
199
185
  df[f] = df[f].astype("string")
200
186
  return df
187
+
188
+
189
+ def add_hash_suffix(column: str, bundle: ResourceBundle | None = None) -> str:
190
+ new_column = str(column)
191
+ suffix = hashlib.sha256(new_column.encode()).hexdigest()[:6]
192
+ if bundle is not None and len(new_column) == 0:
193
+ raise ValidationError(bundle.get("dataset_empty_column_names"))
194
+ # db limit for column length
195
+ if len(new_column) > 250:
196
+ new_column = new_column[:250]
197
+
198
+ # make column name unique relative to server features
199
+ new_column = f"{new_column}_{suffix}"
200
+
201
+ new_column = new_column.lower()
202
+ return new_column
@@ -1 +0,0 @@
1
- __version__ = "1.2.96a3906.dev1"