upgini 1.2.96a3906.dev1__py3-none-any.whl → 1.2.96a3906.dev2__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.96a3906.dev1"
1
+ __version__ = "1.2.96a3906.dev2"
@@ -74,33 +74,19 @@ class Normalizer:
74
74
  new_columns = []
75
75
  dup_counter = 0
76
76
  for column in df.columns:
77
- if (
78
- column
79
- in [
80
- TARGET,
81
- EVAL_SET_INDEX,
82
- SYSTEM_RECORD_ID,
83
- ENTITY_SYSTEM_RECORD_ID,
84
- SEARCH_KEY_UNNEST,
85
- DateTimeSearchKeyConverter.DATETIME_COL,
86
- ]
87
- ):
77
+ if column in [
78
+ TARGET,
79
+ EVAL_SET_INDEX,
80
+ SYSTEM_RECORD_ID,
81
+ ENTITY_SYSTEM_RECORD_ID,
82
+ SEARCH_KEY_UNNEST,
83
+ DateTimeSearchKeyConverter.DATETIME_COL,
84
+ ]:
88
85
  self.columns_renaming[column] = column
89
86
  new_columns.append(column)
90
87
  continue
91
88
 
92
- new_column = str(column)
93
- suffix = hashlib.sha256(new_column.encode()).hexdigest()[:6]
94
- if len(new_column) == 0:
95
- raise ValidationError(self.bundle.get("dataset_empty_column_names"))
96
- # db limit for column length
97
- if len(new_column) > 250:
98
- new_column = new_column[:250]
99
-
100
- # make column name unique relative to server features
101
- new_column = f"{new_column}_{suffix}"
102
-
103
- new_column = new_column.lower()
89
+ new_column = add_hash_suffix(column, self.bundle)
104
90
 
105
91
  # if column starts with non alphabetic symbol then add "a" to the beginning of string
106
92
  if ord(new_column[0]) not in range(ord("a"), ord("z") + 1):
@@ -198,3 +184,19 @@ class Normalizer:
198
184
  if not is_numeric_dtype(df[f]):
199
185
  df[f] = df[f].astype("string")
200
186
  return df
187
+
188
+
189
+ def add_hash_suffix(column: str, bundle: ResourceBundle | None = None) -> str:
190
+ new_column = str(column)
191
+ suffix = hashlib.sha256(new_column.encode()).hexdigest()[:6]
192
+ if bundle is not None and len(new_column) == 0:
193
+ raise ValidationError(bundle.get("dataset_empty_column_names"))
194
+ # db limit for column length
195
+ if len(new_column) > 250:
196
+ new_column = new_column[:250]
197
+
198
+ # make column name unique relative to server features
199
+ new_column = f"{new_column}_{suffix}"
200
+
201
+ new_column = new_column.lower()
202
+ return new_column
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.96a3906.dev1
3
+ Version: 1.2.96a3906.dev2
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,4 +1,4 @@
1
- upgini/__about__.py,sha256=PUiYzCofvZt-NqYbWaNaLcmWYA89yEpQxOLTh8v2lac,33
1
+ upgini/__about__.py,sha256=8ZaMc0M4yKUigIQciHTdkff0EFfiqt8pmRDvJz70MsQ,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=e6JDYTZ2AwC5aF-dqclKZKkiKrHo2f6cFmMQO2ZZmjM,32724
@@ -35,7 +35,7 @@ upgini/data_source/data_source_publisher.py,sha256=ufL8qK1vg8iUKd5bLWz6hEMGiC3Je
35
35
  upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
36
36
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
37
37
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- upgini/normalizer/normalize_utils.py,sha256=g2TcDXZeJp9kAFO2sTqZ4CAsN4J1qHNgoJHZ8gtzUWo,7376
38
+ upgini/normalizer/normalize_utils.py,sha256=hMHi5u6Oleqp885UW0Q0Uf1F8fRdZ5jJ7NYoY52SqaI,7403
39
39
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
40
40
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
41
41
  upgini/resource_bundle/strings.properties,sha256=Hfpr2-I5Ws6ugIN1QSz549OHayZeLYglRsbrGDT6g9g,28491
@@ -71,7 +71,7 @@ upgini/utils/target_utils.py,sha256=i3Xt5l9ybB2_nF_ma5cfPuL3OeFTs2dY2xDI0p4Azpg,
71
71
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
72
72
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
73
73
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
74
- upgini-1.2.96a3906.dev1.dist-info/METADATA,sha256=tsYVKpMvgRQC8vNFCNlPhKvjxD8mysNzj7E4BWM18Gc,49538
75
- upgini-1.2.96a3906.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
76
- upgini-1.2.96a3906.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
77
- upgini-1.2.96a3906.dev1.dist-info/RECORD,,
74
+ upgini-1.2.96a3906.dev2.dist-info/METADATA,sha256=IiF040yVE4K20_1kus6Y4tzj6rDeA2X8kYkhJ0_Kxr8,49538
75
+ upgini-1.2.96a3906.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
76
+ upgini-1.2.96a3906.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
77
+ upgini-1.2.96a3906.dev2.dist-info/RECORD,,