upgini 1.2.33__py3-none-any.whl → 1.2.34a1__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.33"
1
+ __version__ = "1.2.34a1"
upgini/dataset.py CHANGED
@@ -422,11 +422,11 @@ class Dataset: # (pd.DataFrame):
422
422
  + "".join("<tr>" + "".join(map(map_color, row[1:])) + "</tr>" for row in df_stats.itertuples())
423
423
  + "</table>"
424
424
  )
425
- print()
426
425
  display(HTML(html_stats))
427
- except (ImportError, NameError):
428
426
  print()
427
+ except (ImportError, NameError):
429
428
  print(df_stats)
429
+ print()
430
430
 
431
431
  if len(self.data) == 0:
432
432
  raise ValidationError(self.bundle.get("all_search_keys_invalid"))
@@ -494,11 +494,17 @@ class Dataset: # (pd.DataFrame):
494
494
  taskType=self.task_type,
495
495
  )
496
496
 
497
+ @staticmethod
498
+ def is_column_binary_type(column):
499
+ return column.apply(lambda x: x is None or isinstance(x, (bytes, bytearray))).all()
500
+
497
501
  def __get_data_type(self, pandas_data_type, column_name: str) -> DataType:
498
502
  if is_integer_dtype(pandas_data_type):
499
503
  return DataType.INT
500
504
  elif is_float_dtype(pandas_data_type):
501
505
  return DataType.DECIMAL
506
+ elif self.is_column_binary_type(self.data[column_name]):
507
+ return DataType.BYTES
502
508
  elif is_string_dtype(pandas_data_type) or is_object_dtype(pandas_data_type):
503
509
  return DataType.STRING
504
510
  else:
upgini/metadata.py CHANGED
@@ -43,6 +43,9 @@ class FileColumnMeaningType(Enum):
43
43
  EVAL_SET_INDEX = "EVAL_SET_INDEX"
44
44
  ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
45
45
  UNNEST_KEY = "UNNEST_KEY"
46
+ IP_BINARY = "IP_BINARY"
47
+ IP_RANGE_FROM_BINARY = "IP_RANGE_FROM_BINARY"
48
+ IP_RANGE_TO_BINARY = "IP_RANGE_TO_BINARY"
46
49
 
47
50
 
48
51
  class SearchKey(Enum):
@@ -60,6 +63,9 @@ class SearchKey(Enum):
60
63
  IPV6_ADDRESS = FileColumnMeaningType.IPV6_ADDRESS
61
64
  IPV6_RANGE_FROM = FileColumnMeaningType.IPV6_RANGE_FROM
62
65
  IPV6_RANGE_TO = FileColumnMeaningType.IPV6_RANGE_TO
66
+ IP_BINARY = FileColumnMeaningType.IP_BINARY
67
+ IP_RANGE_FROM_BINARY = FileColumnMeaningType.IP_RANGE_FROM_BINARY
68
+ IP_RANGE_TO_BINARY = FileColumnMeaningType.IP_RANGE_TO_BINARY
63
69
 
64
70
  # For data source registration. Don't use it for FeaturesEnricher
65
71
  EMAIL_ONE_DOMAIN = FileColumnMeaningType.EMAIL_ONE_DOMAIN
@@ -112,6 +118,12 @@ class SearchKey(Enum):
112
118
  return SearchKey.MSISDN_RANGE_FROM
113
119
  if meaning_type == FileColumnMeaningType.MSISDN_RANGE_TO:
114
120
  return SearchKey.MSISDN_RANGE_TO
121
+ if meaning_type == FileColumnMeaningType.IP_BINARY:
122
+ return SearchKey.IP_BINARY
123
+ if meaning_type == FileColumnMeaningType.IP_RANGE_FROM_BINARY:
124
+ return SearchKey.IP_RANGE_FROM_BINARY
125
+ if meaning_type == FileColumnMeaningType.IP_RANGE_TO_BINARY:
126
+ return SearchKey.IP_RANGE_TO_BINARY
115
127
 
116
128
  @staticmethod
117
129
  def find_key(search_keys: Dict[str, SearchKey], keys: Union[SearchKey, List[SearchKey]]) -> Optional[SearchKey]:
@@ -136,6 +148,7 @@ class DataType(Enum):
136
148
  DATE_TIME = "DATE_TIME"
137
149
  STRING = "STRING"
138
150
  BOOLEAN = "BOOLEAN"
151
+ BYTES = "BYTES"
139
152
 
140
153
 
141
154
  class ModelTaskType(Enum):
upgini/utils/ip_utils.py CHANGED
@@ -42,6 +42,16 @@ class IpSearchKeyConverter:
42
42
  except Exception:
43
43
  pass
44
44
 
45
+ @staticmethod
46
+ def _ip_to_binary(ip: Optional[_BaseAddress]) -> Optional[bytes]:
47
+ try:
48
+ if isinstance(ip, IPv6Address) and ip.ipv4_mapped is not None:
49
+ return ip.ipv4_mapped.packed
50
+ else:
51
+ return ip.packed
52
+ except Exception:
53
+ pass
54
+
45
55
  @staticmethod
46
56
  def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
47
57
  try:
@@ -100,11 +110,16 @@ class IpSearchKeyConverter:
100
110
  .astype("string")
101
111
  # .str.replace(".0", "", regex=False)
102
112
  )
113
+ ip_binary = self.ip_column + "_binary"
114
+ df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
115
+
103
116
  df = df.drop(columns=self.ip_column)
104
117
  del self.search_keys[self.ip_column]
105
118
  del self.columns_renaming[self.ip_column]
106
119
  self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
120
+ self.search_keys[ip_binary] = SearchKey.IP_BINARY
107
121
  self.columns_renaming[ipv6] = original_ip # could be __unnest_ip...
122
+ self.columns_renaming[ip_binary] = original_ip
108
123
 
109
124
  return df
110
125
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.33
3
+ Version: 1.2.34a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=ozUAhuj1IzPzq1FQeoqbf-7laxntI-m4qA0LSTBVtrw,23
1
+ upgini/__about__.py,sha256=SukQ1Uy5sk-JVIQnHlOf7tk3UiWGJcs49nQJPj7NTPU,25
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=KnkqV7Nnx3kxfQ89giDao3bmCm4MFJWqJUrONy85E-k,32030
4
+ upgini/dataset.py,sha256=rUBE7_G7CLaaHAviFEyVPqjVSsX1DaLmi1dGFQR-eEo,32279
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
6
  upgini/features_enricher.py,sha256=q11aMFPlCJy1m4sOFfGZFfb4vdG3-hdd0wgm2BXgs9A,194748
7
7
  upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
- upgini/metadata.py,sha256=ACzIQQwCHCFHlUqXqKpxd3IQ4bBAaVvy8UaCGTqLGQs,11278
9
+ upgini/metadata.py,sha256=sB5uU-fdz_dA6g-PO6A8FzwIfDbkcFOewcpNs2xZzoY,11943
10
10
  upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
@@ -51,7 +51,7 @@ upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0-
51
51
  upgini/utils/feature_info.py,sha256=Tp_2g5-rCjY4NpzKhzxwNxuqH5FFL8vG94OU5kH6wzk,6702
52
52
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
53
53
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
54
- upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
54
+ upgini/utils/ip_utils.py,sha256=n_ZY2PPVsby6Iq3N_uZsBMWjD2i5cY8WnoEnGcgpYH4,5717
55
55
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
56
56
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
57
57
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
59
59
  upgini/utils/target_utils.py,sha256=Ed5IXkPjV9AfAZQAwCYksAmKaPGQliplvDYS_yeWdfk,11330
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.33.dist-info/METADATA,sha256=EG9Nr1Z8cls4rBaqrPykCTWZhSSoSxPaICd1EylsiKE,48587
63
- upgini-1.2.33.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
64
- upgini-1.2.33.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.33.dist-info/RECORD,,
62
+ upgini-1.2.34a1.dist-info/METADATA,sha256=JXU1l_ufUpd3UDrDM93WUN9zraD-BECLoI7AFczeB2A,48589
63
+ upgini-1.2.34a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
64
+ upgini-1.2.34a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.34a1.dist-info/RECORD,,