upgini 1.2.50__py3-none-any.whl → 1.2.51__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.50"
1
+ __version__ = "1.2.51"
upgini/features_enricher.py CHANGED
@@ -2270,6 +2270,7 @@ if response.status_code == 200:
2270
2270
  df = converter.convert(df)
2271
2271
 
2272
2272
  ip_column = self._get_ip_column(search_keys)
2273
+ ip_prefix_column = None
2273
2274
  if ip_column:
2274
2275
  converter = IpSearchKeyConverter(
2275
2276
  ip_column,
@@ -2280,6 +2281,7 @@ if response.status_code == 200:
2280
2281
  self.logger,
2281
2282
  )
2282
2283
  df = converter.convert(df)
2284
+ ip_prefix_column = converter.ip_prefix_column
2283
2285
 
2284
2286
  phone_column = self._get_phone_column(search_keys)
2285
2287
  country_column = self._get_country_column(search_keys)
@@ -2299,12 +2301,15 @@ if response.status_code == 200:
2299
2301
  # generated_features = [f for f in generated_features if f in self.fit_generated_features]
2300
2302
 
2301
2303
  meaning_types = {col: key.value for col, key in search_keys.items()}
2304
+ if ip_prefix_column:
2305
+ meaning_types[ip_prefix_column] = FileColumnMeaningType.IP_PREFIX
2302
2306
  for col in features_for_transform:
2303
2307
  meaning_types[col] = FileColumnMeaningType.FEATURE
2304
2308
  features_not_to_pass = [
2305
2309
  c
2306
2310
  for c in df.columns
2307
2311
  if c not in search_keys.keys()
2312
+ and c != ip_prefix_column
2308
2313
  and c not in features_for_transform
2309
2314
  and c not in [ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]
2310
2315
  ]
@@ -2766,6 +2771,7 @@ if response.status_code == 200:
2766
2771
  df = converter.convert(df)
2767
2772
 
2768
2773
  ip_column = self._get_ip_column(self.fit_search_keys)
2774
+ ip_prefix_column = None
2769
2775
  if ip_column:
2770
2776
  converter = IpSearchKeyConverter(
2771
2777
  ip_column,
@@ -2776,7 +2782,7 @@ if response.status_code == 200:
2776
2782
  self.logger,
2777
2783
  )
2778
2784
  df = converter.convert(df)
2779
-
2785
+ ip_prefix_column = converter.ip_prefix_column
2780
2786
  phone_column = self._get_phone_column(self.fit_search_keys)
2781
2787
  country_column = self._get_country_column(self.fit_search_keys)
2782
2788
  if phone_column:
@@ -2792,9 +2798,13 @@ if response.status_code == 200:
2792
2798
  converter = PostalCodeSearchKeyConverter(postal_code)
2793
2799
  df = converter.convert(df)
2794
2800
 
2795
- non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
2796
- self.fit_search_keys.keys()
2797
- )
2801
+ non_feature_columns = [
2802
+ self.TARGET_NAME,
2803
+ EVAL_SET_INDEX,
2804
+ ENTITY_SYSTEM_RECORD_ID,
2805
+ SEARCH_KEY_UNNEST,
2806
+ ip_prefix_column,
2807
+ ] + list(self.fit_search_keys.keys())
2798
2808
  if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
2799
2809
  non_feature_columns.append(DateTimeSearchKeyConverter.DATETIME_COL)
2800
2810
 
@@ -2815,6 +2825,8 @@ if response.status_code == 200:
2815
2825
  **{col: key.value for col, key in self.fit_search_keys.items()},
2816
2826
  **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
2817
2827
  }
2828
+ if ip_prefix_column:
2829
+ meaning_types[ip_prefix_column] = FileColumnMeaningType.IP_PREFIX
2818
2830
  meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
2819
2831
  meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
2820
2832
  if SEARCH_KEY_UNNEST in df.columns:
upgini/metadata.py CHANGED
@@ -44,6 +44,7 @@ class FileColumnMeaningType(Enum):
44
44
  ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
45
45
  UNNEST_KEY = "UNNEST_KEY"
46
46
  IP_BINARY = "IP_BINARY"
47
+ IP_PREFIX = "IP_PREFIX"
47
48
  IP_RANGE_FROM_BINARY = "IP_RANGE_FROM_BINARY"
48
49
  IP_RANGE_TO_BINARY = "IP_RANGE_TO_BINARY"
49
50
 
upgini/utils/ip_utils.py CHANGED
@@ -33,9 +33,12 @@ class IpSearchKeyConverter:
33
33
  else:
34
34
  self.logger = logging.getLogger()
35
35
  self.logger.setLevel("FATAL")
36
+ self.ip_prefix_column = None
36
37
 
37
38
  @staticmethod
38
39
  def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
40
+ if ip is None:
41
+ return None
39
42
  try:
40
43
  if isinstance(ip, (IPv4Address, IPv6Address)):
41
44
  return int(ip)
@@ -44,6 +47,8 @@ class IpSearchKeyConverter:
44
47
 
45
48
  @staticmethod
46
49
  def _ip_to_binary(ip: Optional[_BaseAddress]) -> Optional[bytes]:
50
+ if ip is None:
51
+ return None
47
52
  try:
48
53
  if isinstance(ip, IPv6Address) and ip.ipv4_mapped is not None:
49
54
  return ip.ipv4_mapped.packed
@@ -52,6 +57,20 @@ class IpSearchKeyConverter:
52
57
  except Exception:
53
58
  pass
54
59
 
60
+ @staticmethod
61
+ def _ip_to_prefix(ip: Optional[_BaseAddress]) -> Optional[str]:
62
+ if ip is None:
63
+ return None
64
+ try:
65
+ if isinstance(ip, IPv6Address):
66
+ if ip.ipv4_mapped is not None:
67
+ return ".".join(ip.ipv4_mapped.exploded.split(".")[:2])
68
+ return ":".join(ip.exploded.split(":")[:2]) # TODO use 3 in future
69
+ else:
70
+ return ".".join(ip.exploded.split(".")[:2])
71
+ except Exception:
72
+ pass
73
+
55
74
  @staticmethod
56
75
  def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
57
76
  try:
@@ -102,24 +121,26 @@ class IpSearchKeyConverter:
102
121
  # self.search_keys[ipv4] = SearchKey.IP
103
122
  # self.columns_renaming[ipv4] = original_ip
104
123
 
105
- ipv6 = self.ip_column + "_v6"
106
- df[ipv6] = (
107
- df[self.ip_column]
108
- .apply(self._to_ipv6)
109
- .apply(self._ip_to_int_str)
110
- .astype("string")
111
- # .str.replace(".0", "", regex=False)
112
- )
113
- # ip_binary = self.ip_column + "_binary"
114
- # df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
124
+ # ipv6 = self.ip_column + "_v6"
125
+ # df[ipv6] = (
126
+ # df[self.ip_column]
127
+ # .apply(self._to_ipv6)
128
+ # .apply(self._ip_to_int_str)
129
+ # .astype("string")
130
+ # # .str.replace(".0", "", regex=False)
131
+ # )
132
+ ip_binary = self.ip_column + "_binary"
133
+ df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
134
+ self.ip_prefix_column = self.ip_column + "_prefix"
135
+ df[self.ip_prefix_column] = df[self.ip_column].apply(self._ip_to_prefix)
115
136
 
116
137
  df = df.drop(columns=self.ip_column)
117
138
  del self.search_keys[self.ip_column]
118
139
  del self.columns_renaming[self.ip_column]
119
- self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
120
- # self.search_keys[ip_binary] = SearchKey.IP_BINARY
121
- self.columns_renaming[ipv6] = original_ip
122
- # self.columns_renaming[ip_binary] = original_ip
140
+ # self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
141
+ self.search_keys[ip_binary] = SearchKey.IP_BINARY
142
+ # self.columns_renaming[ipv6] = original_ip
143
+ self.columns_renaming[ip_binary] = original_ip
123
144
 
124
145
  return df
125
146
 
upgini-1.2.50.dist-info/METADATA → upgini-1.2.51.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.50
3
+ Version: 1.2.51
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
upgini-1.2.50.dist-info/RECORD → upgini-1.2.51.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=Mi5DzFmquYseHnFMuFvsBrEztpwNZnhZs1G4xpE08KQ,23
1
+ upgini/__about__.py,sha256=kgsz9u_lLDc3N0akch6v9PpMXz_PW7_aEHXRb1pWgHg,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=vT4JyHmafLNbj54SySXr93f5hNS6-t94aFslbBy-7No,33535
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=O-0ZLFp1SPDNf5Yq-dysH8Jm-1c_LpNv2cIdXZ15nK8,200592
6
+ upgini/features_enricher.py,sha256=80h-1a-UxhuknvuEk1tQk5q5dckqlD_DHzNfufNuaPI,201110
7
7
  upgini/http.py,sha256=danPeX7nTMa_70S-pk-4UUm5yOvXYlR84jgyjoHYBkU,43367
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
- upgini/metadata.py,sha256=-ibqiNjD7dTagqg53FoEJNEqvAYbwgfyn9PGTRQ_YKU,12054
9
+ upgini/metadata.py,sha256=zuLdt5XyO_ZH4VsUNshzRHgv6VfYiXy0M8jeohloFBw,12082
10
10
  upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
@@ -51,7 +51,7 @@ upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0-
51
51
  upgini/utils/feature_info.py,sha256=0rOXSyCj-sw-8migWP0ge8qrOzGU50dQvH0JUJUrDfQ,6766
52
52
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
53
53
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
54
- upgini/utils/ip_utils.py,sha256=VORRmtKlItcbBVVK5SiwXD7J-6Y5rn7UQ5m6WcBXt7E,5698
54
+ upgini/utils/ip_utils.py,sha256=GZqBaV-nky-_Yb9KclmTrYovCG4kawYcbdjEpw1e5Mo,6500
55
55
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
56
56
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
57
57
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
59
59
  upgini/utils/target_utils.py,sha256=RlpKGss9kMibVSlA8iZuO_qxmyeplqzn7X8g6hiGGGs,14341
60
60
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
61
61
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
62
- upgini-1.2.50.dist-info/METADATA,sha256=PH8ms19Lbu3cuZxySGo9kcBeMkErCLGL8j8X3t2gxbw,49055
63
- upgini-1.2.50.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
64
- upgini-1.2.50.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
- upgini-1.2.50.dist-info/RECORD,,
62
+ upgini-1.2.51.dist-info/METADATA,sha256=WXti81Fx4H5NawX2D7XvQ5cPEUVi4mlMkykbn94gXKI,49055
63
+ upgini-1.2.51.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
64
+ upgini-1.2.51.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
65
+ upgini-1.2.51.dist-info/RECORD,,