upgini 1.2.50__tar.gz → 1.2.52a1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (67)
  1. {upgini-1.2.50 → upgini-1.2.52a1}/PKG-INFO +1 -1
  2. upgini-1.2.52a1/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/features_enricher.py +6 -4
  4. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/metadata.py +1 -0
  5. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/ip_utils.py +36 -14
  6. upgini-1.2.50/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.50 → upgini-1.2.52a1}/.gitignore +0 -0
  8. {upgini-1.2.50 → upgini-1.2.52a1}/LICENSE +0 -0
  9. {upgini-1.2.50 → upgini-1.2.52a1}/README.md +0 -0
  10. {upgini-1.2.50 → upgini-1.2.52a1}/pyproject.toml +0 -0
  11. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/all_operands.py +0 -0
  17. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/operand.py +0 -0
  22. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/unary.py +0 -0
  23. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/autofe/vector.py +0 -0
  24. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/data_source/__init__.py +0 -0
  25. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/data_source/data_source_publisher.py +0 -0
  26. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/dataset.py +0 -0
  27. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/errors.py +0 -0
  28. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/http.py +0 -0
  29. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/lazy_import.py +0 -0
  30. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/mdc/__init__.py +0 -0
  31. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/mdc/context.py +0 -0
  32. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/metrics.py +0 -0
  33. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/normalizer/__init__.py +0 -0
  34. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/normalizer/normalize_utils.py +0 -0
  35. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/resource_bundle/__init__.py +0 -0
  36. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/resource_bundle/exceptions.py +0 -0
  37. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/resource_bundle/strings.properties +0 -0
  38. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  39. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/sampler/__init__.py +0 -0
  40. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/sampler/base.py +0 -0
  41. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/sampler/random_under_sampler.py +0 -0
  42. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/sampler/utils.py +0 -0
  43. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/search_task.py +0 -0
  44. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/spinner.py +0 -0
  45. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  46. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/__init__.py +0 -0
  47. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/base_search_key_detector.py +0 -0
  48. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/blocked_time_series.py +0 -0
  49. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/country_utils.py +0 -0
  50. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/custom_loss_utils.py +0 -0
  51. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/cv_utils.py +0 -0
  52. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/datetime_utils.py +0 -0
  53. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/deduplicate_utils.py +0 -0
  54. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/display_utils.py +0 -0
  55. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/email_utils.py +0 -0
  56. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/fallback_progress_bar.py +0 -0
  57. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/feature_info.py +0 -0
  58. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/features_validator.py +0 -0
  59. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/format.py +0 -0
  60. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/phone_utils.py +0 -0
  61. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/postal_code_utils.py +0 -0
  62. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/progress_bar.py +0 -0
  63. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/sklearn_ext.py +0 -0
  64. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/target_utils.py +0 -0
  65. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/track_info.py +0 -0
  66. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/utils/warning_counter.py +0 -0
  67. {upgini-1.2.50 → upgini-1.2.52a1}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.50
3
+ Version: 1.2.52a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.52a1"
@@ -2776,7 +2776,6 @@ if response.status_code == 200:
2776
2776
  self.logger,
2777
2777
  )
2778
2778
  df = converter.convert(df)
2779
-
2780
2779
  phone_column = self._get_phone_column(self.fit_search_keys)
2781
2780
  country_column = self._get_country_column(self.fit_search_keys)
2782
2781
  if phone_column:
@@ -2792,9 +2791,12 @@ if response.status_code == 200:
2792
2791
  converter = PostalCodeSearchKeyConverter(postal_code)
2793
2792
  df = converter.convert(df)
2794
2793
 
2795
- non_feature_columns = [self.TARGET_NAME, EVAL_SET_INDEX, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST] + list(
2796
- self.fit_search_keys.keys()
2797
- )
2794
+ non_feature_columns = [
2795
+ self.TARGET_NAME,
2796
+ EVAL_SET_INDEX,
2797
+ ENTITY_SYSTEM_RECORD_ID,
2798
+ SEARCH_KEY_UNNEST,
2799
+ ] + list(self.fit_search_keys.keys())
2798
2800
  if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
2799
2801
  non_feature_columns.append(DateTimeSearchKeyConverter.DATETIME_COL)
2800
2802
 
@@ -44,6 +44,7 @@ class FileColumnMeaningType(Enum):
44
44
  ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
45
45
  UNNEST_KEY = "UNNEST_KEY"
46
46
  IP_BINARY = "IP_BINARY"
47
+ IP_PREFIX = "IP_PREFIX"
47
48
  IP_RANGE_FROM_BINARY = "IP_RANGE_FROM_BINARY"
48
49
  IP_RANGE_TO_BINARY = "IP_RANGE_TO_BINARY"
49
50
 
@@ -36,6 +36,8 @@ class IpSearchKeyConverter:
36
36
 
37
37
  @staticmethod
38
38
  def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
39
+ if ip is None:
40
+ return None
39
41
  try:
40
42
  if isinstance(ip, (IPv4Address, IPv6Address)):
41
43
  return int(ip)
@@ -44,6 +46,8 @@ class IpSearchKeyConverter:
44
46
 
45
47
  @staticmethod
46
48
  def _ip_to_binary(ip: Optional[_BaseAddress]) -> Optional[bytes]:
49
+ if ip is None:
50
+ return None
47
51
  try:
48
52
  if isinstance(ip, IPv6Address) and ip.ipv4_mapped is not None:
49
53
  return ip.ipv4_mapped.packed
@@ -52,6 +56,20 @@ class IpSearchKeyConverter:
52
56
  except Exception:
53
57
  pass
54
58
 
59
+ @staticmethod
60
+ def _ip_to_prefix(ip: Optional[_BaseAddress]) -> Optional[str]:
61
+ if ip is None:
62
+ return None
63
+ try:
64
+ if isinstance(ip, IPv6Address):
65
+ if ip.ipv4_mapped is not None:
66
+ return ".".join(ip.ipv4_mapped.exploded.split(".")[:2])
67
+ return ":".join(ip.exploded.split(":")[:2]) # TODO use 3 in future
68
+ else:
69
+ return ".".join(ip.exploded.split(".")[:2])
70
+ except Exception:
71
+ pass
72
+
55
73
  @staticmethod
56
74
  def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
57
75
  try:
@@ -102,24 +120,28 @@ class IpSearchKeyConverter:
102
120
  # self.search_keys[ipv4] = SearchKey.IP
103
121
  # self.columns_renaming[ipv4] = original_ip
104
122
 
105
- ipv6 = self.ip_column + "_v6"
106
- df[ipv6] = (
107
- df[self.ip_column]
108
- .apply(self._to_ipv6)
109
- .apply(self._ip_to_int_str)
110
- .astype("string")
111
- # .str.replace(".0", "", regex=False)
112
- )
113
- # ip_binary = self.ip_column + "_binary"
114
- # df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
123
+ # ipv6 = self.ip_column + "_v6"
124
+ # df[ipv6] = (
125
+ # df[self.ip_column]
126
+ # .apply(self._to_ipv6)
127
+ # .apply(self._ip_to_int_str)
128
+ # .astype("string")
129
+ # # .str.replace(".0", "", regex=False)
130
+ # )
131
+ ip_binary = self.ip_column + "_binary"
132
+ df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
133
+ ip_prefix_column = self.ip_column + "_prefix"
134
+ df[ip_prefix_column] = df[self.ip_column].apply(self._ip_to_prefix)
115
135
 
116
136
  df = df.drop(columns=self.ip_column)
117
137
  del self.search_keys[self.ip_column]
118
138
  del self.columns_renaming[self.ip_column]
119
- self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
120
- # self.search_keys[ip_binary] = SearchKey.IP_BINARY
121
- self.columns_renaming[ipv6] = original_ip
122
- # self.columns_renaming[ip_binary] = original_ip
139
+ # self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
140
+ self.search_keys[ip_binary] = SearchKey.IP_BINARY
141
+ self.search_keys[ip_prefix_column] = SearchKey.IP_PREFIX
142
+ # self.columns_renaming[ipv6] = original_ip
143
+ self.columns_renaming[ip_binary] = original_ip
144
+ self.columns_renaming[ip_prefix_column] = original_ip
123
145
 
124
146
  return df
125
147
 
@@ -1 +0,0 @@
1
- __version__ = "1.2.50"
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes