upgini 1.2.50__tar.gz → 1.2.52__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.50 → upgini-1.2.52}/PKG-INFO +1 -1
- upgini-1.2.52/src/upgini/__about__.py +1 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/features_enricher.py +6 -4
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/metadata.py +2 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/ip_utils.py +36 -14
- upgini-1.2.50/src/upgini/__about__.py +0 -1
- {upgini-1.2.50 → upgini-1.2.52}/.gitignore +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/LICENSE +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/README.md +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/pyproject.toml +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/ads.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/dataset.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/errors.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/http.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/metrics.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/search_task.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/spinner.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.50 → upgini-1.2.52}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.52"
|
|
@@ -2776,7 +2776,6 @@ if response.status_code == 200:
|
|
|
2776
2776
|
self.logger,
|
|
2777
2777
|
)
|
|
2778
2778
|
df = converter.convert(df)
|
|
2779
|
-
|
|
2780
2779
|
phone_column = self._get_phone_column(self.fit_search_keys)
|
|
2781
2780
|
country_column = self._get_country_column(self.fit_search_keys)
|
|
2782
2781
|
if phone_column:
|
|
@@ -2792,9 +2791,12 @@ if response.status_code == 200:
|
|
|
2792
2791
|
converter = PostalCodeSearchKeyConverter(postal_code)
|
|
2793
2792
|
df = converter.convert(df)
|
|
2794
2793
|
|
|
2795
|
-
non_feature_columns = [
|
|
2796
|
-
self.
|
|
2797
|
-
|
|
2794
|
+
non_feature_columns = [
|
|
2795
|
+
self.TARGET_NAME,
|
|
2796
|
+
EVAL_SET_INDEX,
|
|
2797
|
+
ENTITY_SYSTEM_RECORD_ID,
|
|
2798
|
+
SEARCH_KEY_UNNEST,
|
|
2799
|
+
] + list(self.fit_search_keys.keys())
|
|
2798
2800
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
|
2799
2801
|
non_feature_columns.append(DateTimeSearchKeyConverter.DATETIME_COL)
|
|
2800
2802
|
|
|
@@ -44,6 +44,7 @@ class FileColumnMeaningType(Enum):
|
|
|
44
44
|
ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
|
|
45
45
|
UNNEST_KEY = "UNNEST_KEY"
|
|
46
46
|
IP_BINARY = "IP_BINARY"
|
|
47
|
+
IP_PREFIX = "IP_PREFIX"
|
|
47
48
|
IP_RANGE_FROM_BINARY = "IP_RANGE_FROM_BINARY"
|
|
48
49
|
IP_RANGE_TO_BINARY = "IP_RANGE_TO_BINARY"
|
|
49
50
|
|
|
@@ -66,6 +67,7 @@ class SearchKey(Enum):
|
|
|
66
67
|
IP_BINARY = FileColumnMeaningType.IP_BINARY
|
|
67
68
|
IP_RANGE_FROM_BINARY = FileColumnMeaningType.IP_RANGE_FROM_BINARY
|
|
68
69
|
IP_RANGE_TO_BINARY = FileColumnMeaningType.IP_RANGE_TO_BINARY
|
|
70
|
+
IP_PREFIX = FileColumnMeaningType.IP_PREFIX
|
|
69
71
|
|
|
70
72
|
# For data source registration. Don't use it for FeaturesEnricher
|
|
71
73
|
EMAIL_ONE_DOMAIN = FileColumnMeaningType.EMAIL_ONE_DOMAIN
|
|
@@ -36,6 +36,8 @@ class IpSearchKeyConverter:
|
|
|
36
36
|
|
|
37
37
|
@staticmethod
|
|
38
38
|
def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
|
|
39
|
+
if ip is None:
|
|
40
|
+
return None
|
|
39
41
|
try:
|
|
40
42
|
if isinstance(ip, (IPv4Address, IPv6Address)):
|
|
41
43
|
return int(ip)
|
|
@@ -44,6 +46,8 @@ class IpSearchKeyConverter:
|
|
|
44
46
|
|
|
45
47
|
@staticmethod
|
|
46
48
|
def _ip_to_binary(ip: Optional[_BaseAddress]) -> Optional[bytes]:
|
|
49
|
+
if ip is None:
|
|
50
|
+
return None
|
|
47
51
|
try:
|
|
48
52
|
if isinstance(ip, IPv6Address) and ip.ipv4_mapped is not None:
|
|
49
53
|
return ip.ipv4_mapped.packed
|
|
@@ -52,6 +56,20 @@ class IpSearchKeyConverter:
|
|
|
52
56
|
except Exception:
|
|
53
57
|
pass
|
|
54
58
|
|
|
59
|
+
@staticmethod
|
|
60
|
+
def _ip_to_prefix(ip: Optional[_BaseAddress]) -> Optional[str]:
|
|
61
|
+
if ip is None:
|
|
62
|
+
return None
|
|
63
|
+
try:
|
|
64
|
+
if isinstance(ip, IPv6Address):
|
|
65
|
+
if ip.ipv4_mapped is not None:
|
|
66
|
+
return ".".join(ip.ipv4_mapped.exploded.split(".")[:2])
|
|
67
|
+
return ":".join(ip.exploded.split(":")[:2]) # TODO use 3 in future
|
|
68
|
+
else:
|
|
69
|
+
return ".".join(ip.exploded.split(".")[:2])
|
|
70
|
+
except Exception:
|
|
71
|
+
pass
|
|
72
|
+
|
|
55
73
|
@staticmethod
|
|
56
74
|
def _ip_to_int_str(ip: Optional[_BaseAddress]) -> Optional[str]:
|
|
57
75
|
try:
|
|
@@ -102,24 +120,28 @@ class IpSearchKeyConverter:
|
|
|
102
120
|
# self.search_keys[ipv4] = SearchKey.IP
|
|
103
121
|
# self.columns_renaming[ipv4] = original_ip
|
|
104
122
|
|
|
105
|
-
ipv6 = self.ip_column + "_v6"
|
|
106
|
-
df[ipv6] = (
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
)
|
|
113
|
-
|
|
114
|
-
|
|
123
|
+
# ipv6 = self.ip_column + "_v6"
|
|
124
|
+
# df[ipv6] = (
|
|
125
|
+
# df[self.ip_column]
|
|
126
|
+
# .apply(self._to_ipv6)
|
|
127
|
+
# .apply(self._ip_to_int_str)
|
|
128
|
+
# .astype("string")
|
|
129
|
+
# # .str.replace(".0", "", regex=False)
|
|
130
|
+
# )
|
|
131
|
+
ip_binary = self.ip_column + "_binary"
|
|
132
|
+
df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
|
|
133
|
+
ip_prefix_column = self.ip_column + "_prefix"
|
|
134
|
+
df[ip_prefix_column] = df[self.ip_column].apply(self._ip_to_prefix)
|
|
115
135
|
|
|
116
136
|
df = df.drop(columns=self.ip_column)
|
|
117
137
|
del self.search_keys[self.ip_column]
|
|
118
138
|
del self.columns_renaming[self.ip_column]
|
|
119
|
-
self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
|
|
120
|
-
|
|
121
|
-
self.
|
|
122
|
-
# self.columns_renaming[
|
|
139
|
+
# self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
|
|
140
|
+
self.search_keys[ip_binary] = SearchKey.IP_BINARY
|
|
141
|
+
self.search_keys[ip_prefix_column] = SearchKey.IP_PREFIX
|
|
142
|
+
# self.columns_renaming[ipv6] = original_ip
|
|
143
|
+
self.columns_renaming[ip_binary] = original_ip
|
|
144
|
+
self.columns_renaming[ip_prefix_column] = original_ip
|
|
123
145
|
|
|
124
146
|
return df
|
|
125
147
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.50"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|