upgini 1.2.51__tar.gz → 1.2.52__tar.gz
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- {upgini-1.2.51 → upgini-1.2.52}/PKG-INFO +1 -1
- upgini-1.2.52/src/upgini/__about__.py +1 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/features_enricher.py +0 -10
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/metadata.py +1 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/ip_utils.py +4 -3
- upgini-1.2.51/src/upgini/__about__.py +0 -1
- {upgini-1.2.51 → upgini-1.2.52}/.gitignore +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/LICENSE +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/README.md +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/pyproject.toml +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/ads.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/ads_management/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/ads_management/ads_manager.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/all_operands.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/binary.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/date.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/feature.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/groupby.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/operand.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/unary.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/autofe/vector.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/data_source/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/data_source/data_source_publisher.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/dataset.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/errors.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/http.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/lazy_import.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/mdc/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/mdc/context.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/metrics.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/normalizer/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/normalizer/normalize_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/resource_bundle/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/resource_bundle/exceptions.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/resource_bundle/strings.properties +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/resource_bundle/strings_widget.properties +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/sampler/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/sampler/base.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/sampler/random_under_sampler.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/sampler/utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/search_task.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/spinner.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/Roboto-Regular.ttf +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/__init__.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/base_search_key_detector.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/blocked_time_series.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/country_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/custom_loss_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/cv_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/datetime_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/deduplicate_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/display_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/email_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/fallback_progress_bar.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/feature_info.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/features_validator.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/format.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/phone_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/postal_code_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/progress_bar.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/sklearn_ext.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/target_utils.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/track_info.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/utils/warning_counter.py +0 -0
- {upgini-1.2.51 → upgini-1.2.52}/src/upgini/version_validator.py +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "1.2.52"
|
|
@@ -2270,7 +2270,6 @@ if response.status_code == 200:
|
|
|
2270
2270
|
df = converter.convert(df)
|
|
2271
2271
|
|
|
2272
2272
|
ip_column = self._get_ip_column(search_keys)
|
|
2273
|
-
ip_prefix_column = None
|
|
2274
2273
|
if ip_column:
|
|
2275
2274
|
converter = IpSearchKeyConverter(
|
|
2276
2275
|
ip_column,
|
|
@@ -2281,7 +2280,6 @@ if response.status_code == 200:
|
|
|
2281
2280
|
self.logger,
|
|
2282
2281
|
)
|
|
2283
2282
|
df = converter.convert(df)
|
|
2284
|
-
ip_prefix_column = converter.ip_prefix_column
|
|
2285
2283
|
|
|
2286
2284
|
phone_column = self._get_phone_column(search_keys)
|
|
2287
2285
|
country_column = self._get_country_column(search_keys)
|
|
@@ -2301,15 +2299,12 @@ if response.status_code == 200:
|
|
|
2301
2299
|
# generated_features = [f for f in generated_features if f in self.fit_generated_features]
|
|
2302
2300
|
|
|
2303
2301
|
meaning_types = {col: key.value for col, key in search_keys.items()}
|
|
2304
|
-
if ip_prefix_column:
|
|
2305
|
-
meaning_types[ip_prefix_column] = FileColumnMeaningType.IP_PREFIX
|
|
2306
2302
|
for col in features_for_transform:
|
|
2307
2303
|
meaning_types[col] = FileColumnMeaningType.FEATURE
|
|
2308
2304
|
features_not_to_pass = [
|
|
2309
2305
|
c
|
|
2310
2306
|
for c in df.columns
|
|
2311
2307
|
if c not in search_keys.keys()
|
|
2312
|
-
and c != ip_prefix_column
|
|
2313
2308
|
and c not in features_for_transform
|
|
2314
2309
|
and c not in [ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST]
|
|
2315
2310
|
]
|
|
@@ -2771,7 +2766,6 @@ if response.status_code == 200:
|
|
|
2771
2766
|
df = converter.convert(df)
|
|
2772
2767
|
|
|
2773
2768
|
ip_column = self._get_ip_column(self.fit_search_keys)
|
|
2774
|
-
ip_prefix_column = None
|
|
2775
2769
|
if ip_column:
|
|
2776
2770
|
converter = IpSearchKeyConverter(
|
|
2777
2771
|
ip_column,
|
|
@@ -2782,7 +2776,6 @@ if response.status_code == 200:
|
|
|
2782
2776
|
self.logger,
|
|
2783
2777
|
)
|
|
2784
2778
|
df = converter.convert(df)
|
|
2785
|
-
ip_prefix_column = converter.ip_prefix_column
|
|
2786
2779
|
phone_column = self._get_phone_column(self.fit_search_keys)
|
|
2787
2780
|
country_column = self._get_country_column(self.fit_search_keys)
|
|
2788
2781
|
if phone_column:
|
|
@@ -2803,7 +2796,6 @@ if response.status_code == 200:
|
|
|
2803
2796
|
EVAL_SET_INDEX,
|
|
2804
2797
|
ENTITY_SYSTEM_RECORD_ID,
|
|
2805
2798
|
SEARCH_KEY_UNNEST,
|
|
2806
|
-
ip_prefix_column,
|
|
2807
2799
|
] + list(self.fit_search_keys.keys())
|
|
2808
2800
|
if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
|
|
2809
2801
|
non_feature_columns.append(DateTimeSearchKeyConverter.DATETIME_COL)
|
|
@@ -2825,8 +2817,6 @@ if response.status_code == 200:
|
|
|
2825
2817
|
**{col: key.value for col, key in self.fit_search_keys.items()},
|
|
2826
2818
|
**{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
|
|
2827
2819
|
}
|
|
2828
|
-
if ip_prefix_column:
|
|
2829
|
-
meaning_types[ip_prefix_column] = FileColumnMeaningType.IP_PREFIX
|
|
2830
2820
|
meaning_types[self.TARGET_NAME] = FileColumnMeaningType.TARGET
|
|
2831
2821
|
meaning_types[ENTITY_SYSTEM_RECORD_ID] = FileColumnMeaningType.ENTITY_SYSTEM_RECORD_ID
|
|
2832
2822
|
if SEARCH_KEY_UNNEST in df.columns:
|
|
@@ -67,6 +67,7 @@ class SearchKey(Enum):
|
|
|
67
67
|
IP_BINARY = FileColumnMeaningType.IP_BINARY
|
|
68
68
|
IP_RANGE_FROM_BINARY = FileColumnMeaningType.IP_RANGE_FROM_BINARY
|
|
69
69
|
IP_RANGE_TO_BINARY = FileColumnMeaningType.IP_RANGE_TO_BINARY
|
|
70
|
+
IP_PREFIX = FileColumnMeaningType.IP_PREFIX
|
|
70
71
|
|
|
71
72
|
# For data source registration. Don't use it for FeaturesEnricher
|
|
72
73
|
EMAIL_ONE_DOMAIN = FileColumnMeaningType.EMAIL_ONE_DOMAIN
|
|
@@ -33,7 +33,6 @@ class IpSearchKeyConverter:
|
|
|
33
33
|
else:
|
|
34
34
|
self.logger = logging.getLogger()
|
|
35
35
|
self.logger.setLevel("FATAL")
|
|
36
|
-
self.ip_prefix_column = None
|
|
37
36
|
|
|
38
37
|
@staticmethod
|
|
39
38
|
def _ip_to_int(ip: Optional[_BaseAddress]) -> Optional[int]:
|
|
@@ -131,16 +130,18 @@ class IpSearchKeyConverter:
|
|
|
131
130
|
# )
|
|
132
131
|
ip_binary = self.ip_column + "_binary"
|
|
133
132
|
df[ip_binary] = df[self.ip_column].apply(self._ip_to_binary)
|
|
134
|
-
|
|
135
|
-
df[
|
|
133
|
+
ip_prefix_column = self.ip_column + "_prefix"
|
|
134
|
+
df[ip_prefix_column] = df[self.ip_column].apply(self._ip_to_prefix)
|
|
136
135
|
|
|
137
136
|
df = df.drop(columns=self.ip_column)
|
|
138
137
|
del self.search_keys[self.ip_column]
|
|
139
138
|
del self.columns_renaming[self.ip_column]
|
|
140
139
|
# self.search_keys[ipv6] = SearchKey.IPV6_ADDRESS
|
|
141
140
|
self.search_keys[ip_binary] = SearchKey.IP_BINARY
|
|
141
|
+
self.search_keys[ip_prefix_column] = SearchKey.IP_PREFIX
|
|
142
142
|
# self.columns_renaming[ipv6] = original_ip
|
|
143
143
|
self.columns_renaming[ip_binary] = original_ip
|
|
144
|
+
self.columns_renaming[ip_prefix_column] = original_ip
|
|
144
145
|
|
|
145
146
|
return df
|
|
146
147
|
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "1.2.51"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|