upgini 1.2.96a3906.dev2__py3-none-any.whl → 1.2.98a3922.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.96a3906.dev2"
1
+ __version__ = "1.2.98a3922.dev1"
upgini/autofe/vector.py CHANGED
@@ -47,3 +47,19 @@ class OnnxModel(PandasOperator, metaclass=OperatorRegistry):
47
47
  }
48
48
  )
49
49
  return res
50
+
51
+
52
class CatboostModel(PandasOperator, metaclass=OperatorRegistry):
    """Operator description named ``catboost`` that carries a model name.

    Declared as a vector operator with a ``float`` output type. The only
    behaviour defined here is exposing ``model_name`` through ``get_params``.
    """

    # Identifier of this operator.
    name: str = "catboost"
    # Flags the operator as a vector operator.
    is_vector: bool = True
    # Declared type of the operator's output.
    output_type: Optional[str] = "float"
    # Name of the model this operator refers to; empty by default.
    model_name: str = ""

    def get_params(self) -> Dict[str, Optional[str]]:
        """Return the parent's parameters with ``model_name`` added."""
        params = super().get_params()
        params["model_name"] = self.model_name
        return params
@@ -149,9 +149,8 @@ class DataSourcePublisher:
149
149
  existing_secondary_keys = {item for sublist in row["secondarySearchKeys"] for item in sublist}
150
150
  if existing_secondary_keys == {v.value.name for v in secondary_search_keys.values()}:
151
151
  existing_search_keys = {item for sublist in row["searchKeys"] for item in sublist}
152
- if (
153
- existing_search_keys == {v.value.name for v in search_keys.values()}
154
- or ("IP" in str(existing_search_keys) and "IP" in str(search_keys.values()))
152
+ if existing_search_keys == {v.value.name for v in search_keys.values()} or (
153
+ "IP" in str(existing_search_keys) and "IP" in str(search_keys.values())
155
154
  ):
156
155
  raise ValidationError(
157
156
  "ADS with the same PRIMARY_KEYS -> SECONDARY_KEYS mapping "
@@ -494,3 +493,30 @@ class DataSourcePublisher:
494
493
  raise Exception("Failed to reannounce all ADS: " + status_response["errorMessage"])
495
494
  except Exception:
496
495
  self.logger.exception("Failed to reannounce all ADS-es")
496
+
497
def upload_autofe_model(
    self,
    file_path: str,
    name: str,
    input_names: List[str],
    search_id: str,
    model_type: Optional[Literal["ONNX", "CATBOOST"]] = None,
    description: str = "",
):
    """Upload an AutoFE model file together with its metadata.

    Args:
        file_path: Path of the model file handed to the REST client.
        name: Sent as ``modelName``.
        input_names: Sent as ``inputNames``.
        search_id: Sent as ``searchTaskId``.
        model_type: ``"ONNX"`` or ``"CATBOOST"``. ``None`` is sent as
            ``"ONNX"``.
        description: Sent as ``description``.

    Raises:
        ValueError: If ``model_type`` is given and is not a supported value.
        Exception: Any error raised by the REST client is logged and
            re-raised unchanged.
    """
    supported_model_types = ("ONNX", "CATBOOST")
    if model_type is not None and model_type not in supported_model_types:
        # Build the message from the same tuple used for the check so the
        # advertised values can never drift from the accepted ones.
        raise ValueError(
            f"Invalid model type: {model_type}. Available values: {', '.join(supported_model_types)}"
        )

    metadata = {
        "modelName": name,
        "inputNames": input_names,
        "searchTaskId": search_id,
        "modelType": model_type or "ONNX",
        "description": description,
    }

    # A fresh trace id per upload, also placed into the logging context.
    trace_id = str(uuid.uuid4())
    with MDC(trace_id=trace_id):
        try:
            self._rest_client.upload_autofe_model(file_path, metadata, trace_id)
        except Exception:
            self.logger.exception("Failed to upload autofe model")
            raise
@@ -71,10 +71,7 @@ from upgini.search_task import SearchTask
71
71
  from upgini.spinner import Spinner
72
72
  from upgini.utils import combine_search_keys, find_numbers_with_decimal_comma
73
73
  from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
74
- from upgini.utils.country_utils import (
75
- CountrySearchKeyConverter,
76
- CountrySearchKeyDetector,
77
- )
74
+ from upgini.utils.country_utils import CountrySearchKeyDetector
78
75
  from upgini.utils.custom_loss_utils import (
79
76
  get_additional_params_custom_loss,
80
77
  get_runtime_params_custom_loss,
@@ -105,11 +102,8 @@ from upgini.utils.feature_info import FeatureInfo, _round_shap_value
105
102
  from upgini.utils.features_validator import FeaturesValidator
106
103
  from upgini.utils.format import Format
107
104
  from upgini.utils.ip_utils import IpSearchKeyConverter
108
- from upgini.utils.phone_utils import PhoneSearchKeyConverter, PhoneSearchKeyDetector
109
- from upgini.utils.postal_code_utils import (
110
- PostalCodeSearchKeyConverter,
111
- PostalCodeSearchKeyDetector,
112
- )
105
+ from upgini.utils.phone_utils import PhoneSearchKeyDetector
106
+ from upgini.utils.postal_code_utils import PostalCodeSearchKeyDetector
113
107
 
114
108
  try:
115
109
  from upgini.utils.progress_bar import CustomProgressBar as ProgressBar
@@ -1122,6 +1116,7 @@ class FeaturesEnricher(TransformerMixin):
1122
1116
  # and calculate final metric (and uplift)
1123
1117
  enriched_metric = None
1124
1118
  uplift = None
1119
+ uplift_perc = None
1125
1120
  enriched_estimator = None
1126
1121
  if set(fitting_X.columns) != set(fitting_enriched_X.columns):
1127
1122
  self.logger.info(
@@ -1153,6 +1148,7 @@ class FeaturesEnricher(TransformerMixin):
1153
1148
  self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
1154
1149
  if baseline_metric is not None and enriched_metric is not None:
1155
1150
  uplift = (enriched_cv_result.metric - baseline_cv_result.metric) * multiplier
1151
+ uplift_perc = uplift / abs(baseline_cv_result.metric) * 100
1156
1152
 
1157
1153
  train_metrics = {
1158
1154
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
@@ -1179,7 +1175,10 @@ class FeaturesEnricher(TransformerMixin):
1179
1175
  enriched_metric
1180
1176
  )
1181
1177
  if uplift is not None:
1182
- train_metrics[self.bundle.get("quality_metrics_uplift_header")] = uplift
1178
+ train_metrics[self.bundle.get("quality_metrics_uplift_header")] = round(uplift, 3)
1179
+ train_metrics[self.bundle.get("quality_metrics_uplift_perc_header")] = (
1180
+ f"{round(uplift_perc, 1)}%"
1181
+ )
1183
1182
  metrics = [train_metrics]
1184
1183
 
1185
1184
  # 3 If eval_set is presented - fit final model on train enriched data and score each
@@ -1228,8 +1227,10 @@ class FeaturesEnricher(TransformerMixin):
1228
1227
 
1229
1228
  if etalon_eval_metric is not None and enriched_eval_metric is not None:
1230
1229
  eval_uplift = (enriched_eval_results.metric - etalon_eval_results.metric) * multiplier
1230
+ eval_uplift_perc = eval_uplift / abs(etalon_eval_results.metric) * 100
1231
1231
  else:
1232
1232
  eval_uplift = None
1233
+ eval_uplift_perc = None
1233
1234
 
1234
1235
  eval_metrics = {
1235
1236
  self.bundle.get("quality_metrics_segment_header"): self.bundle.get(
@@ -1260,7 +1261,10 @@ class FeaturesEnricher(TransformerMixin):
1260
1261
  enriched_eval_metric
1261
1262
  )
1262
1263
  if eval_uplift is not None:
1263
- eval_metrics[self.bundle.get("quality_metrics_uplift_header")] = eval_uplift
1264
+ eval_metrics[self.bundle.get("quality_metrics_uplift_header")] = round(eval_uplift, 3)
1265
+ eval_metrics[self.bundle.get("quality_metrics_uplift_perc_header")] = (
1266
+ f"{round(eval_uplift_perc, 1)}%"
1267
+ )
1264
1268
 
1265
1269
  metrics.append(eval_metrics)
1266
1270
 
@@ -2495,21 +2499,6 @@ if response.status_code == 200:
2495
2499
  )
2496
2500
  df = converter.convert(df)
2497
2501
 
2498
- phone_column = self._get_phone_column(search_keys)
2499
- country_column = self._get_country_column(search_keys)
2500
- if phone_column:
2501
- converter = PhoneSearchKeyConverter(phone_column, country_column)
2502
- df = converter.convert(df)
2503
-
2504
- if country_column:
2505
- converter = CountrySearchKeyConverter(country_column)
2506
- df = converter.convert(df)
2507
-
2508
- postal_code = self._get_postal_column(search_keys)
2509
- if postal_code:
2510
- converter = PostalCodeSearchKeyConverter(postal_code)
2511
- df = converter.convert(df)
2512
-
2513
2502
  meaning_types = {}
2514
2503
  meaning_types.update({col: FileColumnMeaningType.FEATURE for col in features_for_transform})
2515
2504
  meaning_types.update({col: key.value for col, key in search_keys.items()})
@@ -2904,6 +2893,7 @@ if response.status_code == 200:
2904
2893
  self.fit_generated_features.extend(converter.generated_features)
2905
2894
  else:
2906
2895
  self.logger.info("Input dataset hasn't date column")
2896
+ # TODO: remove once this logic is implemented on the backend
2907
2897
  if self.__should_add_date_column():
2908
2898
  df = self._add_current_date_as_key(df, self.fit_search_keys, self.logger, self.bundle)
2909
2899
 
@@ -2935,6 +2925,26 @@ if response.status_code == 200:
2935
2925
  if normalizer.removed_features:
2936
2926
  self.__log_warning(self.bundle.get("dataset_date_features").format(normalizer.removed_features))
2937
2927
 
2928
+ non_feature_columns = [
2929
+ self.TARGET_NAME,
2930
+ EVAL_SET_INDEX,
2931
+ ] + list(self.fit_search_keys.keys())
2932
+ if DateTimeSearchKeyConverter.DATETIME_COL in df.columns:
2933
+ non_feature_columns.append(DateTimeSearchKeyConverter.DATETIME_COL)
2934
+
2935
+ features_columns = [c for c in df.columns if c not in non_feature_columns]
2936
+
2937
+ features_to_drop, feature_validator_warnings = FeaturesValidator(self.logger).validate(
2938
+ df, features_columns, self.generate_features, self.fit_columns_renaming
2939
+ )
2940
+ if feature_validator_warnings:
2941
+ for warning in feature_validator_warnings:
2942
+ self.__log_warning(warning)
2943
+ self.fit_dropped_features.update(features_to_drop)
2944
+ df = df.drop(columns=features_to_drop)
2945
+
2946
+ self.fit_generated_features = [f for f in self.fit_generated_features if f not in self.fit_dropped_features]
2947
+
2938
2948
  self.__adjust_cv(df)
2939
2949
 
2940
2950
  if self.id_columns is not None and self.cv is not None and self.cv.is_time_series():
@@ -2974,6 +2984,7 @@ if response.status_code == 200:
2974
2984
  # Convert EMAIL to HEM etc after unnesting to do it only with one column
2975
2985
  df = self.__convert_unnestable_keys(df, unnest_search_keys)
2976
2986
 
2987
+ # refresh features columns
2977
2988
  non_feature_columns = [
2978
2989
  self.TARGET_NAME,
2979
2990
  EVAL_SET_INDEX,
@@ -2985,17 +2996,6 @@ if response.status_code == 200:
2985
2996
 
2986
2997
  features_columns = [c for c in df.columns if c not in non_feature_columns]
2987
2998
 
2988
- features_to_drop, feature_validator_warnings = FeaturesValidator(self.logger).validate(
2989
- df, features_columns, self.generate_features, self.fit_columns_renaming
2990
- )
2991
- if feature_validator_warnings:
2992
- for warning in feature_validator_warnings:
2993
- self.__log_warning(warning)
2994
- self.fit_dropped_features.update(features_to_drop)
2995
- df = df.drop(columns=features_to_drop)
2996
-
2997
- self.fit_generated_features = [f for f in self.fit_generated_features if f not in self.fit_dropped_features]
2998
-
2999
2999
  meaning_types = {
3000
3000
  **{col: key.value for col, key in self.fit_search_keys.items()},
3001
3001
  **{str(c): FileColumnMeaningType.FEATURE for c in df.columns if c not in non_feature_columns},
@@ -3225,20 +3225,6 @@ if response.status_code == 200:
3225
3225
  self.logger,
3226
3226
  )
3227
3227
  df = converter.convert(df)
3228
- phone_column = self._get_phone_column(self.fit_search_keys)
3229
- country_column = self._get_country_column(self.fit_search_keys)
3230
- if phone_column:
3231
- converter = PhoneSearchKeyConverter(phone_column, country_column)
3232
- df = converter.convert(df)
3233
-
3234
- if country_column:
3235
- converter = CountrySearchKeyConverter(country_column)
3236
- df = converter.convert(df)
3237
-
3238
- postal_code = self._get_postal_column(self.fit_search_keys)
3239
- if postal_code:
3240
- converter = PostalCodeSearchKeyConverter(postal_code)
3241
- df = converter.convert(df)
3242
3228
 
3243
3229
  return df
3244
3230
 
@@ -4642,42 +4628,59 @@ if response.status_code == 200:
4642
4628
  if isinstance(X_, pd.Series):
4643
4629
  X_ = X_.to_frame()
4644
4630
 
4645
- # TODO check that this file was already uploaded
4646
-
4647
4631
  with tempfile.TemporaryDirectory() as tmp_dir:
4648
4632
  X_.to_parquet(f"{tmp_dir}/x.parquet", compression="zstd")
4633
+ x_digest_sha256 = self.rest_client.compute_file_digest(f"{tmp_dir}/x.parquet")
4634
+ if self.rest_client.is_file_uploaded(trace_id, x_digest_sha256):
4635
+ self.logger.info(f"File x.parquet was already uploaded with digest {x_digest_sha256}, skipping")
4636
+ else:
4637
+ self.rest_client.dump_input_file(trace_id, f"{tmp_dir}/x.parquet", "x.parquet")
4649
4638
 
4650
4639
  if y_ is not None:
4651
4640
  if isinstance(y_, pd.Series):
4652
4641
  y_ = y_.to_frame()
4653
4642
  y_.to_parquet(f"{tmp_dir}/y.parquet", compression="zstd")
4654
- if eval_set_ and _num_samples(eval_set_[0][0]) > 0:
4655
- eval_x_ = eval_set_[0][0]
4656
- eval_y_ = eval_set_[0][1]
4657
- if isinstance(eval_x_, pd.Series):
4658
- eval_x_ = eval_x_.to_frame()
4659
- eval_x_.to_parquet(f"{tmp_dir}/eval_x.parquet", compression="zstd")
4660
- if isinstance(eval_y_, pd.Series):
4661
- eval_y_ = eval_y_.to_frame()
4662
- eval_y_.to_parquet(f"{tmp_dir}/eval_y.parquet", compression="zstd")
4663
- self.rest_client.dump_input_files(
4664
- trace_id,
4665
- f"{tmp_dir}/x.parquet",
4666
- f"{tmp_dir}/y.parquet",
4667
- f"{tmp_dir}/eval_x.parquet",
4668
- f"{tmp_dir}/eval_y.parquet",
4643
+ y_digest_sha256 = self.rest_client.compute_file_digest(f"{tmp_dir}/y.parquet")
4644
+ if self.rest_client.is_file_uploaded(trace_id, y_digest_sha256):
4645
+ self.logger.info(
4646
+ f"File y.parquet was already uploaded with digest {y_digest_sha256}, skipping"
4669
4647
  )
4670
4648
  else:
4671
- self.rest_client.dump_input_files(
4672
- trace_id,
4673
- f"{tmp_dir}/x.parquet",
4674
- f"{tmp_dir}/y.parquet",
4675
- )
4676
- else:
4677
- self.rest_client.dump_input_files(
4678
- trace_id,
4679
- f"{tmp_dir}/x.parquet",
4680
- )
4649
+ self.rest_client.dump_input_file(trace_id, f"{tmp_dir}/y.parquet", "y.parquet")
4650
+
4651
+ if eval_set_ is not None and len(eval_set_) > 0:
4652
+ for idx, (eval_x_, eval_y_) in enumerate(eval_set_):
4653
+ if isinstance(eval_x_, pd.Series):
4654
+ eval_x_ = eval_x_.to_frame()
4655
+ eval_x_.to_parquet(f"{tmp_dir}/eval_x_{idx}.parquet", compression="zstd")
4656
+ eval_x_digest_sha256 = self.rest_client.compute_file_digest(
4657
+ f"{tmp_dir}/eval_x_{idx}.parquet"
4658
+ )
4659
+ if self.rest_client.is_file_uploaded(trace_id, eval_x_digest_sha256):
4660
+ self.logger.info(
4661
+ f"File eval_x_{idx}.parquet was already uploaded with"
4662
+ f" digest {eval_x_digest_sha256}, skipping"
4663
+ )
4664
+ else:
4665
+ self.rest_client.dump_input_file(
4666
+ trace_id, f"{tmp_dir}/eval_x_{idx}.parquet", f"eval_x_{idx}.parquet"
4667
+ )
4668
+
4669
+ if isinstance(eval_y_, pd.Series):
4670
+ eval_y_ = eval_y_.to_frame()
4671
+ eval_y_.to_parquet(f"{tmp_dir}/eval_y_{idx}.parquet", compression="zstd")
4672
+ eval_y_digest_sha256 = self.rest_client.compute_file_digest(
4673
+ f"{tmp_dir}/eval_y_{idx}.parquet"
4674
+ )
4675
+ if self.rest_client.is_file_uploaded(trace_id, eval_y_digest_sha256):
4676
+ self.logger.info(
4677
+ f"File eval_y_{idx}.parquet was already uploaded"
4678
+ f" with digest {eval_y_digest_sha256}, skipping"
4679
+ )
4680
+ else:
4681
+ self.rest_client.dump_input_file(
4682
+ trace_id, f"{tmp_dir}/eval_y_{idx}.parquet", f"eval_y_{idx}.parquet"
4683
+ )
4681
4684
  except Exception:
4682
4685
  self.logger.warning("Failed to dump input files", exc_info=True)
4683
4686
 
upgini/http.py CHANGED
@@ -12,6 +12,7 @@ from enum import Enum
12
12
  from functools import lru_cache
13
13
  from http.client import HTTPConnection
14
14
  from json import dumps
15
+ from pathlib import Path
15
16
  from typing import Any, Dict, List, Optional, Tuple
16
17
  from urllib.parse import urljoin
17
18
 
@@ -292,6 +293,7 @@ class _RestClient:
292
293
  UPLOAD_ONLINE_URI = "private/api/v2/ads/upload-online"
293
294
  STOP_ADS_MANAGEMENT_TASK_URI_FMT = "private/api/v2/ads/management-task/{0}/stop"
294
295
  UNION_SEARCH_TASKS_URI_FMT = SERVICE_ROOT_V2 + "search/merge"
296
+ UPLOAD_AUTOFE_MODEL_URI_FMT = "private/api/v2/autofe/model/upload"
295
297
 
296
298
  ACCESS_TOKEN_HEADER_NAME = "Authorization"
297
299
  CONTENT_TYPE_HEADER_NAME = "Content-Type"
@@ -404,6 +406,16 @@ class _RestClient:
404
406
  meaning_types = [_RestClient.meaning_type_by_name(name, metadata) for name in search_key_names]
405
407
  return [meaning_type.value for meaning_type in meaning_types if meaning_type is not None]
406
408
 
409
def dump_input_file(self, trace_id: str, path: str, file_name: str):
    """Send one local file to the input-dump endpoint.

    Args:
        trace_id: Trace id forwarded with the request.
        path: Local path of the file to read.
        file_name: File name reported in the multipart ``file`` part.
    """
    api_path = self.SEARCH_DUMP_INPUT_FILE_FMT
    with open(path, "rb") as file:
        files = {"file": (file_name, file, "application/octet-stream")}

        def send():
            # The same handle is reused by every attempt. If the retry
            # helper invokes this callable again, a stream left at EOF by
            # the previous attempt would upload an empty payload, so
            # rewind before each send.
            file.seek(0)
            return self._send_post_file_req_v2(api_path, files, trace_id=trace_id, need_json_response=False)

        self._with_unauth_retry(send)
418
+
407
419
  def dump_input_files(
408
420
  self,
409
421
  trace_id: str,
@@ -811,6 +823,17 @@ class _RestClient:
811
823
  api_path = self.UNION_SEARCH_TASKS_URI_FMT
812
824
  return self._with_unauth_retry(lambda: self._send_post_req(api_path, trace_id, request, result_format=None))
813
825
 
826
def upload_autofe_model(self, file_path: str, metadata: dict, trace_id: str):
    """Upload a model file and its JSON metadata as one multipart request.

    Args:
        file_path: Local path of the model file; its base name is used as
            the file name of the ``model`` part.
        metadata: JSON-serialisable dict sent as the ``meta`` part.
        trace_id: Trace id forwarded with the request.

    Returns:
        Whatever the retry helper returns for the request.
    """
    api_path = self.UPLOAD_AUTOFE_MODEL_URI_FMT
    with open(file_path, "rb") as file:
        files = {
            "meta": ("metadata.json", dumps(metadata).encode(), "application/json"),
            "model": (Path(file_path).name, file, "application/octet-stream"),
        }

        def send():
            # The same handle is reused by every attempt. If the retry
            # helper invokes this callable again, a stream left at EOF by
            # the previous attempt would upload an empty model, so rewind
            # before each send.
            file.seek(0)
            return self._send_post_file_req_v2(api_path, files, trace_id=trace_id, need_json_response=False)

        return self._with_unauth_retry(send)
836
+
814
837
  # ---
815
838
 
816
839
  def _send_get_req(self, api_path: str, trace_id: Optional[str], additional_headers: Optional[dict] = None):
@@ -24,8 +24,11 @@ from upgini.metadata import (
24
24
  )
25
25
  from upgini.resource_bundle import ResourceBundle, get_custom_bundle
26
26
  from upgini.utils import find_numbers_with_decimal_comma
27
+ from upgini.utils.country_utils import CountrySearchKeyConverter
27
28
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
29
+ from upgini.utils.ip_utils import IpSearchKeyConverter
28
30
  from upgini.utils.phone_utils import PhoneSearchKeyConverter
31
+ from upgini.utils.postal_code_utils import PostalCodeSearchKeyConverter
29
32
 
30
33
 
31
34
  class Normalizer:
@@ -65,6 +68,12 @@ class Normalizer:
65
68
 
66
69
  df = self._convert_phone_numbers(df)
67
70
 
71
+ df = self._convert_ip_addresses(df)
72
+
73
+ df = self._convert_postal_codes(df)
74
+
75
+ df = self._convert_countries(df)
76
+
68
77
  df = self.__convert_features_types(df)
69
78
 
70
79
  return df, self.search_keys, self.generated_features
@@ -177,6 +186,22 @@ class Normalizer:
177
186
  df = converter.convert(df)
178
187
  return df
179
188
 
189
def _convert_ip_addresses(self, df: pd.DataFrame) -> pd.DataFrame:
    """Apply ``IpSearchKeyConverter.safe_ip_parse`` to every IP key column.

    Each column found for ``SearchKey.IP`` is overwritten on ``df`` with the
    parsed values; the same frame is returned.
    """
    ip_columns = SearchKey.find_all_keys(self.search_keys, SearchKey.IP)
    for column in ip_columns:
        parsed = df[column].apply(IpSearchKeyConverter.safe_ip_parse)
        df[column] = parsed
    return df
193
+
194
def _convert_postal_codes(self, df: pd.DataFrame) -> pd.DataFrame:
    """Run ``PostalCodeSearchKeyConverter`` over every postal-code key column.

    Columns are processed one after another; each converter receives the
    frame produced by the previous one. The final frame is returned.
    """
    postal_columns = SearchKey.find_all_keys(self.search_keys, SearchKey.POSTAL_CODE)
    for column in postal_columns:
        converter = PostalCodeSearchKeyConverter(column)
        df = converter.convert(df)
    return df
198
+
199
def _convert_countries(self, df: pd.DataFrame) -> pd.DataFrame:
    """Run ``CountrySearchKeyConverter`` on the country key column, if any.

    When no column is found for ``SearchKey.COUNTRY`` the frame is returned
    untouched.
    """
    country_column = SearchKey.find_key(self.search_keys, SearchKey.COUNTRY)
    if not country_column:
        return df
    converter = CountrySearchKeyConverter(country_column)
    return converter.convert(df)
204
+
180
205
  def __convert_features_types(self, df: pd.DataFrame):
181
206
  # self.logger.info("Convert features to supported data types")
182
207
 
@@ -284,8 +284,8 @@ quality_metrics_segment_header=Dataset type
284
284
  quality_metrics_match_rate_header=Match rate
285
285
  quality_metrics_baseline_header=Baseline {}
286
286
  quality_metrics_enriched_header=Enriched {}
287
- quality_metrics_uplift_header=Uplift
288
- quality_metrics_uplift_prc_header=Uplift, %
287
+ quality_metrics_uplift_header=Uplift, abs
288
+ quality_metrics_uplift_perc_header=Uplift, %
289
289
 
290
290
  # Legacy native api messages
291
291
  dataset_dataframe_or_path_empty=Either `df` or `path` must be provided
upgini/utils/ip_utils.py CHANGED
@@ -79,7 +79,7 @@ class IpSearchKeyConverter:
79
79
  pass
80
80
 
81
81
  @staticmethod
82
- def _safe_ip_parse(ip: Union[str, int, IPv4Address, IPv6Address]) -> Optional[_BaseAddress]:
82
+ def safe_ip_parse(ip: Union[str, int, IPv4Address, IPv6Address, bytes]) -> Optional[_BaseAddress]:
83
83
  try:
84
84
  return ip_address(ip)
85
85
  except ValueError:
@@ -110,7 +110,7 @@ class IpSearchKeyConverter:
110
110
  self.logger.info("Convert ip address to int")
111
111
  original_ip = self.columns_renaming[self.ip_column]
112
112
 
113
- df[self.ip_column] = df[self.ip_column].apply(self._safe_ip_parse)
113
+ df[self.ip_column] = df[self.ip_column].apply(self.safe_ip_parse)
114
114
  if df[self.ip_column].isnull().all():
115
115
  raise ValidationError(self.bundle.get("invalid_ip").format(self.ip_column))
116
116
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.96a3906.dev2
3
+ Version: 1.2.98a3922.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,10 +1,10 @@
1
- upgini/__about__.py,sha256=8ZaMc0M4yKUigIQciHTdkff0EFfiqt8pmRDvJz70MsQ,33
1
+ upgini/__about__.py,sha256=Xd-ctCimaoTAKxmWfidUhJ3JvDrk9S-Dlh78Hfit0mw,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=e6JDYTZ2AwC5aF-dqclKZKkiKrHo2f6cFmMQO2ZZmjM,32724
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=DFBA-3_yZSDcvJnfZjPCvNFFSC8OZwDl992-dlathm0,218432
7
- upgini/http.py,sha256=4i7fQwrwU3WzDUOWzrgR-4C8eJwj_5dBwRAR-UjUtlc,44345
6
+ upgini/features_enricher.py,sha256=4rKoV-3jM876Fk0fM4XlnW3fLwXvk1KN2ymcwlAfPm0,219941
7
+ upgini/http.py,sha256=DNcoS7qdxG0mOJn6I8r6O5I6XdIJTdzDzW3hkz3NgG4,45443
8
8
  upgini/metadata.py,sha256=vsbbHyPCP3Rs8WkeDgQg99uAA_zmsbDStAT-NwDYhO4,12455
9
9
  upgini/metrics.py,sha256=UbKEsHB7XDzoyGNqDx846zbh1t65GpqdnnhViccdoKU,45615
10
10
  upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
@@ -21,7 +21,7 @@ upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
21
21
  upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
22
22
  upgini/autofe/unary.py,sha256=N76Pehn-hO8FWlSdqJ2Wm-yoU1MSR7m6yb2GWYBcumU,5933
23
23
  upgini/autofe/utils.py,sha256=dYrtyAM8Vcc_R8u4dNo54IsGrHKagTHDJTKhGho0bRg,2967
24
- upgini/autofe/vector.py,sha256=jHs0nNTOaHspYUlxW7fjQepk4cvr_JDQ65L1OCiVsds,1360
24
+ upgini/autofe/vector.py,sha256=0MTfVPm7fEkPCPrmFgIvJ1wnzrWi5OYX1irJo1A1qqA,1761
25
25
  upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
26
26
  upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
27
27
  upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
@@ -31,14 +31,14 @@ upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_a
31
31
  upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
32
32
  upgini/autofe/timeseries/volatility.py,sha256=SvZfhM_ZAWCNpTf87WjSnZsnlblARgruDlu4By4Zvhc,8078
33
33
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- upgini/data_source/data_source_publisher.py,sha256=ufL8qK1vg8iUKd5bLWz6hEMGiC3JepUaWYx-nBKVqjA,24294
34
+ upgini/data_source/data_source_publisher.py,sha256=0UeZS_HOTp45jeeRg-AhBIiJr5qM9GoRNvvOoizulS0,25202
35
35
  upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
36
36
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
37
37
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
- upgini/normalizer/normalize_utils.py,sha256=hMHi5u6Oleqp885UW0Q0Uf1F8fRdZ5jJ7NYoY52SqaI,7403
38
+ upgini/normalizer/normalize_utils.py,sha256=mDh2mBW3aQMB4EFP2aHbf2dGMVkOcWnp4sKKvKDBh8w,8511
39
39
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
40
40
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
41
- upgini/resource_bundle/strings.properties,sha256=Hfpr2-I5Ws6ugIN1QSz549OHayZeLYglRsbrGDT6g9g,28491
41
+ upgini/resource_bundle/strings.properties,sha256=UO6K0wwvutyOyClOnJYlFYAETzMSen6hHnj3--5AIAs,28497
42
42
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
43
43
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -59,7 +59,7 @@ upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0-
59
59
  upgini/utils/feature_info.py,sha256=b3RvAeOHSEu-ZXWTrf42Dll_3ZUBL0pw7sdk7hgUKD0,7284
60
60
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
61
61
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
62
- upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
62
+ upgini/utils/ip_utils.py,sha256=wmnnwVQdjX9o1cNQw6VQMk6maHhvsq6hNsZBYf9knrw,6585
63
63
  upgini/utils/mstats.py,sha256=u3gQVUtDRbyrOQK6V1UJ2Rx1QbkSNYGjXa6m3Z_dPVs,6286
64
64
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
65
65
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
@@ -71,7 +71,7 @@ upgini/utils/target_utils.py,sha256=i3Xt5l9ybB2_nF_ma5cfPuL3OeFTs2dY2xDI0p4Azpg,
71
71
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
72
72
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
73
73
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
74
- upgini-1.2.96a3906.dev2.dist-info/METADATA,sha256=IiF040yVE4K20_1kus6Y4tzj6rDeA2X8kYkhJ0_Kxr8,49538
75
- upgini-1.2.96a3906.dev2.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
76
- upgini-1.2.96a3906.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
77
- upgini-1.2.96a3906.dev2.dist-info/RECORD,,
74
+ upgini-1.2.98a3922.dev1.dist-info/METADATA,sha256=PGqkfTScvXJH9Yx5UQd2HEmu5ty86Ss9_OE5JAY1Yzo,49538
75
+ upgini-1.2.98a3922.dev1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
76
+ upgini-1.2.98a3922.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
77
+ upgini-1.2.98a3922.dev1.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: hatchling 1.24.2
2
+ Generator: hatchling 1.25.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any