upgini 1.1.275__py3-none-any.whl → 1.1.275a1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/metadata.py CHANGED
@@ -4,6 +4,8 @@ from typing import Dict, List, Optional, Set
4
4
  from pydantic import BaseModel
5
5
 
6
6
  SYSTEM_RECORD_ID = "system_record_id"
7
+ ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
8
+ SEARCH_KEY_UNNEST = "search_key_unnest"
7
9
  SORT_ID = "sort_id"
8
10
  EVAL_SET_INDEX = "eval_set_index"
9
11
  TARGET = "target"
@@ -11,7 +13,7 @@ COUNTRY = "country_iso_code"
11
13
  RENAMED_INDEX = "index_col"
12
14
  DEFAULT_INDEX = "index"
13
15
  ORIGINAL_INDEX = "original_index"
14
- SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
16
+ SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
15
17
 
16
18
 
17
19
  class FileColumnMeaningType(Enum):
@@ -37,6 +39,8 @@ class FileColumnMeaningType(Enum):
37
39
  POSTAL_CODE = "POSTAL_CODE"
38
40
  SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
39
41
  EVAL_SET_INDEX = "EVAL_SET_INDEX"
42
+ ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
43
+ UNNEST_KEY = "UNNEST_KEY"
40
44
 
41
45
 
42
46
  class SearchKey(Enum):
@@ -182,6 +186,10 @@ class FileColumnMetadata(BaseModel):
182
186
  meaningType: FileColumnMeaningType
183
187
  minMaxValues: Optional[NumericInterval] = None
184
188
  originalName: Optional[str]
189
+ # is this column contains keys from multiple key columns like msisdn1, msisdn2
190
+ isUnnest: bool = False,
191
+ # list of original etalon key column names like msisdn1, msisdn2
192
+ unnestKeyNames: Optional[list[str]]
185
193
 
186
194
 
187
195
  class FileMetadata(BaseModel):
upgini/metrics.py CHANGED
@@ -1,4 +1,3 @@
1
- import inspect
2
1
  import logging
3
2
  import re
4
3
  from copy import deepcopy
@@ -382,11 +381,6 @@ class EstimatorWrapper:
382
381
  kwargs["estimator"] = estimator_copy
383
382
  if isinstance(estimator, CatBoostClassifier) or isinstance(estimator, CatBoostRegressor):
384
383
  if cat_features is not None:
385
- for cat_feature in cat_features:
386
- if cat_feature not in X.columns:
387
- logger.error(
388
- f"Client cat_feature `{cat_feature}` not found in X columns: {X.columns.to_list()}"
389
- )
390
384
  estimator_copy.set_params(
391
385
  cat_features=[X.columns.get_loc(cat_feature) for cat_feature in cat_features]
392
386
  )
@@ -653,12 +647,6 @@ class OtherEstimatorWrapper(EstimatorWrapper):
653
647
  def validate_scoring_argument(scoring: Union[Callable, str, None]):
654
648
  if isinstance(scoring, str) and scoring is not None:
655
649
  _get_scorer_by_name(scoring)
656
- elif isinstance(scoring, Callable):
657
- spec = inspect.getfullargspec(scoring)
658
- if len(spec.args) < 3:
659
- raise ValidationError(
660
- f"Invalid scoring function passed {scoring}. It should accept 3 input arguments: estimator, X, y"
661
- )
662
650
 
663
651
 
664
652
  def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:
@@ -1,7 +1,7 @@
1
1
  from typing import Optional
2
2
 
3
3
  import pandas as pd
4
- from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype, is_object_dtype
4
+ from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype
5
5
 
6
6
  from upgini.errors import ValidationError
7
7
 
@@ -44,7 +44,7 @@ class PhoneNormalizer:
44
44
  Method will remove all non numeric chars from string and convert it to int.
45
45
  None will be set for phone numbers that couldn"t be converted to int
46
46
  """
47
- if is_string_dtype(self.df[self.phone_column_name]) or is_object_dtype(self.df[self.phone_column_name]):
47
+ if is_string_dtype(self.df[self.phone_column_name]):
48
48
  convert_func = self.phone_str_to_int_safe
49
49
  elif is_float_dtype(self.df[self.phone_column_name]):
50
50
  convert_func = self.phone_float_to_int_safe
@@ -38,7 +38,6 @@ loss_selection_warn=\nWARNING: Loss `{0}` is not supported for feature selection
38
38
  loss_calc_metrics_warn=\nWARNING: Loss `{0}` is not supported for metrics calculation with {1}
39
39
  multivariate_timeseries_detected=\nWARNING: Multivariate TimeSeries detected. Blocked time series cross-validation split selected.\nMore details: https://github.com/upgini/upgini#-time-series-prediction-support
40
40
  group_k_fold_in_classification=\nWARNING: Using group K-fold cross-validation split for classification task.
41
- current_date_added=\nWARNING: No date/datetime column was detected in X to be used as a search key. The current date will be used to match the latest version of data sources
42
41
 
43
42
  # Errors
44
43
  failed_search_by_task_id=Failed to retrieve the specified search results
@@ -88,6 +87,7 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
88
87
  search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
89
88
  empty_search_key=Search key {} is empty. Please fill values or remove this search key
90
89
  single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
90
+ unsupported_multi_key=Search key {} cannot be used multiple times
91
91
  unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
92
92
  date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
93
93
  invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
@@ -159,7 +159,7 @@ dataset_invalid_multiclass_target=Unexpected dtype of target for multiclass task
159
159
  dataset_invalid_regression_target=Unexpected dtype of target for regression task type: {}. Expected float
160
160
  dataset_invalid_timeseries_target=Unexpected dtype of target for timeseries task type: {}. Expected float
161
161
  dataset_to_many_multiclass_targets=The number of target classes {} exceeds the allowed threshold: {}. Please, correct your data and try again
162
- dataset_rarest_class_less_min=Count of rows with the rarest class `{}` is {}, minimum count must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
162
+ dataset_rarest_class_less_min=Frequency of the rarest class `{}` is {}, minimum frequency must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
163
163
  dataset_rarest_class_less_threshold=\nWARNING: Target is imbalanced and will be undersampled to the rarest class. Frequency of the rarest class `{}` is {}\nMinimum number of observations for each class to avoid undersampling {} ({}%)
164
164
  dataset_date_features=\nWARNING: Columns {} is a datetime or period type but not used as a search key, removed from X
165
165
  dataset_too_many_features=Too many features. Maximum number of features is {}
upgini/utils/__init__.py CHANGED
@@ -2,7 +2,7 @@ import itertools
2
2
  from typing import List, Tuple
3
3
 
4
4
  import pandas as pd
5
- from pandas.api.types import is_string_dtype, is_object_dtype
5
+ from pandas.api.types import is_string_dtype
6
6
 
7
7
 
8
8
  def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
@@ -20,6 +20,5 @@ def find_numbers_with_decimal_comma(df: pd.DataFrame) -> pd.DataFrame:
20
20
  return [
21
21
  col
22
22
  for col in tmp.columns
23
- if (is_string_dtype(tmp[col]) or is_object_dtype(tmp[col]))
24
- and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
23
+ if is_string_dtype(tmp[col]) and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
25
24
  ]
@@ -1,4 +1,4 @@
1
- from typing import List, Optional
1
+ from typing import List
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -10,16 +10,18 @@ class BaseSearchKeyDetector:
10
10
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
11
11
  raise NotImplementedError()
12
12
 
13
- def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
14
- for column_name in column_names:
15
- if self._is_search_key_by_name(column_name):
16
- return column_name
13
+ def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
14
+ return [
15
+ column_name
16
+ for column_name in column_names
17
+ if self._is_search_key_by_name(column_name)
18
+ ]
17
19
 
18
- def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
19
- maybe_column = self._get_search_key_by_name(df.columns.to_list())
20
- if maybe_column is not None:
21
- return maybe_column
22
-
23
- for column_name in df.columns:
20
+ def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
21
+ other_columns = [col for col in df.columns if col not in existing_search_keys]
22
+ columns_by_names = self._get_search_keys_by_name(other_columns)
23
+ columns_by_values = []
24
+ for column_name in other_columns:
24
25
  if self._is_search_key_by_values(df[column_name]):
25
- return column_name
26
+ columns_by_values.append(column_name)
27
+ return list(set(columns_by_names + columns_by_values))
@@ -1,5 +1,5 @@
1
1
  import pandas as pd
2
- from pandas.api.types import is_string_dtype, is_object_dtype
2
+ from pandas.api.types import is_string_dtype
3
3
 
4
4
  from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
5
5
 
@@ -9,7 +9,7 @@ class CountrySearchKeyDetector(BaseSearchKeyDetector):
9
9
  return "country" in str(column_name).lower()
10
10
 
11
11
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
12
- if not is_string_dtype(column) and not is_object_dtype(column):
12
+ if not is_string_dtype(column):
13
13
  return False
14
14
 
15
15
  all_count = len(column)
@@ -6,10 +6,7 @@ from typing import Dict, List, Optional
6
6
  import numpy as np
7
7
  import pandas as pd
8
8
  from dateutil.relativedelta import relativedelta
9
- from pandas.api.types import (
10
- is_numeric_dtype,
11
- is_period_dtype,
12
- )
9
+ from pandas.api.types import is_numeric_dtype, is_period_dtype, is_string_dtype
13
10
 
14
11
  from upgini.errors import ValidationError
15
12
  from upgini.metadata import SearchKey
@@ -81,6 +78,9 @@ class DateTimeSearchKeyConverter:
81
78
  df[self.date_column] = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
82
79
  elif isinstance(df[self.date_column].values[0], datetime.date):
83
80
  df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
81
+ elif is_string_dtype(df[self.date_column]):
82
+ df[self.date_column] = df[self.date_column].apply(self.clean_date)
83
+ df[self.date_column] = self.parse_date(df)
84
84
  elif is_period_dtype(df[self.date_column]):
85
85
  df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
86
86
  elif is_numeric_dtype(df[self.date_column]):
@@ -100,9 +100,6 @@ class DateTimeSearchKeyConverter:
100
100
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
101
101
  self.logger.warning(msg)
102
102
  raise ValidationError(msg)
103
- else:
104
- df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
105
- df[self.date_column] = self.parse_date(df)
106
103
 
107
104
  # If column with date is datetime then extract seconds of the day and minute of the hour
108
105
  # as additional features
@@ -3,7 +3,15 @@ from typing import Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
 
6
- from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
6
+ from upgini.metadata import (
7
+ ENTITY_SYSTEM_RECORD_ID,
8
+ EVAL_SET_INDEX,
9
+ SORT_ID,
10
+ SYSTEM_RECORD_ID,
11
+ TARGET,
12
+ ModelTaskType,
13
+ SearchKey,
14
+ )
7
15
  from upgini.resource_bundle import ResourceBundle
8
16
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
9
17
  from upgini.utils.target_utils import define_task
@@ -143,6 +151,8 @@ def clean_full_duplicates(
143
151
  unique_columns = df.columns.tolist()
144
152
  if SYSTEM_RECORD_ID in unique_columns:
145
153
  unique_columns.remove(SYSTEM_RECORD_ID)
154
+ if ENTITY_SYSTEM_RECORD_ID in unique_columns:
155
+ unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
146
156
  if SORT_ID in unique_columns:
147
157
  unique_columns.remove(SORT_ID)
148
158
  if EVAL_SET_INDEX in unique_columns:
@@ -4,7 +4,7 @@ from hashlib import sha256
4
4
  from typing import Dict, List, Optional
5
5
 
6
6
  import pandas as pd
7
- from pandas.api.types import is_string_dtype, is_object_dtype
7
+ from pandas.api.types import is_string_dtype
8
8
  from upgini.resource_bundle import bundle
9
9
 
10
10
  from upgini.metadata import SearchKey
@@ -18,7 +18,7 @@ class EmailSearchKeyDetector(BaseSearchKeyDetector):
18
18
  return str(column_name).lower() in ["email", "e_mail", "e-mail"]
19
19
 
20
20
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
21
- if not is_string_dtype(column) and not is_object_dtype:
21
+ if not is_string_dtype(column):
22
22
  return False
23
23
  if not column.astype("string").str.contains("@").any():
24
24
  return False
@@ -38,11 +38,13 @@ class EmailSearchKeyConverter:
38
38
  email_column: str,
39
39
  hem_column: Optional[str],
40
40
  search_keys: Dict[str, SearchKey],
41
+ unnest_search_keys: Optional[List[str]] = None,
41
42
  logger: Optional[logging.Logger] = None,
42
43
  ):
43
44
  self.email_column = email_column
44
45
  self.hem_column = hem_column
45
46
  self.search_keys = search_keys
47
+ self.unnest_search_keys = unnest_search_keys
46
48
  if logger is not None:
47
49
  self.logger = logger
48
50
  else:
@@ -80,9 +82,12 @@ class EmailSearchKeyConverter:
80
82
  del self.search_keys[self.email_column]
81
83
  return df
82
84
  self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
85
+ self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
83
86
  self.email_converted_to_hem = True
84
87
 
85
88
  del self.search_keys[self.email_column]
89
+ if self.email_column in self.unnest_search_keys:
90
+ self.unnest_search_keys.remove(self.email_column)
86
91
 
87
92
  df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
88
93
 
@@ -81,8 +81,7 @@ class FeaturesValidator:
81
81
  return [
82
82
  i
83
83
  for i in df
84
- if (is_object_dtype(df[i]) or is_string_dtype(df[i]) or is_integer_dtype(df[i]))
85
- and (df[i].nunique(dropna=False) / row_count >= 0.85)
84
+ if (is_string_dtype(df[i]) or is_integer_dtype(df[i])) and (df[i].nunique(dropna=False) / row_count >= 0.95)
86
85
  ]
87
86
 
88
87
  @staticmethod
@@ -107,7 +107,7 @@ def balance_undersample(
107
107
  min_class_count = vc[min_class_value]
108
108
 
109
109
  min_class_percent = imbalance_threshold / target_classes_count
110
- min_class_threshold = int(min_class_percent * count)
110
+ min_class_threshold = min_class_percent * count
111
111
 
112
112
  resampled_data = df
113
113
  df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
@@ -55,7 +55,7 @@ def _get_execution_ide() -> str:
55
55
  def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optional[str] = None) -> dict:
56
56
  # default values
57
57
  track = {"ide": _get_execution_ide()}
58
- ident_res = "https://api64.ipify.org"
58
+ ident_res = "https://api.ipify.org"
59
59
 
60
60
  try:
61
61
  track["hostname"] = socket.gethostname()
@@ -74,20 +74,17 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
74
74
  display(
75
75
  Javascript(
76
76
  """
77
- async function getVisitorId() {
78
- return import('https://upgini.github.io/upgini/js/a.js')
77
+ import('https://upgini.github.io/upgini/js/a.js')
79
78
  .then(FingerprintJS => FingerprintJS.load())
80
79
  .then(fp => fp.get())
81
- .then(result => result.visitorId);
82
- }
80
+ .then(result => window.visitorId = result.visitorId);
83
81
  """
84
82
  )
85
83
  )
86
- track["visitorId"] = output.eval_js("getVisitorId()", timeout_sec=30)
84
+ track["visitorId"] = output.eval_js("window.visitorId", timeout_sec=10)
87
85
  except Exception as e:
88
86
  track["err"] = str(e)
89
- if "visitorId" not in track:
90
- track["visitorId"] = "None"
87
+ track["visitorId"] = "None"
91
88
  if client_ip:
92
89
  track["ip"] = client_ip
93
90
  else:
@@ -98,19 +95,16 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
98
95
  display(
99
96
  Javascript(
100
97
  f"""
101
- async function getIP() {{
102
- return fetch("{ident_res}")
98
+ fetch("{ident_res}")
103
99
  .then(response => response.text())
104
- .then(data => data);
105
- }}
100
+ .then(data => window.clientIP = data);
106
101
  """
107
102
  )
108
103
  )
109
- track["ip"] = output.eval_js("getIP()", timeout_sec=10)
104
+ track["ip"] = output.eval_js("window.clientIP", timeout_sec=10)
110
105
  except Exception as e:
111
106
  track["err"] = str(e)
112
- if "ip" not in track:
113
- track["ip"] = "0.0.0.0"
107
+ track["ip"] = "0.0.0.0"
114
108
 
115
109
  elif track["ide"] == "binder":
116
110
  try:
@@ -122,10 +116,8 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
122
116
  track["visitorId"] = sha256(os.environ["CLIENT_IP"].encode()).hexdigest()
123
117
  except Exception as e:
124
118
  track["err"] = str(e)
125
- if "ip" not in track:
126
- track["ip"] = "0.0.0.0"
127
- if "visitorId" not in track:
128
- track["visitorId"] = "None"
119
+ track["ip"] = "0.0.0.0"
120
+ track["visitorId"] = "None"
129
121
 
130
122
  elif track["ide"] == "kaggle":
131
123
  try:
@@ -144,8 +136,8 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
144
136
  raise Exception(err)
145
137
  except Exception as e:
146
138
  track["err"] = str(e)
147
- if "visitorId" not in track:
148
- track["visitorId"] = "None"
139
+ track["ip"] = "0.0.0.0"
140
+ track["visitorId"] = "None"
149
141
  else:
150
142
  try:
151
143
  if client_ip:
@@ -158,9 +150,5 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
158
150
  track["visitorId"] = sha256(str(getnode()).encode()).hexdigest()
159
151
  except Exception as e:
160
152
  track["err"] = str(e)
161
- if "visitorId" not in track:
162
- track["visitorId"] = "None"
163
- if "ip" not in track:
164
- track["ip"] = "0.0.0.0"
165
153
 
166
154
  return track
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.275
3
+ Version: 1.1.275a1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -28,7 +28,7 @@ Description-Content-Type: text/markdown
28
28
  License-File: LICENSE
29
29
  Requires-Dist: python-dateutil >=2.8.0
30
30
  Requires-Dist: requests >=2.8.0
31
- Requires-Dist: pandas <3.0.0,>=1.1.0
31
+ Requires-Dist: pandas <2.0.0,>=1.1.0
32
32
  Requires-Dist: numpy >=1.19.0
33
33
  Requires-Dist: scikit-learn >=1.3.0
34
34
  Requires-Dist: pydantic <2.0.0,>=1.8.2
@@ -1,12 +1,12 @@
1
1
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
- upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
3
- upgini/dataset.py,sha256=HwL2syoMf3F9k9SmsJJMhhqnAddZcx28RZ1aYam7Lhs,45665
2
+ upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
3
+ upgini/dataset.py,sha256=g10BnbayclZMno9mAabpz_Zu0iyMiW0f_jOwt_xJr8U,45947
4
4
  upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
5
- upgini/features_enricher.py,sha256=XKN-SdzX5EHKJHiPWvmEGDiCy6iK2ZaNPw75DYfcev0,176176
5
+ upgini/features_enricher.py,sha256=CgUBRCPW_itgBfaup3Tg_yfPYMbQpufoOqu4yYvn6VU,179316
6
6
  upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
7
7
  upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
8
- upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
9
- upgini/metrics.py,sha256=tGzdn0jgup86OlH_GS4eoza8ZJZ9wgaJr7SaX3Upwzo,29652
8
+ upgini/metadata.py,sha256=FFwTnoMxdJ-7oKXbRgght1yk7e2u90WpeqljKDWUj18,10106
9
+ upgini/metrics.py,sha256=VmxVc-plbRPZ1U3Ve3E-FZkhYqi0X2r7x8H5L-shux4,29058
10
10
  upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
11
11
  upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
12
12
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -15,49 +15,49 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
15
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  upgini/autofe/all_operands.py,sha256=H66wqVLD-H9k8A4-q2wslhV9QaNxlb49f8YiT0Xfkps,2356
17
17
  upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
18
- upgini/autofe/date.py,sha256=408p8P2OTPM2D3LsEGGtaiCepKGgM1BbOCQNRzAmI6c,4223
18
+ upgini/autofe/date.py,sha256=cc0GMAJR0QZOI_Qp2V5UDklaXLNS_79O1GhU6GlOYzg,3895
19
19
  upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
20
20
  upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
21
21
  upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
22
22
  upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
23
23
  upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
24
24
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
25
+ upgini/data_source/data_source_publisher.py,sha256=J2lrpPuysUHPeqTSfoybBtPRTBCFu7R5KzaakhjaRDc,16485
26
26
  upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
27
27
  upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
28
28
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
29
+ upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
30
30
  upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
31
31
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
32
- upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
32
+ upgini/resource_bundle/strings.properties,sha256=AK5xktWWYa0smEa_ZVT7BFlXPSx7M_NTMIfXhgsnE2Y,26177
33
33
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
34
34
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
36
36
  upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
37
37
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
38
- upgini/utils/__init__.py,sha256=YVum3lRKpyfqoJy_7HJyU6SmIgbmG8QLkHIpibE_ud8,842
39
- upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
38
+ upgini/utils/__init__.py,sha256=dQ4-s8-sZ5eOBZ-mH3gEwDHTdI0wI1bUAVgVqUKKPx4,786
39
+ upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
40
40
  upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
41
- upgini/utils/country_utils.py,sha256=pV8TBURthYqwSOfH1lxfYc2blm3OvfLFCMvRv8rKTp4,6511
41
+ upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
42
42
  upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
43
43
  upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
44
- upgini/utils/datetime_utils.py,sha256=_mfhWb5ogEThvanQ-py1Lb6VvUvF2vT20tQgNprNz6o,10321
45
- upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
44
+ upgini/utils/datetime_utils.py,sha256=4ii5WphAHlb_NRmdJx35VZpTarJbAr-AnDw3XSzUSow,10346
45
+ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
46
46
  upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
47
- upgini/utils/email_utils.py,sha256=R9bVOfbS-oVkA8PdwZfQBxm7B4mQlRtkwqx2cf6zPCY,3520
47
+ upgini/utils/email_utils.py,sha256=0EPCxMU-huzTgb_vySiAQ8tmSUhS31Mz2BpaHGwwYO4,3772
48
48
  upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
49
- upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
49
+ upgini/utils/features_validator.py,sha256=P-dfjBLAMxgzOcUX1Jo1bhVp8-8WyTyF3Ef0YZ5nfRI,3269
50
50
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
51
51
  upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
52
52
  upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
53
53
  upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
54
54
  upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
55
55
  upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,44014
56
- upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
57
- upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
56
+ upgini/utils/target_utils.py,sha256=9K67tkY7LWhQMO-vbbPqBaO-KriAmg_6fVz5RQRaLQc,7802
57
+ upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
58
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
59
- upgini-1.1.275.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
- upgini-1.1.275.dist-info/METADATA,sha256=6RZCJLAqN3qIrXOvyAaQIr75-TZw4NcLkp5yXS637ls,48156
61
- upgini-1.1.275.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
- upgini-1.1.275.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
- upgini-1.1.275.dist-info/RECORD,,
59
+ upgini-1.1.275a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
+ upgini-1.1.275a1.dist-info/METADATA,sha256=ocZUhdmjsYXKoCXt0W3M4gfPGQ8UlFtQlYIjdD_6_w0,48158
61
+ upgini-1.1.275a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
+ upgini-1.1.275a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
+ upgini-1.1.275a1.dist-info/RECORD,,