upgini 1.1.275a1__py3-none-any.whl → 1.1.276__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/metadata.py CHANGED
@@ -4,8 +4,6 @@ from typing import Dict, List, Optional, Set
4
4
  from pydantic import BaseModel
5
5
 
6
6
  SYSTEM_RECORD_ID = "system_record_id"
7
- ENTITY_SYSTEM_RECORD_ID = "entity_system_record_id"
8
- SEARCH_KEY_UNNEST = "search_key_unnest"
9
7
  SORT_ID = "sort_id"
10
8
  EVAL_SET_INDEX = "eval_set_index"
11
9
  TARGET = "target"
@@ -13,7 +11,7 @@ COUNTRY = "country_iso_code"
13
11
  RENAMED_INDEX = "index_col"
14
12
  DEFAULT_INDEX = "index"
15
13
  ORIGINAL_INDEX = "original_index"
16
- SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, ENTITY_SYSTEM_RECORD_ID, SEARCH_KEY_UNNEST, EVAL_SET_INDEX, TARGET, COUNTRY}
14
+ SYSTEM_COLUMNS = {SYSTEM_RECORD_ID, EVAL_SET_INDEX, TARGET, COUNTRY, SORT_ID}
17
15
 
18
16
 
19
17
  class FileColumnMeaningType(Enum):
@@ -39,8 +37,6 @@ class FileColumnMeaningType(Enum):
39
37
  POSTAL_CODE = "POSTAL_CODE"
40
38
  SYSTEM_RECORD_ID = "SYSTEM_RECORD_ID"
41
39
  EVAL_SET_INDEX = "EVAL_SET_INDEX"
42
- ENTITY_SYSTEM_RECORD_ID = "ENTITY_SYSTEM_RECORD_ID"
43
- UNNEST_KEY = "UNNEST_KEY"
44
40
 
45
41
 
46
42
  class SearchKey(Enum):
@@ -186,10 +182,6 @@ class FileColumnMetadata(BaseModel):
186
182
  meaningType: FileColumnMeaningType
187
183
  minMaxValues: Optional[NumericInterval] = None
188
184
  originalName: Optional[str]
189
- # is this column contains keys from multiple key columns like msisdn1, msisdn2
190
- isUnnest: bool = False,
191
- # list of original etalon key column names like msisdn1, msisdn2
192
- unnestKeyNames: Optional[list[str]]
193
185
 
194
186
 
195
187
  class FileMetadata(BaseModel):
upgini/metrics.py CHANGED
@@ -1,3 +1,4 @@
1
+ import inspect
1
2
  import logging
2
3
  import re
3
4
  from copy import deepcopy
@@ -381,6 +382,11 @@ class EstimatorWrapper:
381
382
  kwargs["estimator"] = estimator_copy
382
383
  if isinstance(estimator, CatBoostClassifier) or isinstance(estimator, CatBoostRegressor):
383
384
  if cat_features is not None:
385
+ for cat_feature in cat_features:
386
+ if cat_feature not in X.columns:
387
+ logger.error(
388
+ f"Client cat_feature `{cat_feature}` not found in X columns: {X.columns.to_list()}"
389
+ )
384
390
  estimator_copy.set_params(
385
391
  cat_features=[X.columns.get_loc(cat_feature) for cat_feature in cat_features]
386
392
  )
@@ -647,6 +653,12 @@ class OtherEstimatorWrapper(EstimatorWrapper):
647
653
  def validate_scoring_argument(scoring: Union[Callable, str, None]):
648
654
  if isinstance(scoring, str) and scoring is not None:
649
655
  _get_scorer_by_name(scoring)
656
+ elif isinstance(scoring, Callable):
657
+ spec = inspect.getfullargspec(scoring)
658
+ if len(spec.args) < 3:
659
+ raise ValidationError(
660
+ f"Invalid scoring function passed {scoring}. It should accept 3 input arguments: estimator, X, y"
661
+ )
650
662
 
651
663
 
652
664
  def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:
@@ -1,7 +1,7 @@
1
1
  from typing import Optional
2
2
 
3
3
  import pandas as pd
4
- from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype
4
+ from pandas.api.types import is_float_dtype, is_int64_dtype, is_string_dtype, is_object_dtype
5
5
 
6
6
  from upgini.errors import ValidationError
7
7
 
@@ -44,7 +44,7 @@ class PhoneNormalizer:
44
44
  Method will remove all non numeric chars from string and convert it to int.
45
45
  None will be set for phone numbers that couldn"t be converted to int
46
46
  """
47
- if is_string_dtype(self.df[self.phone_column_name]):
47
+ if is_string_dtype(self.df[self.phone_column_name]) or is_object_dtype(self.df[self.phone_column_name]):
48
48
  convert_func = self.phone_str_to_int_safe
49
49
  elif is_float_dtype(self.df[self.phone_column_name]):
50
50
  convert_func = self.phone_float_to_int_safe
@@ -38,6 +38,7 @@ loss_selection_warn=\nWARNING: Loss `{0}` is not supported for feature selection
38
38
  loss_calc_metrics_warn=\nWARNING: Loss `{0}` is not supported for metrics calculation with {1}
39
39
  multivariate_timeseries_detected=\nWARNING: Multivariate TimeSeries detected. Blocked time series cross-validation split selected.\nMore details: https://github.com/upgini/upgini#-time-series-prediction-support
40
40
  group_k_fold_in_classification=\nWARNING: Using group K-fold cross-validation split for classification task.
41
+ current_date_added=\nWARNING: No date/datetime column was detected in X to be used as a search key. The current date will be used to match the latest version of data sources
41
42
 
42
43
  # Errors
43
44
  failed_search_by_task_id=Failed to retrieve the specified search results
@@ -87,7 +88,6 @@ unsupported_search_key_type=Unsupported type of key in search_keys: {}
87
88
  search_key_country_and_country_code=\nWARNING: SearchKey.COUNTRY and country_code parameter were passed simultaniously. Parameter country_code will be ignored
88
89
  empty_search_key=Search key {} is empty. Please fill values or remove this search key
89
90
  single_constant_search_key=\nWARNING: Constant value detected for the {} search key in the X dataframe: {}.\nThat search key will add constant features for different y values.\nPlease add extra search keys with non constant values, like the COUNTRY, POSTAL_CODE, DATE, PHONE NUMBER, EMAIL/HEM or IPv4
90
- unsupported_multi_key=Search key {} cannot be used multiple times
91
91
  unsupported_index_column=\nWARNING: Your column with name `index` was dropped because it's reserved name is booked for system needs.
92
92
  date_string_without_format=Date column `{}` has string type, but date_format is not specified. Convert column to datetime type or pass date_format
93
93
  invalid_date_format=Failed to parse date in column `{}`. Try to pass explicit date format in date_format argument of FeaturesEnricher constructor
@@ -159,7 +159,7 @@ dataset_invalid_multiclass_target=Unexpected dtype of target for multiclass task
159
159
  dataset_invalid_regression_target=Unexpected dtype of target for regression task type: {}. Expected float
160
160
  dataset_invalid_timeseries_target=Unexpected dtype of target for timeseries task type: {}. Expected float
161
161
  dataset_to_many_multiclass_targets=The number of target classes {} exceeds the allowed threshold: {}. Please, correct your data and try again
162
- dataset_rarest_class_less_min=Frequency of the rarest class `{}` is {}, minimum frequency must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
162
+ dataset_rarest_class_less_min=Count of rows with the rarest class `{}` is {}, minimum count must be > {} for each class\nPlease, remove rows with rarest class from your dataframe
163
163
  dataset_rarest_class_less_threshold=\nWARNING: Target is imbalanced and will be undersampled to the rarest class. Frequency of the rarest class `{}` is {}\nMinimum number of observations for each class to avoid undersampling {} ({}%)
164
164
  dataset_date_features=\nWARNING: Columns {} is a datetime or period type but not used as a search key, removed from X
165
165
  dataset_too_many_features=Too many features. Maximum number of features is {}
upgini/utils/__init__.py CHANGED
@@ -2,7 +2,7 @@ import itertools
2
2
  from typing import List, Tuple
3
3
 
4
4
  import pandas as pd
5
- from pandas.api.types import is_string_dtype
5
+ from pandas.api.types import is_string_dtype, is_object_dtype
6
6
 
7
7
 
8
8
  def combine_search_keys(search_keys: List[str]) -> List[Tuple[str]]:
@@ -20,5 +20,6 @@ def find_numbers_with_decimal_comma(df: pd.DataFrame) -> pd.DataFrame:
20
20
  return [
21
21
  col
22
22
  for col in tmp.columns
23
- if is_string_dtype(tmp[col]) and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
23
+ if (is_string_dtype(tmp[col]) or is_object_dtype(tmp[col]))
24
+ and tmp[col].astype("string").str.match("^[0-9]+,[0-9]*$").any()
24
25
  ]
@@ -1,4 +1,4 @@
1
- from typing import List
1
+ from typing import List, Optional
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -10,18 +10,16 @@ class BaseSearchKeyDetector:
10
10
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
11
11
  raise NotImplementedError()
12
12
 
13
- def _get_search_keys_by_name(self, column_names: List[str]) -> List[str]:
14
- return [
15
- column_name
16
- for column_name in column_names
17
- if self._is_search_key_by_name(column_name)
18
- ]
13
+ def _get_search_key_by_name(self, column_names: List[str]) -> Optional[str]:
14
+ for column_name in column_names:
15
+ if self._is_search_key_by_name(column_name):
16
+ return column_name
19
17
 
20
- def get_search_key_columns(self, df: pd.DataFrame, existing_search_keys: List[str]) -> List[str]:
21
- other_columns = [col for col in df.columns if col not in existing_search_keys]
22
- columns_by_names = self._get_search_keys_by_name(other_columns)
23
- columns_by_values = []
24
- for column_name in other_columns:
18
+ def get_search_key_column(self, df: pd.DataFrame) -> Optional[str]:
19
+ maybe_column = self._get_search_key_by_name(df.columns.to_list())
20
+ if maybe_column is not None:
21
+ return maybe_column
22
+
23
+ for column_name in df.columns:
25
24
  if self._is_search_key_by_values(df[column_name]):
26
- columns_by_values.append(column_name)
27
- return list(set(columns_by_names + columns_by_values))
25
+ return column_name
@@ -1,5 +1,5 @@
1
1
  import pandas as pd
2
- from pandas.api.types import is_string_dtype
2
+ from pandas.api.types import is_string_dtype, is_object_dtype
3
3
 
4
4
  from upgini.utils.base_search_key_detector import BaseSearchKeyDetector
5
5
 
@@ -9,7 +9,7 @@ class CountrySearchKeyDetector(BaseSearchKeyDetector):
9
9
  return "country" in str(column_name).lower()
10
10
 
11
11
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
12
- if not is_string_dtype(column):
12
+ if not is_string_dtype(column) and not is_object_dtype(column):
13
13
  return False
14
14
 
15
15
  all_count = len(column)
@@ -6,7 +6,10 @@ from typing import Dict, List, Optional
6
6
  import numpy as np
7
7
  import pandas as pd
8
8
  from dateutil.relativedelta import relativedelta
9
- from pandas.api.types import is_numeric_dtype, is_period_dtype, is_string_dtype
9
+ from pandas.api.types import (
10
+ is_numeric_dtype,
11
+ is_period_dtype,
12
+ )
10
13
 
11
14
  from upgini.errors import ValidationError
12
15
  from upgini.metadata import SearchKey
@@ -78,9 +81,6 @@ class DateTimeSearchKeyConverter:
78
81
  df[self.date_column] = df[self.date_column].apply(lambda x: x.replace(tzinfo=None))
79
82
  elif isinstance(df[self.date_column].values[0], datetime.date):
80
83
  df[self.date_column] = pd.to_datetime(df[self.date_column], errors="coerce")
81
- elif is_string_dtype(df[self.date_column]):
82
- df[self.date_column] = df[self.date_column].apply(self.clean_date)
83
- df[self.date_column] = self.parse_date(df)
84
84
  elif is_period_dtype(df[self.date_column]):
85
85
  df[self.date_column] = pd.to_datetime(df[self.date_column].astype("string"))
86
86
  elif is_numeric_dtype(df[self.date_column]):
@@ -100,6 +100,9 @@ class DateTimeSearchKeyConverter:
100
100
  msg = self.bundle.get("unsupported_date_type").format(self.date_column)
101
101
  self.logger.warning(msg)
102
102
  raise ValidationError(msg)
103
+ else:
104
+ df[self.date_column] = df[self.date_column].astype("string").apply(self.clean_date)
105
+ df[self.date_column] = self.parse_date(df)
103
106
 
104
107
  # If column with date is datetime then extract seconds of the day and minute of the hour
105
108
  # as additional features
@@ -3,15 +3,7 @@ from typing import Dict, List, Optional, Union
3
3
 
4
4
  import pandas as pd
5
5
 
6
- from upgini.metadata import (
7
- ENTITY_SYSTEM_RECORD_ID,
8
- EVAL_SET_INDEX,
9
- SORT_ID,
10
- SYSTEM_RECORD_ID,
11
- TARGET,
12
- ModelTaskType,
13
- SearchKey,
14
- )
6
+ from upgini.metadata import EVAL_SET_INDEX, SORT_ID, SYSTEM_RECORD_ID, TARGET, ModelTaskType, SearchKey
15
7
  from upgini.resource_bundle import ResourceBundle
16
8
  from upgini.utils.datetime_utils import DateTimeSearchKeyConverter
17
9
  from upgini.utils.target_utils import define_task
@@ -151,8 +143,6 @@ def clean_full_duplicates(
151
143
  unique_columns = df.columns.tolist()
152
144
  if SYSTEM_RECORD_ID in unique_columns:
153
145
  unique_columns.remove(SYSTEM_RECORD_ID)
154
- if ENTITY_SYSTEM_RECORD_ID in unique_columns:
155
- unique_columns.remove(ENTITY_SYSTEM_RECORD_ID)
156
146
  if SORT_ID in unique_columns:
157
147
  unique_columns.remove(SORT_ID)
158
148
  if EVAL_SET_INDEX in unique_columns:
@@ -4,7 +4,7 @@ from hashlib import sha256
4
4
  from typing import Dict, List, Optional
5
5
 
6
6
  import pandas as pd
7
- from pandas.api.types import is_string_dtype
7
+ from pandas.api.types import is_string_dtype, is_object_dtype
8
8
  from upgini.resource_bundle import bundle
9
9
 
10
10
  from upgini.metadata import SearchKey
@@ -18,7 +18,7 @@ class EmailSearchKeyDetector(BaseSearchKeyDetector):
18
18
  return str(column_name).lower() in ["email", "e_mail", "e-mail"]
19
19
 
20
20
  def _is_search_key_by_values(self, column: pd.Series) -> bool:
21
- if not is_string_dtype(column):
21
+ if not is_string_dtype(column) and not is_object_dtype:
22
22
  return False
23
23
  if not column.astype("string").str.contains("@").any():
24
24
  return False
@@ -38,13 +38,11 @@ class EmailSearchKeyConverter:
38
38
  email_column: str,
39
39
  hem_column: Optional[str],
40
40
  search_keys: Dict[str, SearchKey],
41
- unnest_search_keys: Optional[List[str]] = None,
42
41
  logger: Optional[logging.Logger] = None,
43
42
  ):
44
43
  self.email_column = email_column
45
44
  self.hem_column = hem_column
46
45
  self.search_keys = search_keys
47
- self.unnest_search_keys = unnest_search_keys
48
46
  if logger is not None:
49
47
  self.logger = logger
50
48
  else:
@@ -82,12 +80,9 @@ class EmailSearchKeyConverter:
82
80
  del self.search_keys[self.email_column]
83
81
  return df
84
82
  self.search_keys[self.HEM_COLUMN_NAME] = SearchKey.HEM
85
- self.unnest_search_keys.append(self.HEM_COLUMN_NAME)
86
83
  self.email_converted_to_hem = True
87
84
 
88
85
  del self.search_keys[self.email_column]
89
- if self.email_column in self.unnest_search_keys:
90
- self.unnest_search_keys.remove(self.email_column)
91
86
 
92
87
  df[self.EMAIL_ONE_DOMAIN_COLUMN_NAME] = df[self.email_column].apply(self._email_to_one_domain)
93
88
 
@@ -81,7 +81,8 @@ class FeaturesValidator:
81
81
  return [
82
82
  i
83
83
  for i in df
84
- if (is_string_dtype(df[i]) or is_integer_dtype(df[i])) and (df[i].nunique(dropna=False) / row_count >= 0.95)
84
+ if (is_object_dtype(df[i]) or is_string_dtype(df[i]) or is_integer_dtype(df[i]))
85
+ and (df[i].nunique(dropna=False) / row_count >= 0.85)
85
86
  ]
86
87
 
87
88
  @staticmethod
@@ -107,7 +107,7 @@ def balance_undersample(
107
107
  min_class_count = vc[min_class_value]
108
108
 
109
109
  min_class_percent = imbalance_threshold / target_classes_count
110
- min_class_threshold = min_class_percent * count
110
+ min_class_threshold = int(min_class_percent * count)
111
111
 
112
112
  resampled_data = df
113
113
  df = df.copy().sort_values(by=SYSTEM_RECORD_ID)
@@ -55,7 +55,7 @@ def _get_execution_ide() -> str:
55
55
  def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optional[str] = None) -> dict:
56
56
  # default values
57
57
  track = {"ide": _get_execution_ide()}
58
- ident_res = "https://api.ipify.org"
58
+ ident_res = "https://api64.ipify.org"
59
59
 
60
60
  try:
61
61
  track["hostname"] = socket.gethostname()
@@ -74,17 +74,20 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
74
74
  display(
75
75
  Javascript(
76
76
  """
77
- import('https://upgini.github.io/upgini/js/a.js')
77
+ async function getVisitorId() {
78
+ return import('https://upgini.github.io/upgini/js/a.js')
78
79
  .then(FingerprintJS => FingerprintJS.load())
79
80
  .then(fp => fp.get())
80
- .then(result => window.visitorId = result.visitorId);
81
+ .then(result => result.visitorId);
82
+ }
81
83
  """
82
84
  )
83
85
  )
84
- track["visitorId"] = output.eval_js("window.visitorId", timeout_sec=10)
86
+ track["visitorId"] = output.eval_js("getVisitorId()", timeout_sec=30)
85
87
  except Exception as e:
86
88
  track["err"] = str(e)
87
- track["visitorId"] = "None"
89
+ if "visitorId" not in track:
90
+ track["visitorId"] = "None"
88
91
  if client_ip:
89
92
  track["ip"] = client_ip
90
93
  else:
@@ -95,16 +98,19 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
95
98
  display(
96
99
  Javascript(
97
100
  f"""
98
- fetch("{ident_res}")
101
+ async function getIP() {{
102
+ return fetch("{ident_res}")
99
103
  .then(response => response.text())
100
- .then(data => window.clientIP = data);
104
+ .then(data => data);
105
+ }}
101
106
  """
102
107
  )
103
108
  )
104
- track["ip"] = output.eval_js("window.clientIP", timeout_sec=10)
109
+ track["ip"] = output.eval_js("getIP()", timeout_sec=10)
105
110
  except Exception as e:
106
111
  track["err"] = str(e)
107
- track["ip"] = "0.0.0.0"
112
+ if "ip" not in track:
113
+ track["ip"] = "0.0.0.0"
108
114
 
109
115
  elif track["ide"] == "binder":
110
116
  try:
@@ -116,8 +122,10 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
116
122
  track["visitorId"] = sha256(os.environ["CLIENT_IP"].encode()).hexdigest()
117
123
  except Exception as e:
118
124
  track["err"] = str(e)
119
- track["ip"] = "0.0.0.0"
120
- track["visitorId"] = "None"
125
+ if "ip" not in track:
126
+ track["ip"] = "0.0.0.0"
127
+ if "visitorId" not in track:
128
+ track["visitorId"] = "None"
121
129
 
122
130
  elif track["ide"] == "kaggle":
123
131
  try:
@@ -136,8 +144,8 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
136
144
  raise Exception(err)
137
145
  except Exception as e:
138
146
  track["err"] = str(e)
139
- track["ip"] = "0.0.0.0"
140
- track["visitorId"] = "None"
147
+ if "visitorId" not in track:
148
+ track["visitorId"] = "None"
141
149
  else:
142
150
  try:
143
151
  if client_ip:
@@ -150,5 +158,9 @@ def get_track_metrics(client_ip: Optional[str] = None, client_visitorid: Optiona
150
158
  track["visitorId"] = sha256(str(getnode()).encode()).hexdigest()
151
159
  except Exception as e:
152
160
  track["err"] = str(e)
161
+ if "visitorId" not in track:
162
+ track["visitorId"] = "None"
163
+ if "ip" not in track:
164
+ track["ip"] = "0.0.0.0"
153
165
 
154
166
  return track
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: upgini
3
- Version: 1.1.275a1
3
+ Version: 1.1.276
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Home-page: https://upgini.com/
6
6
  Author: Upgini Developers
@@ -28,7 +28,7 @@ Description-Content-Type: text/markdown
28
28
  License-File: LICENSE
29
29
  Requires-Dist: python-dateutil >=2.8.0
30
30
  Requires-Dist: requests >=2.8.0
31
- Requires-Dist: pandas <2.0.0,>=1.1.0
31
+ Requires-Dist: pandas <3.0.0,>=1.1.0
32
32
  Requires-Dist: numpy >=1.19.0
33
33
  Requires-Dist: scikit-learn >=1.3.0
34
34
  Requires-Dist: pydantic <2.0.0,>=1.8.2
@@ -1,12 +1,12 @@
1
1
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
2
- upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
3
- upgini/dataset.py,sha256=g10BnbayclZMno9mAabpz_Zu0iyMiW0f_jOwt_xJr8U,45947
2
+ upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
3
+ upgini/dataset.py,sha256=HwL2syoMf3F9k9SmsJJMhhqnAddZcx28RZ1aYam7Lhs,45665
4
4
  upgini/errors.py,sha256=pdzQl3MKuK52yvncxMWMRWeSIOGhUFzpQoszoRFBOk0,958
5
- upgini/features_enricher.py,sha256=CgUBRCPW_itgBfaup3Tg_yfPYMbQpufoOqu4yYvn6VU,179316
5
+ upgini/features_enricher.py,sha256=ys7RQoZsyY8-NkUZyp12K8z5aQmg7pyx0LtwclFtXkc,176358
6
6
  upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
7
7
  upgini/http.py,sha256=zaO86LBBLmkieGbgYifk29eVoPCxXimZQ8YkQtKcM0I,42244
8
- upgini/metadata.py,sha256=FFwTnoMxdJ-7oKXbRgght1yk7e2u90WpeqljKDWUj18,10106
9
- upgini/metrics.py,sha256=VmxVc-plbRPZ1U3Ve3E-FZkhYqi0X2r7x8H5L-shux4,29058
8
+ upgini/metadata.py,sha256=fwVxtkR6Mn4iRoOqV6BfMJvJrx65I3YwZUMbZjhPyOI,9673
9
+ upgini/metrics.py,sha256=tGzdn0jgup86OlH_GS4eoza8ZJZ9wgaJr7SaX3Upwzo,29652
10
10
  upgini/search_task.py,sha256=tmJ17WUxv3J5NWrYUJB_NKdZ792Ifz8Z8UnDXeQnpss,17077
11
11
  upgini/spinner.py,sha256=Dm1dQ5F_z_Ua2odLxZX7OypcOX9tSx_vE5MGaKtUmfw,1118
12
12
  upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
@@ -15,49 +15,49 @@ upgini/ads_management/ads_manager.py,sha256=fP4Yqx3h2Snw5X335TbXEwFoupq1RYsE7y0P
15
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  upgini/autofe/all_operands.py,sha256=H66wqVLD-H9k8A4-q2wslhV9QaNxlb49f8YiT0Xfkps,2356
17
17
  upgini/autofe/binary.py,sha256=f8LQqZi9zyaMUAv-jASMmWNA_vT05ncYCjZq0qx3USs,3972
18
- upgini/autofe/date.py,sha256=cc0GMAJR0QZOI_Qp2V5UDklaXLNS_79O1GhU6GlOYzg,3895
18
+ upgini/autofe/date.py,sha256=408p8P2OTPM2D3LsEGGtaiCepKGgM1BbOCQNRzAmI6c,4223
19
19
  upgini/autofe/feature.py,sha256=2FQRGtIumNz60hFAjfLReaY18SI7HxzYZOoC5avzSjQ,11847
20
20
  upgini/autofe/groupby.py,sha256=iXRfOmOc84ooSzRhsh9GmmG7rTafX0-ekXko8s9Qs68,3089
21
21
  upgini/autofe/operand.py,sha256=dhtToPDGWtP_0u_RjayUpezJJZAgq_TzNbPH0bI9OXI,2805
22
22
  upgini/autofe/unary.py,sha256=YRTzQLttbDdOnkogWBPnBexpu7uHWSLSFAxSCu3iFdY,3145
23
23
  upgini/autofe/vector.py,sha256=5qhI_bdwaWM1l7fgCkx1tMt9R9gxWzoYCl-7WO4KiOs,604
24
24
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
- upgini/data_source/data_source_publisher.py,sha256=J2lrpPuysUHPeqTSfoybBtPRTBCFu7R5KzaakhjaRDc,16485
25
+ upgini/data_source/data_source_publisher.py,sha256=taRzyGgrPrTTSGw4Y-Ca5k4bf30aiTa68rxqT9zfqeI,16478
26
26
  upgini/mdc/__init__.py,sha256=ETDh3JKbrDdPMOECiYLAa8lvKYe68mv4IY6fZa9FimA,1126
27
27
  upgini/mdc/context.py,sha256=Sl1S_InKlzzRxYqwJ2k24lawJdCKWgGJ-RIRfvzWJrk,1468
28
28
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
- upgini/normalizer/phone_normalizer.py,sha256=lhwsPEnfyjeIsndW2EcQGZksXYsfxaQ1ghAzVYoDRKM,9927
29
+ upgini/normalizer/phone_normalizer.py,sha256=_SYMX4GTgwzRXArK54Jp3vUBE5d4jZxSVyze-0tqzg0,9996
30
30
  upgini/resource_bundle/__init__.py,sha256=hdvbqL0b0xMWbY6-kiYGsW1ro2GMiWpxxsO9uCv-h9Q,8379
31
31
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
32
- upgini/resource_bundle/strings.properties,sha256=AK5xktWWYa0smEa_ZVT7BFlXPSx7M_NTMIfXhgsnE2Y,26177
32
+ upgini/resource_bundle/strings.properties,sha256=1O779a0-Ai0j7W-Z5AznvjuV69YkJvgGhJda-6VMLOQ,26287
33
33
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
34
34
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
35
  upgini/sampler/base.py,sha256=CC-DvPbrN7zp5--SVFuUqkVmdWM_5F7R0Do98ETV82U,6421
36
36
  upgini/sampler/random_under_sampler.py,sha256=XU4c2swPIFxVXHOPpxgM2bUao0Xm-aoMmd6fKjIuV5s,4068
37
37
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
38
- upgini/utils/__init__.py,sha256=dQ4-s8-sZ5eOBZ-mH3gEwDHTdI0wI1bUAVgVqUKKPx4,786
39
- upgini/utils/base_search_key_detector.py,sha256=VvEdamjJT1wypsH6NAfOkPp7dHo7nxhl7LhwX7Z9N5w,1025
38
+ upgini/utils/__init__.py,sha256=YVum3lRKpyfqoJy_7HJyU6SmIgbmG8QLkHIpibE_ud8,842
39
+ upgini/utils/base_search_key_detector.py,sha256=DGwhXLvc8i5VZWMDr0rncFfV5GEHdsCSnLGon_W9TPs,859
40
40
  upgini/utils/blocked_time_series.py,sha256=dMz5ewk3PsoeOrc3lDzInCVPS9u_2XQkV0W6PuMMjPg,3380
41
- upgini/utils/country_utils.py,sha256=1KXhLSNqkNYVL3on8-zK0Arc_SspUH7AMZvGZICysOU,6462
41
+ upgini/utils/country_utils.py,sha256=pV8TBURthYqwSOfH1lxfYc2blm3OvfLFCMvRv8rKTp4,6511
42
42
  upgini/utils/custom_loss_utils.py,sha256=DBslpjWGPt7xTeypt78baR59012SYphbPsO_YLKdilo,3972
43
43
  upgini/utils/cv_utils.py,sha256=Tn01RJvpZGZh0PUQUimlBkV-AXwe7s6yjCNFtw352Uc,3525
44
- upgini/utils/datetime_utils.py,sha256=4ii5WphAHlb_NRmdJx35VZpTarJbAr-AnDw3XSzUSow,10346
45
- upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwtXuV8,8770
44
+ upgini/utils/datetime_utils.py,sha256=_mfhWb5ogEThvanQ-py1Lb6VvUvF2vT20tQgNprNz6o,10321
45
+ upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
46
46
  upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
47
- upgini/utils/email_utils.py,sha256=0EPCxMU-huzTgb_vySiAQ8tmSUhS31Mz2BpaHGwwYO4,3772
47
+ upgini/utils/email_utils.py,sha256=R9bVOfbS-oVkA8PdwZfQBxm7B4mQlRtkwqx2cf6zPCY,3520
48
48
  upgini/utils/fallback_progress_bar.py,sha256=cdbd1XGcWm4Ed4eAqV2_St3z7uC_kkH22gEyrN5ub6M,1090
49
- upgini/utils/features_validator.py,sha256=P-dfjBLAMxgzOcUX1Jo1bhVp8-8WyTyF3Ef0YZ5nfRI,3269
49
+ upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
50
50
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
51
51
  upgini/utils/ip_utils.py,sha256=Zf3F2cnQmOCH09QLQHetpjMFu1PnD0cTmDymn0SnSy8,1672
52
52
  upgini/utils/phone_utils.py,sha256=JNSkF8G6mgsN8Czy11pamaJdsY6rBINEMpi7jbVt_RA,408
53
53
  upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3x_zs,409
54
54
  upgini/utils/progress_bar.py,sha256=iNXyqT3vKCeHpfiG5HHwr7Lk2cTtKViM93Fl8iZnjGc,1564
55
55
  upgini/utils/sklearn_ext.py,sha256=e1aMNXk1zUt7uFnl0FcUF0zOnaXSE7z5xBHmJPknUVs,44014
56
- upgini/utils/target_utils.py,sha256=9K67tkY7LWhQMO-vbbPqBaO-KriAmg_6fVz5RQRaLQc,7802
57
- upgini/utils/track_info.py,sha256=EPcJ13Jqa17_T0JjM37Ac9kWDz5Zk0GVsIZKutOb8aU,5207
56
+ upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
57
+ upgini/utils/track_info.py,sha256=p8gmuHhLamZF5JG7K9DeK-PcytQhlFCR29lyRr-wq_U,5665
58
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
59
- upgini-1.1.275a1.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
- upgini-1.1.275a1.dist-info/METADATA,sha256=ocZUhdmjsYXKoCXt0W3M4gfPGQ8UlFtQlYIjdD_6_w0,48158
61
- upgini-1.1.275a1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
- upgini-1.1.275a1.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
- upgini-1.1.275a1.dist-info/RECORD,,
59
+ upgini-1.1.276.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
60
+ upgini-1.1.276.dist-info/METADATA,sha256=Dgb4UJ82UknhtKS9DHiGRu-a9i3LeoKZiVWpCzkJfF4,48156
61
+ upgini-1.1.276.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
62
+ upgini-1.1.276.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
63
+ upgini-1.1.276.dist-info/RECORD,,