upgini 1.2.69__py3-none-any.whl → 1.2.70a3832.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.69"
1
+ __version__ = "1.2.70a3832.dev1"
upgini/dataset.py CHANGED
@@ -388,7 +388,7 @@ class Dataset: # (pd.DataFrame):
388
388
  for col in columns_to_validate:
389
389
  self.data[f"{col}_is_valid"] = ~self.data[col].isnull()
390
390
  if validate_target and target is not None and col == target:
391
- self.data.loc[self.data[target] == np.Inf, f"{col}_is_valid"] = False
391
+ self.data.loc[self.data[target] == np.inf, f"{col}_is_valid"] = False
392
392
 
393
393
  if col in mandatory_columns:
394
394
  self.data["valid_mandatory"] = self.data["valid_mandatory"] & self.data[f"{col}_is_valid"]
upgini/features_enricher.py CHANGED
@@ -4075,7 +4075,10 @@ if response.status_code == 200:
4075
4075
  )
4076
4076
 
4077
4077
  if all(k == SearchKey.CUSTOM_KEY for k in valid_search_keys.values()):
4078
- msg = self.bundle.get("unregistered_only_personal_keys")
4078
+ if self.__is_registered:
4079
+ msg = self.bundle.get("only_custom_keys")
4080
+ else:
4081
+ msg = self.bundle.get("unregistered_only_personal_keys")
4079
4082
  self.logger.warning(msg + f" Provided search keys: {search_keys}")
4080
4083
  raise ValidationError(msg)
4081
4084
 
upgini/http.py CHANGED
@@ -16,6 +16,7 @@ from typing import Any, Dict, List, Optional, Tuple
16
16
  from urllib.parse import urljoin
17
17
 
18
18
  import jwt
19
+
19
20
  # import pandas as pd
20
21
  import requests
21
22
  from pydantic import BaseModel
@@ -342,7 +343,9 @@ class _RestClient:
342
343
  else:
343
344
  return self._syncronized_refresh_access_token()
344
345
 
345
- def _with_unauth_retry(self, request, try_number: int = 0, need_connection_retry: bool = True):
346
+ def _with_unauth_retry(
347
+ self, request, try_number: int = 0, need_connection_retry: bool = True, silent: bool = False
348
+ ):
346
349
  try:
347
350
  return request()
348
351
  except RequestException as e:
@@ -373,8 +376,9 @@ class _RestClient:
373
376
  elif "more than one concurrent search request" in e.message.lower():
374
377
  raise ValidationError(bundle.get("concurrent_request"))
375
378
  else:
376
- print(e)
377
- show_status_error()
379
+ if not silent:
380
+ print(e)
381
+ show_status_error()
378
382
  raise e
379
383
 
380
384
  @staticmethod
@@ -706,6 +710,7 @@ class _RestClient:
706
710
  silent=True,
707
711
  ),
708
712
  need_connection_retry=False,
713
+ silent=True,
709
714
  )
710
715
  except Exception:
711
716
  self.send_log_event_unauth(log_event)
@@ -716,7 +721,7 @@ class _RestClient:
716
721
  try:
717
722
  requests.post(
718
723
  url=urljoin(_RestClient.PROD_BACKEND_URL, api_path),
719
- json=log_event.dict(exclude_none=True),
724
+ json=log_event.model_dump(exclude_none=True),
720
725
  headers=_RestClient._get_base_headers(content_type="application/json"),
721
726
  )
722
727
  except Exception:
upgini/metrics.py CHANGED
@@ -1,17 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
- from dataclasses import dataclass
4
3
  import inspect
5
4
  import logging
6
5
  import re
6
+ import warnings
7
7
  from collections import defaultdict
8
8
  from copy import deepcopy
9
+ from dataclasses import dataclass
9
10
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
10
11
 
11
- import catboost
12
12
  import numpy as np
13
13
  import pandas as pd
14
- from catboost import CatBoost, CatBoostClassifier, CatBoostRegressor, Pool
14
+ from lightgbm import LGBMClassifier, LGBMRegressor
15
15
  from numpy import log1p
16
16
  from pandas.api.types import is_numeric_dtype
17
17
  from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
@@ -27,11 +27,8 @@ except ImportError:
27
27
  from sklearn.metrics._scorer import SCORERS
28
28
 
29
29
  available_scorers = SCORERS
30
- from sklearn.metrics._regression import (
31
- _check_reg_targets,
32
- check_consistent_length,
33
- )
34
30
  from sklearn.metrics import mean_squared_error
31
+ from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
35
32
  from sklearn.model_selection import BaseCrossValidator
36
33
 
37
34
  from upgini.errors import ValidationError
@@ -88,11 +85,73 @@ CATBOOST_MULTICLASS_PARAMS = {
88
85
 
89
86
  LIGHTGBM_PARAMS = {
90
87
  "random_state": DEFAULT_RANDOM_STATE,
91
- "num_leaves": 16,
88
+ # "num_leaves": 16,
89
+ # "n_estimators": 150,
90
+ # "min_child_weight": 1,
92
91
  "max_depth": 4,
93
- "n_estimators": 150,
92
+ "max_cat_threshold": 80,
93
+ "min_data_per_group": 25,
94
+ "num_boost_round": 150,
95
+ "cat_l2": 10,
96
+ "cat_smooth": 12,
97
+ "learning_rate": 0.05,
98
+ "feature_fraction": 1.0,
99
+ "min_sum_hessian_in_leaf": 0.01,
100
+ }
101
+
102
+ LIGHTGBM_REGRESSION_PARAMS = {
103
+ "random_state": DEFAULT_RANDOM_STATE,
104
+ "deterministic": True,
105
+ "min_gain_to_split": 0.001,
106
+ "n_estimators": 275,
107
+ "max_depth": 5,
108
+ "max_cat_threshold": 80,
109
+ "min_data_per_group": 25,
110
+ "cat_l2": 10,
111
+ "cat_smooth": 12,
94
112
  "learning_rate": 0.05,
95
- "min_child_weight": 1,
113
+ "feature_fraction": 1.0,
114
+ "min_sum_hessian_in_leaf": 0.01,
115
+ "objective": "huber",
116
+ "verbosity": -1,
117
+ }
118
+
119
+ LIGHTGBM_MULTICLASS_PARAMS = {
120
+ "random_state": DEFAULT_RANDOM_STATE,
121
+ "deterministic": True,
122
+ "min_gain_to_split": 0.001,
123
+ "n_estimators": 275,
124
+ "max_depth": 3,
125
+ "max_cat_threshold": 80,
126
+ "min_data_per_group": 25,
127
+ "cat_l2": 10,
128
+ "cat_smooth": 12,
129
+ "learning_rate": 0.25, # CatBoost 0.25
130
+ "min_sum_hessian_in_leaf": 0.01,
131
+ "class_weight": "balanced", # TODO pass dict with weights for each class
132
+ "objective": "multiclass",
133
+ "use_quantized_grad": "true",
134
+ "num_grad_quant_bins": "8",
135
+ "stochastic_rounding": "true",
136
+ "verbosity": -1,
137
+ }
138
+
139
+ LIGHTGBM_BINARY_PARAMS = {
140
+ "random_state": DEFAULT_RANDOM_STATE,
141
+ "deterministic": True,
142
+ "min_gain_to_split": 0.001,
143
+ "n_estimators": 275,
144
+ "max_depth": 5,
145
+ "max_cat_threshold": 80,
146
+ "min_data_per_group": 25,
147
+ "cat_l2": 10,
148
+ "cat_smooth": 12,
149
+ "learning_rate": 0.05,
150
+ "feature_fraction": 1.0,
151
+ "min_sum_hessian_in_leaf": 0.01,
152
+ "objective": "binary",
153
+ "class_weight": "balanced", # TODO pass dict with weights for each class
154
+ "verbosity": -1,
96
155
  }
97
156
 
98
157
  N_FOLDS = 5
@@ -211,6 +270,15 @@ SUPPORTED_CATBOOST_METRICS = {
211
270
  }
212
271
 
213
272
 
273
+ def is_catboost_estimator(estimator):
274
+ try:
275
+ from catboost import CatBoostClassifier, CatBoostRegressor
276
+
277
+ return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
278
+ except ImportError:
279
+ return False
280
+
281
+
214
282
  @dataclass
215
283
  class _CrossValResults:
216
284
  metric: Optional[float]
@@ -292,7 +360,7 @@ class EstimatorWrapper:
292
360
  self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
293
361
  return x, y, groups
294
362
 
295
- def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series]:
363
+ def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray]:
296
364
  joined = pd.concat([x, y], axis=1)
297
365
  joined = joined[joined[y.name].notna()]
298
366
  joined = joined.reset_index(drop=True)
@@ -346,12 +414,15 @@ class EstimatorWrapper:
346
414
  for estimator, split in zip(self.cv_estimators, splits):
347
415
  _, validation_idx = split
348
416
  cv_x = x.iloc[validation_idx]
349
- cv_y = y[validation_idx]
417
+ if isinstance(y, pd.Series):
418
+ cv_y = y.iloc[validation_idx]
419
+ else:
420
+ cv_y = y[validation_idx]
350
421
  shaps = self.calculate_shap(cv_x, cv_y, estimator)
351
422
  if shaps is not None:
352
423
  for feature, shap_value in shaps.items():
353
424
  # shap_values_all_folds[feature] = shap_values_all_folds.get(feature, []) + shap_value.tolist()
354
- shap_values_all_folds[feature].extend(shap_value.tolist())
425
+ shap_values_all_folds[feature].append(shap_value)
355
426
 
356
427
  if shap_values_all_folds:
357
428
  average_shap_values = {
@@ -427,21 +498,18 @@ class EstimatorWrapper:
427
498
  }
428
499
  if estimator is None:
429
500
  params = {}
430
- params["has_time"] = has_date
431
- # if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
432
- # params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
433
501
  if target_type == ModelTaskType.MULTICLASS:
434
- params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
502
+ params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
435
503
  params = _get_add_params(params, add_params)
436
- estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
504
+ estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
437
505
  elif target_type == ModelTaskType.BINARY:
438
- params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
506
+ params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
439
507
  params = _get_add_params(params, add_params)
440
- estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
508
+ estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
441
509
  elif target_type == ModelTaskType.REGRESSION:
442
- params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
510
+ params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
443
511
  params = _get_add_params(params, add_params)
444
- estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
512
+ estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
445
513
  else:
446
514
  raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
447
515
  else:
@@ -450,31 +518,21 @@ class EstimatorWrapper:
450
518
  else:
451
519
  estimator_copy = deepcopy(estimator)
452
520
  kwargs["estimator"] = estimator_copy
453
- if isinstance(estimator, (CatBoostClassifier, CatBoostRegressor)):
521
+ if is_catboost_estimator(estimator):
454
522
  if cat_features is not None:
455
523
  for cat_feature in cat_features:
456
524
  if cat_feature not in x.columns:
457
525
  logger.error(
458
526
  f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
459
527
  )
460
- estimator_copy.set_params(
461
- # cat_features=[x.columns.get_loc(cat_feature) for cat_feature in cat_features]
462
- cat_features=cat_features
463
- )
528
+ estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
464
529
  estimator = CatBoostWrapper(**kwargs)
465
530
  else:
466
- try:
467
- from lightgbm import LGBMClassifier, LGBMRegressor
468
-
469
- if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
470
- estimator = LightGBMWrapper(**kwargs)
471
- else:
472
- logger.warning(
473
- f"Unexpected estimator is used for metrics: {estimator}. "
474
- "Default strategy for category features will be used"
475
- )
476
- estimator = OtherEstimatorWrapper(**kwargs)
477
- except ModuleNotFoundError:
531
+ if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
532
+ estimator = LightGBMWrapper(**kwargs)
533
+ elif is_catboost_estimator(estimator):
534
+ estimator = CatBoostWrapper(**kwargs)
535
+ else:
478
536
  logger.warning(
479
537
  f"Unexpected estimator is used for metrics: {estimator}. "
480
538
  "Default strategy for category features will be used"
@@ -487,7 +545,7 @@ class EstimatorWrapper:
487
545
  class CatBoostWrapper(EstimatorWrapper):
488
546
  def __init__(
489
547
  self,
490
- estimator: Union[CatBoostClassifier, CatBoostRegressor],
548
+ estimator,
491
549
  scorer: Callable,
492
550
  metric_name: str,
493
551
  multiplier: int,
@@ -517,6 +575,9 @@ class CatBoostWrapper(EstimatorWrapper):
517
575
  x, y, groups, params = super()._prepare_to_fit(x, y)
518
576
 
519
577
  # Find embeddings
578
+ import catboost
579
+ from catboost import CatBoostClassifier
580
+
520
581
  if hasattr(CatBoostClassifier, "get_embedding_feature_indices"):
521
582
  emb_pattern = r"(.+)_emb\d+"
522
583
  self.emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
@@ -637,8 +698,10 @@ class CatBoostWrapper(EstimatorWrapper):
637
698
  else:
638
699
  raise e
639
700
 
640
- def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator: CatBoost) -> Optional[Dict[str, float]]:
701
+ def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
641
702
  try:
703
+ from catboost import Pool
704
+
642
705
  # Create Pool for fold data, if need (for example, when categorical features are present)
643
706
  fold_pool = Pool(
644
707
  x,
@@ -695,25 +758,59 @@ class LightGBMWrapper(EstimatorWrapper):
695
758
  self.cat_features = None
696
759
 
697
760
  def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
698
- x, y, groups, params = super()._prepare_to_fit(x, y)
761
+ x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
699
762
  self.cat_features = _get_cat_features(x)
700
763
  x = fill_na_cat_features(x, self.cat_features)
701
764
  for feature in self.cat_features:
702
765
  x[feature] = x[feature].astype("category").cat.codes
703
- if not is_numeric_dtype(y):
704
- y = correct_string_target(y)
766
+ if not is_numeric_dtype(y_numpy):
767
+ y_numpy = correct_string_target(y_numpy)
705
768
 
706
- return x, y, groups, params
769
+ return x, y_numpy, groups, params
707
770
 
708
771
  def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
709
- x, y, params = super()._prepare_to_calculate(x, y)
772
+ x, y_numpy, params = super()._prepare_to_calculate(x, y)
710
773
  if self.cat_features is not None:
711
774
  x = fill_na_cat_features(x, self.cat_features)
712
775
  for feature in self.cat_features:
713
776
  x[feature] = x[feature].astype("category").cat.codes
714
777
  if not is_numeric_dtype(y):
715
- y = correct_string_target(y)
716
- return x, y, params
778
+ y_numpy = correct_string_target(y_numpy)
779
+ return x, y_numpy, params
780
+
781
+ def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
782
+ try:
783
+ # Suppress specific warning from SHAP for LightGBM binary classifier
784
+ warnings.filterwarnings(
785
+ "ignore",
786
+ message=(
787
+ "LightGBM binary classifier with TreeExplainer shap values output has changed to a list of ndarray"
788
+ ),
789
+ )
790
+ from shap import TreeExplainer
791
+
792
+ if not isinstance(estimator, (LGBMRegressor, LGBMClassifier)):
793
+ return None
794
+
795
+ explainer = TreeExplainer(estimator)
796
+
797
+ shap_values = explainer.shap_values(x)
798
+
799
+ # For classification, shap_values is returned as a list for each class
800
+ # Take values for the positive class
801
+ if isinstance(shap_values, list):
802
+ shap_values = shap_values[1]
803
+
804
+ # Calculate mean absolute SHAP value for each feature
805
+ feature_importance = {}
806
+ for i, col in enumerate(x.columns):
807
+ feature_importance[col] = np.mean(np.abs(shap_values[:, i]))
808
+
809
+ return feature_importance
810
+
811
+ except Exception as e:
812
+ self.logger.warning(f"Failed to calculate SHAP values: {str(e)}")
813
+ return None
717
814
 
718
815
 
719
816
  class OtherEstimatorWrapper(EstimatorWrapper):
upgini/resource_bundle/strings.properties CHANGED
@@ -80,6 +80,7 @@ email_and_hem_simultanious=EMAIL and HEM search keys cannot be used simultaneous
80
80
  postal_code_without_country=COUNTRY search key required if POSTAL_CODE is present
81
81
  multiple_search_key=Search key {} passed multiple times
82
82
  unregistered_only_personal_keys=Only personal search keys used. Api_key from profile.upgini.com required for EMAIL/HEM, PHONE NUMBER or IPv4/IPv6 search keys\nSee docs https://github.com/upgini/upgini#-open-up-all-capabilities-of-upgini
83
+ only_custom_keys=Only CUSTOM_KEY search keys were provided. At least one of DATE, COUNTRY, POSTAL_CODE, PHONE, EMAIL, HEM, IP should be provided
83
84
  search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
84
85
  numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
85
86
  unsupported_search_key_type=Unsupported type of key in search_keys: {}
upgini/utils/deduplicate_utils.py CHANGED
@@ -74,6 +74,8 @@ def remove_fintech_duplicates(
74
74
  # Checking for different dates by the same personal keys
75
75
  uniques = grouped_by_personal_cols[date_col].nunique()
76
76
  total = len(uniques)
77
+ if total == 0:
78
+ return segment_df, None
77
79
  diff_dates = len(uniques[uniques > 1])
78
80
  if diff_dates / total >= 0.6:
79
81
  return segment_df, None
upgini/utils/feature_info.py CHANGED
@@ -90,7 +90,8 @@ class FeatureInfo:
90
90
  def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
91
91
  if data is not None and len(data) > 0 and feature_meta.name in data.columns:
92
92
  if len(data) > 3:
93
- feature_sample = np.random.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
93
+ rand = np.random.RandomState(42)
94
+ feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
94
95
  else:
95
96
  feature_sample = data[feature_meta.name].dropna().unique().tolist()
96
97
  if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
upgini/utils/sklearn_ext.py CHANGED
@@ -9,7 +9,6 @@ from traceback import format_exc
9
9
 
10
10
  import numpy as np
11
11
  import scipy.sparse as sp
12
- from catboost import CatBoostClassifier, CatBoostRegressor
13
12
  from joblib import Parallel, logger
14
13
  from scipy.sparse import issparse
15
14
  from sklearn import config_context, get_config
@@ -342,6 +341,14 @@ def cross_validate(
342
341
  raise e
343
342
 
344
343
 
344
+ def is_catboost_estimator(estimator):
345
+ try:
346
+ from catboost import CatBoostClassifier, CatBoostRegressor
347
+ return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
348
+ except ImportError:
349
+ return False
350
+
351
+
345
352
  def _fit_and_score(
346
353
  estimator,
347
354
  X,
@@ -497,7 +504,7 @@ def _fit_and_score(
497
504
  if y_train is None:
498
505
  estimator.fit(X_train, **fit_params)
499
506
  else:
500
- if isinstance(estimator, (CatBoostClassifier, CatBoostRegressor)):
507
+ if is_catboost_estimator(estimator):
501
508
  fit_params = fit_params.copy()
502
509
  fit_params["eval_set"] = [(X_test, y_test)]
503
510
  estimator.fit(X_train, y_train, **fit_params)
{upgini-1.2.69.dist-info → upgini-1.2.70a3832.dev1.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.69
3
+ Version: 1.2.70a3832.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -23,12 +23,12 @@ Classifier: Programming Language :: Python :: 3.10
23
23
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
24
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
25
  Requires-Python: <3.12,>=3.8
26
- Requires-Dist: catboost>=1.0.3
27
26
  Requires-Dist: fastparquet>=0.8.1
28
27
  Requires-Dist: ipywidgets>=8.1.0
29
28
  Requires-Dist: jarowinkler>=2.0.0
30
29
  Requires-Dist: levenshtein>=0.25.1
31
- Requires-Dist: numpy<=1.26.4,>=1.19.0
30
+ Requires-Dist: lightgbm>=4.6.0
31
+ Requires-Dist: numpy<3.0.0,>=1.19.0
32
32
  Requires-Dist: pandas<3.0.0,>=1.1.0
33
33
  Requires-Dist: psutil>=6.0.0
34
34
  Requires-Dist: pydantic<3.0.0,>1.0.0
@@ -39,6 +39,7 @@ Requires-Dist: python-json-logger>=3.3.0
39
39
  Requires-Dist: requests>=2.8.0
40
40
  Requires-Dist: scikit-learn>=1.3.0
41
41
  Requires-Dist: scipy>=1.10.0
42
+ Requires-Dist: shap>=0.44.0
42
43
  Requires-Dist: xhtml2pdf<0.3.0,>=0.2.11
43
44
  Description-Content-Type: text/markdown
44
45
 
{upgini-1.2.69.dist-info → upgini-1.2.70a3832.dev1.dist-info}/RECORD RENAMED
@@ -1,13 +1,13 @@
1
- upgini/__about__.py,sha256=b5n5Ah2b8KdU4qEsuokdYRRb9Cz2Tg3GOvmqydpG060,23
1
+ upgini/__about__.py,sha256=LyOOmtec0d_u1BUaK2G6WOED83CiI6FA8Qp2UNUzrLw,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=OGjpeFHbj3lWiZTOHTpWEoMMDmFY1FlNC44FKktoZvU,34956
4
+ upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=J5U6nprU-oEGUM54fS1W6daG6j4C2xYJE1lx3p6lcBc,205601
7
- upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
6
+ upgini/features_enricher.py,sha256=GwBizSoaI17Meg2bPHCz-o_U8i6-wRgrXv6ZpyJqSvk,205718
7
+ upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
10
- upgini/metrics.py,sha256=t7uOOnlDYvP6E3DLjPMQcFBjyhJfUQY8aUlx7N0Mh-s,35477
10
+ upgini/metrics.py,sha256=I0sVJLNp4fiIq7ZFcUdNTxJjFkzStdFuKbnf2niEGjc,38207
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -38,7 +38,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
38
38
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
39
39
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
40
40
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
41
- upgini/resource_bundle/strings.properties,sha256=3zctRNQDJ1STTvLUfryBT72wYeHYnrllV4rG1C3HtfI,27542
41
+ upgini/resource_bundle/strings.properties,sha256=XU5ulr5ZDQfGbFk9QdFDzl3oDMaw0eDYCPoEq3ZvIkw,27687
42
42
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
43
43
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -52,11 +52,11 @@ upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk
52
52
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
53
53
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
54
54
  upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
55
- upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
55
+ upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
56
56
  upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
57
57
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
58
58
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
59
- upgini/utils/feature_info.py,sha256=m1tQcT3hTChPAiXzpk0WQcEqElj8KgeCifEJFa7-gss,7247
59
+ upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
60
60
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
61
61
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
62
62
  upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
@@ -64,13 +64,13 @@ upgini/utils/mstats.py,sha256=dLJQr5Ak5BAoV-pDPpnfvMURZVkZ3_v250QzAsSlqY4,6286
64
64
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
65
65
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
66
66
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
67
- upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
67
+ upgini/utils/sklearn_ext.py,sha256=E7zfYqBW597LetYXHxyM-i4f8luHsGIuP6mMJ2wtSMs,44661
68
68
  upgini/utils/sort.py,sha256=H79A17NMoHtLbqLCPFx_MBUloLZcDKjOba_H4gCE3t8,6965
69
69
  upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,16579
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.69.dist-info/METADATA,sha256=Z8doK3pmiKqcbPbXbG-JZwaqGwtAEsc6YJg8zfqb7cM,49113
74
- upgini-1.2.69.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.69.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.69.dist-info/RECORD,,
73
+ upgini-1.2.70a3832.dev1.dist-info/METADATA,sha256=-fTlW7hUtdND-FFrYtnchV4uoeo0mvQCqsYJrME-ros,49149
74
+ upgini-1.2.70a3832.dev1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.70a3832.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.70a3832.dev1.dist-info/RECORD,,