upgini 1.2.68a3832.dev12__py3-none-any.whl → 1.2.69__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.68a3832.dev12"
1
+ __version__ = "1.2.69"
upgini/dataset.py CHANGED
@@ -388,7 +388,7 @@ class Dataset: # (pd.DataFrame):
388
388
  for col in columns_to_validate:
389
389
  self.data[f"{col}_is_valid"] = ~self.data[col].isnull()
390
390
  if validate_target and target is not None and col == target:
391
- self.data.loc[self.data[target] == np.inf, f"{col}_is_valid"] = False
391
+ self.data.loc[self.data[target] == np.Inf, f"{col}_is_valid"] = False
392
392
 
393
393
  if col in mandatory_columns:
394
394
  self.data["valid_mandatory"] = self.data["valid_mandatory"] & self.data[f"{col}_is_valid"]
@@ -3845,6 +3845,11 @@ if response.status_code == 200:
3845
3845
  ):
3846
3846
  continue
3847
3847
 
3848
+ # Temporary workaround for duplicate features metadata
3849
+ if feature_meta.name in self.feature_names_:
3850
+ self.logger.warning(f"WARNING: Duplicate feature metadata: {feature_meta}")
3851
+ continue
3852
+
3848
3853
  self.feature_names_.append(feature_meta.name)
3849
3854
  self.feature_importances_.append(_round_shap_value(feature_meta.shap_value))
3850
3855
 
@@ -4070,10 +4075,7 @@ if response.status_code == 200:
4070
4075
  )
4071
4076
 
4072
4077
  if all(k == SearchKey.CUSTOM_KEY for k in valid_search_keys.values()):
4073
- if self.__is_registered:
4074
- msg = self.bundle.get("only_custom_keys")
4075
- else:
4076
- msg = self.bundle.get("unregistered_only_personal_keys")
4078
+ msg = self.bundle.get("unregistered_only_personal_keys")
4077
4079
  self.logger.warning(msg + f" Provided search keys: {search_keys}")
4078
4080
  raise ValidationError(msg)
4079
4081
 
upgini/http.py CHANGED
@@ -16,7 +16,6 @@ from typing import Any, Dict, List, Optional, Tuple
16
16
  from urllib.parse import urljoin
17
17
 
18
18
  import jwt
19
-
20
19
  # import pandas as pd
21
20
  import requests
22
21
  from pydantic import BaseModel
@@ -343,9 +342,7 @@ class _RestClient:
343
342
  else:
344
343
  return self._syncronized_refresh_access_token()
345
344
 
346
- def _with_unauth_retry(
347
- self, request, try_number: int = 0, need_connection_retry: bool = True, silent: bool = False
348
- ):
345
+ def _with_unauth_retry(self, request, try_number: int = 0, need_connection_retry: bool = True):
349
346
  try:
350
347
  return request()
351
348
  except RequestException as e:
@@ -376,9 +373,8 @@ class _RestClient:
376
373
  elif "more than one concurrent search request" in e.message.lower():
377
374
  raise ValidationError(bundle.get("concurrent_request"))
378
375
  else:
379
- if not silent:
380
- print(e)
381
- show_status_error()
376
+ print(e)
377
+ show_status_error()
382
378
  raise e
383
379
 
384
380
  @staticmethod
@@ -710,7 +706,6 @@ class _RestClient:
710
706
  silent=True,
711
707
  ),
712
708
  need_connection_retry=False,
713
- silent=True,
714
709
  )
715
710
  except Exception:
716
711
  self.send_log_event_unauth(log_event)
@@ -721,7 +716,7 @@ class _RestClient:
721
716
  try:
722
717
  requests.post(
723
718
  url=urljoin(_RestClient.PROD_BACKEND_URL, api_path),
724
- json=log_event.model_dump(exclude_none=True),
719
+ json=log_event.dict(exclude_none=True),
725
720
  headers=_RestClient._get_base_headers(content_type="application/json"),
726
721
  )
727
722
  except Exception:
upgini/metrics.py CHANGED
@@ -1,17 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
+ from dataclasses import dataclass
3
4
  import inspect
4
5
  import logging
5
6
  import re
6
- import warnings
7
7
  from collections import defaultdict
8
8
  from copy import deepcopy
9
- from dataclasses import dataclass
10
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
11
10
 
11
+ import catboost
12
12
  import numpy as np
13
13
  import pandas as pd
14
- from lightgbm import LGBMClassifier, LGBMRegressor
14
+ from catboost import CatBoost, CatBoostClassifier, CatBoostRegressor, Pool
15
15
  from numpy import log1p
16
16
  from pandas.api.types import is_numeric_dtype
17
17
  from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
@@ -27,8 +27,11 @@ except ImportError:
27
27
  from sklearn.metrics._scorer import SCORERS
28
28
 
29
29
  available_scorers = SCORERS
30
+ from sklearn.metrics._regression import (
31
+ _check_reg_targets,
32
+ check_consistent_length,
33
+ )
30
34
  from sklearn.metrics import mean_squared_error
31
- from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
32
35
  from sklearn.model_selection import BaseCrossValidator
33
36
 
34
37
  from upgini.errors import ValidationError
@@ -85,73 +88,11 @@ CATBOOST_MULTICLASS_PARAMS = {
85
88
 
86
89
  LIGHTGBM_PARAMS = {
87
90
  "random_state": DEFAULT_RANDOM_STATE,
88
- # "num_leaves": 16,
89
- # "n_estimators": 150,
90
- # "min_child_weight": 1,
91
+ "num_leaves": 16,
91
92
  "max_depth": 4,
92
- "max_cat_threshold": 80,
93
- "min_data_per_group": 25,
94
- "num_boost_round": 150,
95
- "cat_l2": 10,
96
- "cat_smooth": 12,
97
- "learning_rate": 0.05,
98
- "feature_fraction": 1.0,
99
- "min_sum_hessian_in_leaf": 0.01,
100
- }
101
-
102
- LIGHTGBM_REGRESSION_PARAMS = {
103
- "random_state": DEFAULT_RANDOM_STATE,
104
- "deterministic": True,
105
- "min_gain_to_split": 0.001,
106
- "n_estimators": 275,
107
- "max_depth": 5,
108
- "max_cat_threshold": 80,
109
- "min_data_per_group": 25,
110
- "cat_l2": 10,
111
- "cat_smooth": 12,
93
+ "n_estimators": 150,
112
94
  "learning_rate": 0.05,
113
- "feature_fraction": 1.0,
114
- "min_sum_hessian_in_leaf": 0.01,
115
- "objective": "huber",
116
- "verbosity": -1,
117
- }
118
-
119
- LIGHTGBM_MULTICLASS_PARAMS = {
120
- "random_state": DEFAULT_RANDOM_STATE,
121
- "deterministic": True,
122
- "min_gain_to_split": 0.001,
123
- "n_estimators": 275,
124
- "max_depth": 3,
125
- "max_cat_threshold": 80,
126
- "min_data_per_group": 25,
127
- "cat_l2": 10,
128
- "cat_smooth": 12,
129
- "learning_rate": 0.25, # CatBoost 0.25
130
- "min_sum_hessian_in_leaf": 0.01,
131
- "class_weight": "balanced", # TODO pass dict with weights for each class
132
- "objective": "multiclass",
133
- "use_quantized_grad": "true",
134
- "num_grad_quant_bins": "8",
135
- "stochastic_rounding": "true",
136
- "verbosity": -1,
137
- }
138
-
139
- LIGHTGBM_BINARY_PARAMS = {
140
- "random_state": DEFAULT_RANDOM_STATE,
141
- "deterministic": True,
142
- "min_gain_to_split": 0.001,
143
- "n_estimators": 275,
144
- "max_depth": 5,
145
- "max_cat_threshold": 80,
146
- "min_data_per_group": 25,
147
- "cat_l2": 10,
148
- "cat_smooth": 12,
149
- "learning_rate": 0.05,
150
- "feature_fraction": 1.0,
151
- "min_sum_hessian_in_leaf": 0.01,
152
- "objective": "binary",
153
- "class_weight": "balanced", # TODO pass dict with weights for each class
154
- "verbosity": -1,
95
+ "min_child_weight": 1,
155
96
  }
156
97
 
157
98
  N_FOLDS = 5
@@ -270,15 +211,6 @@ SUPPORTED_CATBOOST_METRICS = {
270
211
  }
271
212
 
272
213
 
273
- def is_catboost_estimator(estimator):
274
- try:
275
- from catboost import CatBoostClassifier, CatBoostRegressor
276
-
277
- return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
278
- except ImportError:
279
- return False
280
-
281
-
282
214
  @dataclass
283
215
  class _CrossValResults:
284
216
  metric: Optional[float]
@@ -360,7 +292,7 @@ class EstimatorWrapper:
360
292
  self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
361
293
  return x, y, groups
362
294
 
363
- def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray]:
295
+ def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series]:
364
296
  joined = pd.concat([x, y], axis=1)
365
297
  joined = joined[joined[y.name].notna()]
366
298
  joined = joined.reset_index(drop=True)
@@ -414,15 +346,12 @@ class EstimatorWrapper:
414
346
  for estimator, split in zip(self.cv_estimators, splits):
415
347
  _, validation_idx = split
416
348
  cv_x = x.iloc[validation_idx]
417
- if isinstance(y, pd.Series):
418
- cv_y = y.iloc[validation_idx]
419
- else:
420
- cv_y = y[validation_idx]
349
+ cv_y = y[validation_idx]
421
350
  shaps = self.calculate_shap(cv_x, cv_y, estimator)
422
351
  if shaps is not None:
423
352
  for feature, shap_value in shaps.items():
424
353
  # shap_values_all_folds[feature] = shap_values_all_folds.get(feature, []) + shap_value.tolist()
425
- shap_values_all_folds[feature].append(shap_value)
354
+ shap_values_all_folds[feature].extend(shap_value.tolist())
426
355
 
427
356
  if shap_values_all_folds:
428
357
  average_shap_values = {
@@ -498,18 +427,21 @@ class EstimatorWrapper:
498
427
  }
499
428
  if estimator is None:
500
429
  params = {}
430
+ params["has_time"] = has_date
431
+ # if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
432
+ # params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
501
433
  if target_type == ModelTaskType.MULTICLASS:
502
- params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
434
+ params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
503
435
  params = _get_add_params(params, add_params)
504
- estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
436
+ estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
505
437
  elif target_type == ModelTaskType.BINARY:
506
- params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
438
+ params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
507
439
  params = _get_add_params(params, add_params)
508
- estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
440
+ estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
509
441
  elif target_type == ModelTaskType.REGRESSION:
510
- params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
442
+ params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
511
443
  params = _get_add_params(params, add_params)
512
- estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
444
+ estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
513
445
  else:
514
446
  raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
515
447
  else:
@@ -518,21 +450,31 @@ class EstimatorWrapper:
518
450
  else:
519
451
  estimator_copy = deepcopy(estimator)
520
452
  kwargs["estimator"] = estimator_copy
521
- if is_catboost_estimator(estimator):
453
+ if isinstance(estimator, (CatBoostClassifier, CatBoostRegressor)):
522
454
  if cat_features is not None:
523
455
  for cat_feature in cat_features:
524
456
  if cat_feature not in x.columns:
525
457
  logger.error(
526
458
  f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
527
459
  )
528
- estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
460
+ estimator_copy.set_params(
461
+ # cat_features=[x.columns.get_loc(cat_feature) for cat_feature in cat_features]
462
+ cat_features=cat_features
463
+ )
529
464
  estimator = CatBoostWrapper(**kwargs)
530
465
  else:
531
- if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
532
- estimator = LightGBMWrapper(**kwargs)
533
- elif is_catboost_estimator(estimator):
534
- estimator = CatBoostWrapper(**kwargs)
535
- else:
466
+ try:
467
+ from lightgbm import LGBMClassifier, LGBMRegressor
468
+
469
+ if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
470
+ estimator = LightGBMWrapper(**kwargs)
471
+ else:
472
+ logger.warning(
473
+ f"Unexpected estimator is used for metrics: {estimator}. "
474
+ "Default strategy for category features will be used"
475
+ )
476
+ estimator = OtherEstimatorWrapper(**kwargs)
477
+ except ModuleNotFoundError:
536
478
  logger.warning(
537
479
  f"Unexpected estimator is used for metrics: {estimator}. "
538
480
  "Default strategy for category features will be used"
@@ -545,7 +487,7 @@ class EstimatorWrapper:
545
487
  class CatBoostWrapper(EstimatorWrapper):
546
488
  def __init__(
547
489
  self,
548
- estimator,
490
+ estimator: Union[CatBoostClassifier, CatBoostRegressor],
549
491
  scorer: Callable,
550
492
  metric_name: str,
551
493
  multiplier: int,
@@ -575,9 +517,6 @@ class CatBoostWrapper(EstimatorWrapper):
575
517
  x, y, groups, params = super()._prepare_to_fit(x, y)
576
518
 
577
519
  # Find embeddings
578
- import catboost
579
- from catboost import CatBoostClassifier
580
-
581
520
  if hasattr(CatBoostClassifier, "get_embedding_feature_indices"):
582
521
  emb_pattern = r"(.+)_emb\d+"
583
522
  self.emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
@@ -698,10 +637,8 @@ class CatBoostWrapper(EstimatorWrapper):
698
637
  else:
699
638
  raise e
700
639
 
701
- def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
640
+ def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator: CatBoost) -> Optional[Dict[str, float]]:
702
641
  try:
703
- from catboost import Pool
704
-
705
642
  # Create Pool for fold data, if need (for example, when categorical features are present)
706
643
  fold_pool = Pool(
707
644
  x,
@@ -758,59 +695,25 @@ class LightGBMWrapper(EstimatorWrapper):
758
695
  self.cat_features = None
759
696
 
760
697
  def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
761
- x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
698
+ x, y, groups, params = super()._prepare_to_fit(x, y)
762
699
  self.cat_features = _get_cat_features(x)
763
700
  x = fill_na_cat_features(x, self.cat_features)
764
701
  for feature in self.cat_features:
765
702
  x[feature] = x[feature].astype("category").cat.codes
766
- if not is_numeric_dtype(y_numpy):
767
- y_numpy = correct_string_target(y_numpy)
703
+ if not is_numeric_dtype(y):
704
+ y = correct_string_target(y)
768
705
 
769
- return x, y_numpy, groups, params
706
+ return x, y, groups, params
770
707
 
771
708
  def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
772
- x, y_numpy, params = super()._prepare_to_calculate(x, y)
709
+ x, y, params = super()._prepare_to_calculate(x, y)
773
710
  if self.cat_features is not None:
774
711
  x = fill_na_cat_features(x, self.cat_features)
775
712
  for feature in self.cat_features:
776
713
  x[feature] = x[feature].astype("category").cat.codes
777
714
  if not is_numeric_dtype(y):
778
- y_numpy = correct_string_target(y_numpy)
779
- return x, y_numpy, params
780
-
781
- def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
782
- try:
783
- # Suppress specific warning from SHAP for LightGBM binary classifier
784
- warnings.filterwarnings(
785
- "ignore",
786
- message=(
787
- "LightGBM binary classifier with TreeExplainer shap values output has changed to a list of ndarray"
788
- ),
789
- )
790
- from shap import TreeExplainer
791
-
792
- if not isinstance(estimator, (LGBMRegressor, LGBMClassifier)):
793
- return None
794
-
795
- explainer = TreeExplainer(estimator)
796
-
797
- shap_values = explainer.shap_values(x)
798
-
799
- # For classification, shap_values is returned as a list for each class
800
- # Take values for the positive class
801
- if isinstance(shap_values, list):
802
- shap_values = shap_values[1]
803
-
804
- # Calculate mean absolute SHAP value for each feature
805
- feature_importance = {}
806
- for i, col in enumerate(x.columns):
807
- feature_importance[col] = np.mean(np.abs(shap_values[:, i]))
808
-
809
- return feature_importance
810
-
811
- except Exception as e:
812
- self.logger.warning(f"Failed to calculate SHAP values: {str(e)}")
813
- return None
715
+ y = correct_string_target(y)
716
+ return x, y, params
814
717
 
815
718
 
816
719
  class OtherEstimatorWrapper(EstimatorWrapper):
@@ -80,7 +80,6 @@ email_and_hem_simultanious=EMAIL and HEM search keys cannot be used simultaneous
80
80
  postal_code_without_country=COUNTRY search key required if POSTAL_CODE is present
81
81
  multiple_search_key=Search key {} passed multiple times
82
82
  unregistered_only_personal_keys=Only personal search keys used. Api_key from profile.upgini.com required for EMAIL/HEM, PHONE NUMBER or IPv4/IPv6 search keys\nSee docs https://github.com/upgini/upgini#-open-up-all-capabilities-of-upgini
83
- only_custom_keys=Only CUSTOM_KEY search keys were provided. At least one of DATE, COUNTRY, POSTAL_CODE, PHONE, EMAIL, HEM, IP should be provided
84
83
  search_key_not_found=Column `{}` from search_keys was not found in X dataframe: {}
85
84
  numeric_search_key_not_found=Index {} in search_keys is out of bounds for {} columns of X dataframe
86
85
  unsupported_search_key_type=Unsupported type of key in search_keys: {}
@@ -74,8 +74,6 @@ def remove_fintech_duplicates(
74
74
  # Checking for different dates by the same personal keys
75
75
  uniques = grouped_by_personal_cols[date_col].nunique()
76
76
  total = len(uniques)
77
- if total == 0:
78
- return segment_df, None
79
77
  diff_dates = len(uniques[uniques > 1])
80
78
  if diff_dates / total >= 0.6:
81
79
  return segment_df, None
@@ -90,8 +90,7 @@ class FeatureInfo:
90
90
  def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
91
91
  if data is not None and len(data) > 0 and feature_meta.name in data.columns:
92
92
  if len(data) > 3:
93
- rand = np.random.RandomState(42)
94
- feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
93
+ feature_sample = np.random.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
95
94
  else:
96
95
  feature_sample = data[feature_meta.name].dropna().unique().tolist()
97
96
  if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
@@ -9,6 +9,7 @@ from traceback import format_exc
9
9
 
10
10
  import numpy as np
11
11
  import scipy.sparse as sp
12
+ from catboost import CatBoostClassifier, CatBoostRegressor
12
13
  from joblib import Parallel, logger
13
14
  from scipy.sparse import issparse
14
15
  from sklearn import config_context, get_config
@@ -341,14 +342,6 @@ def cross_validate(
341
342
  raise e
342
343
 
343
344
 
344
- def is_catboost_estimator(estimator):
345
- try:
346
- from catboost import CatBoostClassifier, CatBoostRegressor
347
- return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
348
- except ImportError:
349
- return False
350
-
351
-
352
345
  def _fit_and_score(
353
346
  estimator,
354
347
  X,
@@ -504,7 +497,7 @@ def _fit_and_score(
504
497
  if y_train is None:
505
498
  estimator.fit(X_train, **fit_params)
506
499
  else:
507
- if is_catboost_estimator(estimator):
500
+ if isinstance(estimator, (CatBoostClassifier, CatBoostRegressor)):
508
501
  fit_params = fit_params.copy()
509
502
  fit_params["eval_set"] = [(X_test, y_test)]
510
503
  estimator.fit(X_train, y_train, **fit_params)
upgini/utils/sort.py CHANGED
@@ -39,6 +39,11 @@ def sort_columns(
39
39
  sorted_keys = sorted(search_keys.keys(), key=lambda x: str(search_keys.get(x)))
40
40
  sorted_keys = [k for k in sorted_keys if k in df.columns and k not in exclude_columns]
41
41
 
42
+ duplicate_names = df.columns[df.columns.duplicated()].unique()
43
+ if len(duplicate_names) > 0:
44
+ logger.warning(f"WARNING: Found columns with duplicate names: {list(duplicate_names)}")
45
+ df = df[list(set(df.columns))]
46
+
42
47
  other_columns = sorted(
43
48
  [
44
49
  c
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.68a3832.dev12
3
+ Version: 1.2.69
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -23,12 +23,12 @@ Classifier: Programming Language :: Python :: 3.10
23
23
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
24
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
25
  Requires-Python: <3.12,>=3.8
26
+ Requires-Dist: catboost>=1.0.3
26
27
  Requires-Dist: fastparquet>=0.8.1
27
28
  Requires-Dist: ipywidgets>=8.1.0
28
29
  Requires-Dist: jarowinkler>=2.0.0
29
30
  Requires-Dist: levenshtein>=0.25.1
30
- Requires-Dist: lightgbm>=4.6.0
31
- Requires-Dist: numpy<3.0.0,>=1.19.0
31
+ Requires-Dist: numpy<=1.26.4,>=1.19.0
32
32
  Requires-Dist: pandas<3.0.0,>=1.1.0
33
33
  Requires-Dist: psutil>=6.0.0
34
34
  Requires-Dist: pydantic<3.0.0,>1.0.0
@@ -39,7 +39,6 @@ Requires-Dist: python-json-logger>=3.3.0
39
39
  Requires-Dist: requests>=2.8.0
40
40
  Requires-Dist: scikit-learn>=1.3.0
41
41
  Requires-Dist: scipy>=1.10.0
42
- Requires-Dist: shap>=0.44.0
43
42
  Requires-Dist: xhtml2pdf<0.3.0,>=0.2.11
44
43
  Description-Content-Type: text/markdown
45
44
 
@@ -1,13 +1,13 @@
1
- upgini/__about__.py,sha256=U3w9ipbCUQQonL603X2mBrHUqIttoTSqcgno2WwRvzk,34
1
+ upgini/__about__.py,sha256=b5n5Ah2b8KdU4qEsuokdYRRb9Cz2Tg3GOvmqydpG060,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
4
+ upgini/dataset.py,sha256=OGjpeFHbj3lWiZTOHTpWEoMMDmFY1FlNC44FKktoZvU,34956
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=GXXx14jwf3F26_KrfJ6O40Vcu1hRx5iBjUB_jxy3Xvg,205476
7
- upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
6
+ upgini/features_enricher.py,sha256=J5U6nprU-oEGUM54fS1W6daG6j4C2xYJE1lx3p6lcBc,205601
7
+ upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
10
- upgini/metrics.py,sha256=I0sVJLNp4fiIq7ZFcUdNTxJjFkzStdFuKbnf2niEGjc,38207
10
+ upgini/metrics.py,sha256=t7uOOnlDYvP6E3DLjPMQcFBjyhJfUQY8aUlx7N0Mh-s,35477
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -38,7 +38,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
38
38
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
39
39
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
40
40
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
41
- upgini/resource_bundle/strings.properties,sha256=XU5ulr5ZDQfGbFk9QdFDzl3oDMaw0eDYCPoEq3ZvIkw,27687
41
+ upgini/resource_bundle/strings.properties,sha256=3zctRNQDJ1STTvLUfryBT72wYeHYnrllV4rG1C3HtfI,27542
42
42
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
43
43
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -52,11 +52,11 @@ upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk
52
52
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
53
53
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
54
54
  upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
55
- upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
55
+ upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
56
56
  upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
57
57
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
58
58
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
59
- upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
59
+ upgini/utils/feature_info.py,sha256=m1tQcT3hTChPAiXzpk0WQcEqElj8KgeCifEJFa7-gss,7247
60
60
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
61
61
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
62
62
  upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
@@ -64,13 +64,13 @@ upgini/utils/mstats.py,sha256=dLJQr5Ak5BAoV-pDPpnfvMURZVkZ3_v250QzAsSlqY4,6286
64
64
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
65
65
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
66
66
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
67
- upgini/utils/sklearn_ext.py,sha256=E7zfYqBW597LetYXHxyM-i4f8luHsGIuP6mMJ2wtSMs,44661
68
- upgini/utils/sort.py,sha256=VDXgZObIVAuGzXlAEejlKCNQcHmN5pN2bMou58sDKFI,6729
67
+ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,44511
68
+ upgini/utils/sort.py,sha256=H79A17NMoHtLbqLCPFx_MBUloLZcDKjOba_H4gCE3t8,6965
69
69
  upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,16579
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.68a3832.dev12.dist-info/METADATA,sha256=ElzWiZHc8K-GAYoV-4oqiAyyMHin2uzqhXXkZcHrvjE,49150
74
- upgini-1.2.68a3832.dev12.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.68a3832.dev12.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.68a3832.dev12.dist-info/RECORD,,
73
+ upgini-1.2.69.dist-info/METADATA,sha256=Z8doK3pmiKqcbPbXbG-JZwaqGwtAEsc6YJg8zfqb7cM,49113
74
+ upgini-1.2.69.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.69.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.69.dist-info/RECORD,,