upgini 1.2.70a3832.dev2__py3-none-any.whl → 1.2.71__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/metadata.py CHANGED
@@ -325,6 +325,10 @@ class RuntimeParameters(BaseModel):
325
325
  properties: Dict[str, Any] = {}
326
326
 
327
327
 
328
+ class AutoFEParameters(BaseModel):
329
+ ts_gap_days: Optional[int] = None
330
+
331
+
328
332
  class SearchCustomization(BaseModel):
329
333
  featuresFilter: Optional[FeaturesFilter] = None
330
334
  extractFeatures: Optional[bool] = None
upgini/metrics.py CHANGED
@@ -3,21 +3,23 @@ from __future__ import annotations
3
3
  import inspect
4
4
  import logging
5
5
  import re
6
- import warnings
7
6
  from collections import defaultdict
8
7
  from copy import deepcopy
9
8
  from dataclasses import dataclass
10
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
11
10
 
11
+ import lightgbm as lgb
12
12
  import numpy as np
13
13
  import pandas as pd
14
14
  from lightgbm import LGBMClassifier, LGBMRegressor
15
15
  from numpy import log1p
16
16
  from pandas.api.types import is_numeric_dtype
17
17
  from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
18
+ from sklearn.preprocessing import OrdinalEncoder
18
19
 
19
20
  from upgini.utils.features_validator import FeaturesValidator
20
21
  from upgini.utils.sklearn_ext import cross_validate
22
+ from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
21
23
 
22
24
  try:
23
25
  from sklearn.metrics import get_scorer_names
@@ -29,7 +31,7 @@ except ImportError:
29
31
  available_scorers = SCORERS
30
32
  from sklearn.metrics import mean_squared_error
31
33
  from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
32
- from sklearn.model_selection import BaseCrossValidator
34
+ from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit
33
35
 
34
36
  from upgini.errors import ValidationError
35
37
  from upgini.metadata import ModelTaskType
@@ -83,22 +85,6 @@ CATBOOST_MULTICLASS_PARAMS = {
83
85
  "auto_class_weights": "Balanced",
84
86
  }
85
87
 
86
- LIGHTGBM_PARAMS = {
87
- "random_state": DEFAULT_RANDOM_STATE,
88
- # "num_leaves": 16,
89
- # "n_estimators": 150,
90
- # "min_child_weight": 1,
91
- "max_depth": 4,
92
- "max_cat_threshold": 80,
93
- "min_data_per_group": 25,
94
- "num_boost_round": 150,
95
- "cat_l2": 10,
96
- "cat_smooth": 12,
97
- "learning_rate": 0.05,
98
- "feature_fraction": 1.0,
99
- "min_sum_hessian_in_leaf": 0.01,
100
- }
101
-
102
88
  LIGHTGBM_REGRESSION_PARAMS = {
103
89
  "random_state": DEFAULT_RANDOM_STATE,
104
90
  "deterministic": True,
@@ -118,18 +104,16 @@ LIGHTGBM_REGRESSION_PARAMS = {
118
104
 
119
105
  LIGHTGBM_MULTICLASS_PARAMS = {
120
106
  "random_state": DEFAULT_RANDOM_STATE,
121
- "deterministic": True,
122
- "min_gain_to_split": 0.001,
123
107
  "n_estimators": 275,
124
- "max_depth": 3,
108
+ "max_depth": 5,
109
+ "learning_rate": 0.05,
110
+ "min_gain_to_split": 0.001,
125
111
  "max_cat_threshold": 80,
126
- "min_data_per_group": 25,
127
- "cat_l2": 10,
128
- "cat_smooth": 12,
129
- "learning_rate": 0.25, # CatBoost 0.25
130
- "min_sum_hessian_in_leaf": 0.01,
131
- "class_weight": "balanced", # TODO pass dict with weights for each class
112
+ "min_data_per_group": 20,
113
+ "cat_smooth": 18,
114
+ "cat_l2": 8,
132
115
  "objective": "multiclass",
116
+ # "class_weight": "balanced",
133
117
  "use_quantized_grad": "true",
134
118
  "num_grad_quant_bins": "8",
135
119
  "stochastic_rounding": "true",
@@ -138,22 +122,22 @@ LIGHTGBM_MULTICLASS_PARAMS = {
138
122
 
139
123
  LIGHTGBM_BINARY_PARAMS = {
140
124
  "random_state": DEFAULT_RANDOM_STATE,
141
- "deterministic": True,
142
125
  "min_gain_to_split": 0.001,
143
126
  "n_estimators": 275,
144
127
  "max_depth": 5,
145
- "max_cat_threshold": 80,
146
- "min_data_per_group": 25,
147
- "cat_l2": 10,
148
- "cat_smooth": 12,
149
128
  "learning_rate": 0.05,
150
- "feature_fraction": 1.0,
151
- "min_sum_hessian_in_leaf": 0.01,
152
129
  "objective": "binary",
153
- "class_weight": "balanced", # TODO pass dict with weights for each class
130
+ # "class_weight": "balanced",
131
+ "deterministic": True,
132
+ "max_cat_threshold": 80,
133
+ "min_data_per_group": 20,
134
+ "cat_smooth": 18,
135
+ "cat_l2": 8,
154
136
  "verbosity": -1,
155
137
  }
156
138
 
139
+ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
140
+
157
141
  N_FOLDS = 5
158
142
  BLOCKED_TS_TEST_SIZE = 0.2
159
143
 
@@ -507,7 +491,8 @@ class EstimatorWrapper:
507
491
  params = _get_add_params(params, add_params)
508
492
  estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
509
493
  elif target_type == ModelTaskType.REGRESSION:
510
- params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
494
+ if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
495
+ params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
511
496
  params = _get_add_params(params, add_params)
512
497
  estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
513
498
  else:
@@ -756,13 +741,24 @@ class LightGBMWrapper(EstimatorWrapper):
756
741
  logger=logger,
757
742
  )
758
743
  self.cat_features = None
744
+ self.cat_encoder = None
745
+ self.n_classes = None
759
746
 
760
747
  def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
761
748
  x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
749
+ if self.target_type in [ModelTaskType.BINARY, ModelTaskType.MULTICLASS]:
750
+ self.n_classes = len(np.unique(y_numpy))
751
+ if LIGHTGBM_EARLY_STOPPING_ROUNDS is not None:
752
+ params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
762
753
  self.cat_features = _get_cat_features(x)
763
- x = fill_na_cat_features(x, self.cat_features)
764
- for feature in self.cat_features:
765
- x[feature] = x[feature].astype("category").cat.codes
754
+ if self.cat_features:
755
+ x = fill_na_cat_features(x, self.cat_features)
756
+ encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
757
+ encoded = pd.DataFrame(
758
+ encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
759
+ )
760
+ x[self.cat_features] = encoded
761
+ self.cat_encoder = encoder
766
762
  if not is_numeric_dtype(y_numpy):
767
763
  y_numpy = correct_string_target(y_numpy)
768
764
 
@@ -772,39 +768,50 @@ class LightGBMWrapper(EstimatorWrapper):
772
768
  x, y_numpy, params = super()._prepare_to_calculate(x, y)
773
769
  if self.cat_features is not None:
774
770
  x = fill_na_cat_features(x, self.cat_features)
775
- for feature in self.cat_features:
776
- x[feature] = x[feature].astype("category").cat.codes
771
+ if self.cat_encoder is not None:
772
+ x[self.cat_features] = pd.DataFrame(
773
+ self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
774
+ )
777
775
  if not is_numeric_dtype(y):
778
776
  y_numpy = correct_string_target(y_numpy)
779
777
  return x, y_numpy, params
780
778
 
781
779
  def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
782
780
  try:
783
- # Suppress specific warning from SHAP for LightGBM binary classifier
784
- warnings.filterwarnings(
785
- "ignore",
786
- message=(
787
- "LightGBM binary classifier with TreeExplainer shap values output has changed to a list of ndarray"
788
- ),
781
+ shap_matrix = estimator.predict(
782
+ x,
783
+ predict_disable_shape_check=True,
784
+ raw_score=True,
785
+ pred_leaf=False,
786
+ pred_early_stop=True,
787
+ pred_contrib=True,
789
788
  )
790
- from shap import TreeExplainer
791
-
792
- if not isinstance(estimator, (LGBMRegressor, LGBMClassifier)):
793
- return None
794
789
 
795
- explainer = TreeExplainer(estimator)
796
-
797
- shap_values = explainer.shap_values(x)
790
+ if self.target_type == ModelTaskType.MULTICLASS:
791
+ n_feat = x.shape[1]
792
+ shap_matrix.shape = (shap_matrix.shape[0], self.n_classes, n_feat + 1)
793
+ shap_matrix = np.mean(np.abs(shap_matrix), axis=1)
798
794
 
799
- # For classification, shap_values is returned as a list for each class
800
- # Take values for the positive class
801
- if isinstance(shap_values, list):
802
- shap_values = shap_values[1]
795
+ # exclude base value
796
+ shap_matrix = shap_matrix[:, :-1]
803
797
 
804
- # Calculate mean absolute SHAP value for each feature
805
798
  feature_importance = {}
806
799
  for i, col in enumerate(x.columns):
807
- feature_importance[col] = np.mean(np.abs(shap_values[:, i]))
800
+ feature_importance[col] = np.mean(np.abs(shap_matrix[:, i]))
801
+
802
+ # # exclude last column (base value)
803
+ # shap_values_only = shap_values[:, :-1]
804
+ # mean_abs_shap = np.mean(np.abs(shap_values_only), axis=0)
805
+
806
+ # # For classification, shap_values is returned as a list for each class
807
+ # # Take values for the positive class
808
+ # if isinstance(shap_values, list):
809
+ # shap_values = shap_values[1]
810
+
811
+ # # Calculate mean absolute SHAP value for each feature
812
+ # feature_importance = {}
813
+ # for i, col in enumerate(x.columns):
814
+ # feature_importance[col] = np.mean(np.abs(shap_values[:, i]))
808
815
 
809
816
  return feature_importance
810
817
 
@@ -137,6 +137,7 @@ x_and_eval_x_diff_types=X and eval_set X has different types: {} and {}
137
137
  baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input dataframe
138
138
 baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and retry
139
139
  missing_features_for_transform=Missing some features for transform that were presented on fit: {}
140
+ missing_target_for_transform=Search contains features on target. Please add y to the call and try again
140
141
  missing_id_column=Id column {} not found in X
141
142
  # target validation
142
143
  empty_target=Target is empty in all rows
upgini/search_task.py CHANGED
@@ -168,7 +168,13 @@ class SearchTask:
168
168
  for meta in self.provider_metadata_v2:
169
169
  if meta.features_used_for_embeddings is not None:
170
170
  features_for_transform.update(meta.features_used_for_embeddings)
171
-
171
+ if meta.generated_features:
172
+ features_for_transform.update(
173
+ c.original_name
174
+ for f in meta.generated_features
175
+ for c in f.base_columns
176
+ if c.ads_definition_id is None
177
+ )
172
178
  return list(features_for_transform)
173
179
 
174
180
  def get_shuffle_kfold(self) -> Optional[bool]:
upgini/utils/mstats.py CHANGED
@@ -118,7 +118,7 @@ def spearmanr(
118
118
  # - dof: degrees of freedom
119
119
  # - t_stat: t-statistic
120
120
  # - alternative: 'two-sided', 'greater', 'less'
121
- def compute_t_pvalue(t_stat, dof, alternative='two-sided'):
121
+ def compute_t_pvalue(t_stat, dof, alternative="two-sided"):
122
122
  from scipy.stats import t
123
123
 
124
124
  if alternative == "two-sided":
@@ -349,6 +349,14 @@ def is_catboost_estimator(estimator):
349
349
  return False
350
350
 
351
351
 
352
+ def is_lightgbm_estimator(estimator):
353
+ try:
354
+ from lightgbm import LGBMClassifier, LGBMRegressor
355
+ return isinstance(estimator, (LGBMClassifier, LGBMRegressor))
356
+ except ImportError:
357
+ return False
358
+
359
+
352
360
  def _fit_and_score(
353
361
  estimator,
354
362
  X,
@@ -507,6 +515,9 @@ def _fit_and_score(
507
515
  if is_catboost_estimator(estimator):
508
516
  fit_params = fit_params.copy()
509
517
  fit_params["eval_set"] = [(X_test, y_test)]
518
+ elif is_lightgbm_estimator(estimator):
519
+ fit_params = fit_params.copy()
520
+ fit_params["eval_set"] = [(X_test, y_test)]
510
521
  estimator.fit(X_train, y_train, **fit_params)
511
522
 
512
523
  except Exception:
upgini/utils/sort.py CHANGED
@@ -87,7 +87,7 @@ def get_sort_columns_dict(
87
87
  df_with_target = df_with_target.loc[~target.isna()]
88
88
  df = df_with_target.iloc[:, :-1]
89
89
  target = df_with_target.iloc[:, -1]
90
- df = df.fillna(df.mean())
90
+ df = df.fillna(df.apply(lambda x: int(x.mean()) if pd.api.types.is_integer_dtype(x) else x.mean()))
91
91
  omit_nan = False
92
92
  hashes = [hash_series(df[col]) for col in columns_for_sort]
93
93
  df = np.asarray(df, dtype=np.float32)
@@ -204,7 +204,7 @@ def balance_undersample(
204
204
  def balance_undersample_forced(
205
205
  df: pd.DataFrame,
206
206
  target_column: str,
207
- id_columns: List[str],
207
+ id_columns: Optional[List[str]],
208
208
  date_column: str,
209
209
  task_type: ModelTaskType,
210
210
  cv_type: Optional[CVType],
@@ -287,7 +287,7 @@ DEFAULT_TIME_UNIT_THRESHOLD = pd.Timedelta(weeks=4)
287
287
 
288
288
  def balance_undersample_time_series_trunc(
289
289
  df: pd.DataFrame,
290
- id_columns: List[str],
290
+ id_columns: Optional[List[str]],
291
291
  date_column: str,
292
292
  sample_size: int,
293
293
  random_state: int = 42,
@@ -298,6 +298,8 @@ def balance_undersample_time_series_trunc(
298
298
  **kwargs,
299
299
  ):
300
300
  # Convert date column to datetime
301
+ if id_columns is None:
302
+ id_columns = [date_column]
301
303
  dates_df = df[id_columns + [date_column]].copy()
302
304
  dates_df[date_column] = pd.to_datetime(dates_df[date_column], unit="ms")
303
305
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.70a3832.dev2
3
+ Version: 1.2.71
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -17,12 +17,11 @@ Classifier: Intended Audience :: Science/Research
17
17
  Classifier: Intended Audience :: Telecommunications Industry
18
18
  Classifier: License :: OSI Approved :: BSD License
19
19
  Classifier: Operating System :: OS Independent
20
- Classifier: Programming Language :: Python :: 3.8
21
- Classifier: Programming Language :: Python :: 3.9
22
20
  Classifier: Programming Language :: Python :: 3.10
21
+ Classifier: Programming Language :: Python :: 3.11
23
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
24
23
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
25
- Requires-Python: <3.12,>=3.8
24
+ Requires-Python: <3.12,>=3.10
26
25
  Requires-Dist: fastparquet>=0.8.1
27
26
  Requires-Dist: ipywidgets>=8.1.0
28
27
  Requires-Dist: jarowinkler>=2.0.0
@@ -1,14 +1,13 @@
1
- upgini/__about__.py,sha256=kdfsfbpVybww_eNOlX3jVjyk7oUeeXZvGBwotRAlM-U,33
1
+ upgini/__about__.py,sha256=GEAFb-nM2N0vpxFRvsPnDG0tUOHq0YpnqvOcoHBpoqg,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
4
+ upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=GwBizSoaI17Meg2bPHCz-o_U8i6-wRgrXv6ZpyJqSvk,205718
6
+ upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
7
7
  upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
8
- upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
- upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
10
- upgini/metrics.py,sha256=I0sVJLNp4fiIq7ZFcUdNTxJjFkzStdFuKbnf2niEGjc,38207
11
- upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
8
+ upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
+ upgini/metrics.py,sha256=a0bY4oTMb-MgB1yC1IuTcEtotKZxAxjgV_QV2Z4V8u4,38988
10
+ upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
12
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
14
13
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
@@ -16,19 +15,20 @@ upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo
16
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
16
  upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
18
17
  upgini/autofe/binary.py,sha256=MnQuFiERpocjCPQUjOljlsq5FE-04GPfwtNjzvfNMyU,7671
19
- upgini/autofe/date.py,sha256=I07psJerrxOcHao91PdSCk9X6KWu61IBVyFRLjGNgK8,10730
20
- upgini/autofe/feature.py,sha256=xgu6bVIlUJ5PCUgoXQRNcGkcMOhj-_BdDRmkB_qRFS4,14766
18
+ upgini/autofe/date.py,sha256=C86F7sPiscUGq2a45UtQA9ADWBWg0kt54mePHHzjbLE,10633
19
+ upgini/autofe/feature.py,sha256=y1x3wijhTVBmloayQAHiscqKU9Ll8kLcGm1PdvS357I,14910
21
20
  upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
22
21
  upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
23
22
  upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
23
+ upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
24
24
  upgini/autofe/vector.py,sha256=l0KdKg-txlZxDSE4hPPfCtfGQofYbl7oaABPr830sPI,667
25
25
  upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
26
- upgini/autofe/timeseries/base.py,sha256=T9Ec8LKJbiwTUGGsd_xhM0U0NUJblqmKchkzUI1sK88,3755
27
- upgini/autofe/timeseries/cross.py,sha256=Sh5hAXZFWKaFRqf_JGODu9pWO2tmuV5VKyK9eX3i7-I,4931
26
+ upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
27
+ upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
28
28
  upgini/autofe/timeseries/delta.py,sha256=h0YhmI1TlPJnjwFpN_GQxLb6r59DQuucnG5tQAXSgjU,3520
29
29
  upgini/autofe/timeseries/lag.py,sha256=LfQtg484vuqM0mgY4Wft1swHX_Srq7OKKgZswCXoiXI,1882
30
- upgini/autofe/timeseries/roll.py,sha256=bNFMDszSYTWvB7EyhHbRY1DJqzSURvHlPAcBebt0y0Y,2878
31
- upgini/autofe/timeseries/trend.py,sha256=9p2Q5ByAi6cx9RH9teBTe8FyjSzqthznC2Lo5dsJ0ho,2051
30
+ upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_aCHg,2794
31
+ upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
32
32
  upgini/autofe/timeseries/volatility.py,sha256=9shUmIKjpWTHVYjj80YBsk0XheBJ9uBuLv5NW9Mchnk,7953
33
33
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
@@ -38,7 +38,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
38
38
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
39
39
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
40
40
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
41
- upgini/resource_bundle/strings.properties,sha256=XU5ulr5ZDQfGbFk9QdFDzl3oDMaw0eDYCPoEq3ZvIkw,27687
41
+ upgini/resource_bundle/strings.properties,sha256=mwQrerdJj3adzT-fHqvs6Qjf-rqDccsUzELDIXJKAmY,27791
42
42
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
43
43
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -60,17 +60,17 @@ upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,
60
60
  upgini/utils/features_validator.py,sha256=lEfmk4DoxZ4ooOE1HC0ZXtUb_lFKRFHIrnFULZ4_rL8,3746
61
61
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
62
62
  upgini/utils/ip_utils.py,sha256=TSQ_qDsLlVnm09X1HacpabEf_HNqSWpxBF4Sdc2xs08,6580
63
- upgini/utils/mstats.py,sha256=dLJQr5Ak5BAoV-pDPpnfvMURZVkZ3_v250QzAsSlqY4,6286
63
+ upgini/utils/mstats.py,sha256=u3gQVUtDRbyrOQK6V1UJ2Rx1QbkSNYGjXa6m3Z_dPVs,6286
64
64
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
65
65
  upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml6ZcU,1716
66
66
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
67
- upgini/utils/sklearn_ext.py,sha256=E7zfYqBW597LetYXHxyM-i4f8luHsGIuP6mMJ2wtSMs,44661
68
- upgini/utils/sort.py,sha256=H79A17NMoHtLbqLCPFx_MBUloLZcDKjOba_H4gCE3t8,6965
69
- upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,16579
67
+ upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
68
+ upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
69
+ upgini/utils/target_utils.py,sha256=KNFzJta1SpGU4sp07dHKSeVJlDs_9qgD2wcw5YuJfOc,16661
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.70a3832.dev2.dist-info/METADATA,sha256=O_4tZRoxEur3Ut9q-6kT2LEIm-JN5-mVps1ujZibt6A,49149
74
- upgini-1.2.70a3832.dev2.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.70a3832.dev2.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.70a3832.dev2.dist-info/RECORD,,
73
+ upgini-1.2.71.dist-info/METADATA,sha256=Dvzwz4pOrA3V4OlH7BjxerA7UAZhn1H4-qh7SjMpc3E,49091
74
+ upgini-1.2.71.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.71.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.71.dist-info/RECORD,,
upgini/lazy_import.py DELETED
@@ -1,35 +0,0 @@
1
- import importlib
2
- import importlib.util
3
- import importlib.machinery
4
-
5
-
6
- class LazyImport:
7
- def __init__(self, module_name, class_name):
8
- self.module_name = module_name
9
- self.class_name = class_name
10
- self._module = None
11
- self._class = None
12
-
13
- def _load(self):
14
- if self._module is None:
15
- # Load module and save link to it
16
- spec = importlib.util.find_spec(self.module_name)
17
- if spec is None:
18
- raise ImportError(f"Module {self.module_name} not found")
19
-
20
- # Create module
21
- self._module = importlib.util.module_from_spec(spec)
22
-
23
- # Execute module
24
- spec.loader.exec_module(self._module)
25
-
26
- # Get class from module
27
- self._class = getattr(self._module, self.class_name)
28
-
29
- def __call__(self, *args, **kwargs):
30
- self._load()
31
- return self._class(*args, **kwargs)
32
-
33
- def __getattr__(self, name):
34
- self._load()
35
- return getattr(self._class, name)