upgini 1.2.9a107__py3-none-any.whl → 1.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.9a107"
1
+ __version__ = "1.2.10"
upgini/features_enricher.py CHANGED
@@ -1633,10 +1633,10 @@ class FeaturesEnricher(TransformerMixin):
1633
1633
 
1634
1634
  rows_to_drop = None
1635
1635
  has_date = SearchKey.find_key(search_keys, [SearchKey.DATE, SearchKey.DATETIME]) is not None
1636
- task_type = self.model_task_type or define_task(
1636
+ self.model_task_type = self.model_task_type or define_task(
1637
1637
  self.df_with_original_index[TARGET], has_date, self.logger, silent=True
1638
1638
  )
1639
- if task_type == ModelTaskType.REGRESSION:
1639
+ if self.model_task_type == ModelTaskType.REGRESSION:
1640
1640
  target_outliers_df = self._search_task.get_target_outliers(trace_id)
1641
1641
  if target_outliers_df is not None and len(target_outliers_df) > 0:
1642
1642
  outliers = pd.merge(
@@ -2391,12 +2391,12 @@ class FeaturesEnricher(TransformerMixin):
2391
2391
 
2392
2392
  maybe_date_column = SearchKey.find_key(self.fit_search_keys, [SearchKey.DATE, SearchKey.DATETIME])
2393
2393
  has_date = maybe_date_column is not None
2394
- model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
2394
+ self.model_task_type = self.model_task_type or define_task(validated_y, has_date, self.logger)
2395
2395
 
2396
- self._validate_binary_observations(validated_y, model_task_type)
2396
+ self._validate_binary_observations(validated_y, self.model_task_type)
2397
2397
 
2398
2398
  self.runtime_parameters = get_runtime_params_custom_loss(
2399
- self.loss, model_task_type, self.runtime_parameters, self.logger
2399
+ self.loss, self.model_task_type, self.runtime_parameters, self.logger
2400
2400
  )
2401
2401
 
2402
2402
  if validated_eval_set is not None and len(validated_eval_set) > 0:
@@ -2449,7 +2449,7 @@ class FeaturesEnricher(TransformerMixin):
2449
2449
  if is_numeric_dtype(df[self.TARGET_NAME]) and has_date:
2450
2450
  self._validate_PSI(df.sort_values(by=maybe_date_column))
2451
2451
 
2452
- self.__adjust_cv(df, maybe_date_column, model_task_type)
2452
+ self.__adjust_cv(df, maybe_date_column, self.model_task_type)
2453
2453
 
2454
2454
  normalizer = Normalizer(
2455
2455
  self.fit_search_keys, self.fit_generated_features, self.bundle, self.logger, self.warning_counter
@@ -2557,7 +2557,7 @@ class FeaturesEnricher(TransformerMixin):
2557
2557
  meaning_types=meaning_types,
2558
2558
  search_keys=combined_search_keys,
2559
2559
  unnest_search_keys=unnest_search_keys,
2560
- model_task_type=model_task_type,
2560
+ model_task_type=self.model_task_type,
2561
2561
  date_format=self.date_format,
2562
2562
  random_state=self.random_state,
2563
2563
  rest_client=self.rest_client,
@@ -2780,6 +2780,8 @@ class FeaturesEnricher(TransformerMixin):
2780
2780
  raise ValidationError(self.bundle.get("x_contains_reserved_column_name").format(EVAL_SET_INDEX))
2781
2781
  if SYSTEM_RECORD_ID in validated_X.columns:
2782
2782
  raise ValidationError(self.bundle.get("x_contains_reserved_column_name").format(SYSTEM_RECORD_ID))
2783
+ if ENTITY_SYSTEM_RECORD_ID in validated_X.columns:
2784
+ raise ValidationError(self.bundle.get("x_contains_reserved_column_name").format(ENTITY_SYSTEM_RECORD_ID))
2783
2785
 
2784
2786
  return validated_X
2785
2787
 
@@ -3760,7 +3762,10 @@ class FeaturesEnricher(TransformerMixin):
3760
3762
  display_html_dataframe(self.metrics, self.metrics, msg)
3761
3763
 
3762
3764
  def __show_selected_features(self, search_keys: Dict[str, SearchKey]):
3763
- msg = self.bundle.get("features_info_header").format(len(self.feature_names_), list(search_keys.keys()))
3765
+ search_key_names = search_keys.keys()
3766
+ if self.fit_columns_renaming:
3767
+ search_key_names = [self.fit_columns_renaming.get(col, col) for col in search_key_names]
3768
+ msg = self.bundle.get("features_info_header").format(len(self.feature_names_), search_key_names)
3764
3769
 
3765
3770
  try:
3766
3771
  _ = get_ipython() # type: ignore
upgini/metrics.py CHANGED
@@ -10,7 +10,6 @@ import catboost
10
10
  import numpy as np
11
11
  import pandas as pd
12
12
  from catboost import CatBoostClassifier, CatBoostRegressor
13
- from lightgbm import LGBMClassifier, LGBMRegressor
14
13
  from numpy import log1p
15
14
  from pandas.api.types import is_numeric_dtype
16
15
  from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
@@ -340,8 +339,6 @@ class EstimatorWrapper:
340
339
  else:
341
340
  metrics = []
342
341
  for est in self.cv_estimators:
343
- self.logger.info(f"Before scoring metric with x: {x.columns.to_list()}")
344
- print(f"Before scoring metric with x: {x.columns.to_list()}")
345
342
  metrics.append(self.scorer(est, x, y))
346
343
 
347
344
  metric = np.mean(metrics) * self.multiplier
@@ -410,6 +407,8 @@ class EstimatorWrapper:
410
407
  estimator = CatBoostWrapper(**kwargs)
411
408
  else:
412
409
  try:
410
+ from lightgbm import LGBMClassifier, LGBMRegressor
411
+
413
412
  if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
414
413
  estimator = LightGBMWrapper(**kwargs)
415
414
  else:
@@ -497,7 +496,7 @@ class CatBoostWrapper(EstimatorWrapper):
497
496
  if x[name].nunique() > 1:
498
497
  unique_cat_features.append(name)
499
498
  else:
500
- print(f"Drop column {name} on preparing data for fit")
499
+ self.logger.info(f"Drop column {name} on preparing data for fit")
501
500
  x = x.drop(columns=name)
502
501
  self.exclude_features.append(name)
503
502
  self.cat_features = unique_cat_features
upgini/utils/features_validator.py CHANGED
@@ -87,4 +87,4 @@ class FeaturesValidator:
87
87
 
88
88
  @staticmethod
89
89
  def find_constant_features(df: pd.DataFrame) -> List[str]:
90
- return [i for i in df if df[i].nunique() == 1]
90
+ return [i for i in df if df[i].nunique() <= 1]
upgini-1.2.10.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.9a107
3
+ Version: 1.2.10
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -28,7 +28,6 @@ Requires-Dist: fastparquet>=0.8.1
28
28
  Requires-Dist: ipywidgets>=8.1.0
29
29
  Requires-Dist: jarowinkler>=2.0.0
30
30
  Requires-Dist: levenshtein>=0.25.1
31
- Requires-Dist: lightgbm>=3.3.2
32
31
  Requires-Dist: numpy<=1.26.4,>=1.19.0
33
32
  Requires-Dist: pandas<3.0.0,>=1.1.0
34
33
  Requires-Dist: pydantic<3.0.0,>1.0.0
upgini-1.2.10.dist-info/RECORD CHANGED
@@ -1,13 +1,13 @@
1
- upgini/__about__.py,sha256=KFwktvvWCEF1O_q5hKsFs_W_DL6N5cIT2gykmOpfzC4,26
1
+ upgini/__about__.py,sha256=GI4rgymQsPWdk2_d96NgmZBRuFM6yOZB-kysnrjBjVo,23
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=olZ-OHSfBNoBSCo7R5t7uCLukI2nO7afpx_A-HCiJLk,31067
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=rC3Lq1KvwJdSiITAUfYzBxdRtPkpOo6X2fqc3wWQfM4,187594
6
+ upgini/features_enricher.py,sha256=NIroiDLvlWtfxE9qqYYoB6ZTLgpGweRaCVcL8osXoI8,187995
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
10
- upgini/metrics.py,sha256=LuBOoPuJW8yeIhlqA8gqoRPU7Ey_CMqO05bg7hjMB2g,31378
10
+ upgini/metrics.py,sha256=aKJwAYUGNRdiz9z-bxDxs4jGZQ_VkPXa7sZ52C0VpVI,31243
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=ddSKUK_-eGJB3NgrqOMoWJU-OxQ253WsNLp8aqJkaIM,1389
@@ -47,7 +47,7 @@ upgini/utils/deduplicate_utils.py,sha256=Zvs7zW4QzaERQmJNPrTVf2ZTVBkBLOycFCzyMwt
47
47
  upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
48
48
  upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
49
49
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
50
- upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
50
+ upgini/utils/features_validator.py,sha256=LIF6YMpHlxCrVz6mvMpc1kfNTIMVGlNCor7IJTmlSfI,3307
51
51
  upgini/utils/format.py,sha256=Yv5cvvSs2bOLUzzNu96Pu33VMDNbabio92QepUj41jU,243
52
52
  upgini/utils/ip_utils.py,sha256=Q6vb7Sr5Khx3Sq3eENjW2qCXKej_S5jZbneH6zEOkzQ,5171
53
53
  upgini/utils/phone_utils.py,sha256=IrbztLuOJBiePqqxllfABWfYlfAjYevPhXKipl95wUI,10432
@@ -57,7 +57,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
57
  upgini/utils/target_utils.py,sha256=BVtDmrmFMKerSUWaNOIEdzsYHIFiODdpnWbE50QDPDc,7864
58
58
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
59
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
60
- upgini-1.2.9a107.dist-info/METADATA,sha256=7kEGQtfOfmwSQjJ7ALMj19F9sNWc44XhLMLl44_lKSw,48611
61
- upgini-1.2.9a107.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.2.9a107.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.9a107.dist-info/RECORD,,
60
+ upgini-1.2.10.dist-info/METADATA,sha256=Cc-4FefWQaLK1hlCIR_dMIAm_NHRD9HxquQbTZn986E,48577
61
+ upgini-1.2.10.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
+ upgini-1.2.10.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
+ upgini-1.2.10.dist-info/RECORD,,