upgini 1.2.68a3832.dev4__py3-none-any.whl → 1.2.68a3832.dev6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.68a3832.dev4"
1
+ __version__ = "1.2.68a3832.dev6"
upgini/dataset.py CHANGED
@@ -388,7 +388,7 @@ class Dataset: # (pd.DataFrame):
388
388
  for col in columns_to_validate:
389
389
  self.data[f"{col}_is_valid"] = ~self.data[col].isnull()
390
390
  if validate_target and target is not None and col == target:
391
- self.data.loc[self.data[target] == np.Inf, f"{col}_is_valid"] = False
391
+ self.data.loc[self.data[target] == np.inf, f"{col}_is_valid"] = False
392
392
 
393
393
  if col in mandatory_columns:
394
394
  self.data["valid_mandatory"] = self.data["valid_mandatory"] & self.data[f"{col}_is_valid"]
upgini/metrics.py CHANGED
@@ -101,6 +101,52 @@ LIGHTGBM_PARAMS = {
101
101
  "min_sum_hessian_in_leaf": 0.01,
102
102
  }
103
103
 
104
+ LIGHTGBM_REGRESSION_PARAMS = {
105
+ "random_state": DEFAULT_RANDOM_STATE,
106
+ "n_estimators": 275,
107
+ "max_depth": 5,
108
+ "max_cat_threshold": 80,
109
+ "min_data_per_group": 25,
110
+ "cat_l2": 10,
111
+ "cat_smooth": 12,
112
+ "learning_rate": 0.05,
113
+ "feature_fraction": 1.0,
114
+ "min_sum_hessian_in_leaf": 0.01,
115
+ "objective": "huber",
116
+ "verbosity": 0,
117
+ }
118
+
119
+ LIGHTGBM_MULTICLASS_PARAMS = {
120
+ "random_state": DEFAULT_RANDOM_STATE,
121
+ "n_estimators": 275,
122
+ "max_depth": 3,
123
+ "max_cat_threshold": 80,
124
+ "min_data_per_group": 25,
125
+ "cat_l2": 10,
126
+ "cat_smooth": 12,
127
+ "learning_rate": 0.25, # CatBoost 0.25
128
+ "min_sum_hessian_in_leaf": 0.01,
129
+ "objective": "multiclass",
130
+ "class_weight": "balanced",
131
+ "verbosity": 0,
132
+ }
133
+
134
+ LIGHTGBM_BINARY_PARAMS = {
135
+ "random_state": DEFAULT_RANDOM_STATE,
136
+ "n_estimators": 275,
137
+ "max_depth": 5,
138
+ "max_cat_threshold": 80,
139
+ "min_data_per_group": 25,
140
+ "cat_l2": 10,
141
+ "cat_smooth": 12,
142
+ "learning_rate": 0.05,
143
+ "feature_fraction": 1.0,
144
+ "min_sum_hessian_in_leaf": 0.01,
145
+ "objective": "binary",
146
+ "class_weight": "balanced",
147
+ "verbosity": 0,
148
+ }
149
+
104
150
  N_FOLDS = 5
105
151
  BLOCKED_TS_TEST_SIZE = 0.2
106
152
 
@@ -441,28 +487,27 @@ class EstimatorWrapper:
441
487
  }
442
488
  if estimator is None:
443
489
  params = {}
444
- params["has_time"] = has_date
445
490
  # if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
446
491
  # params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
447
492
  if target_type == ModelTaskType.MULTICLASS:
448
493
  # params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
449
494
  # params = _get_add_params(params, add_params)
450
495
  # estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
451
- params = _get_add_params(params, LIGHTGBM_PARAMS)
496
+ params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
452
497
  params = _get_add_params(params, add_params)
453
498
  estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
454
499
  elif target_type == ModelTaskType.BINARY:
455
500
  # params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
456
501
  # params = _get_add_params(params, add_params)
457
502
  # estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
458
- params = _get_add_params(params, LIGHTGBM_PARAMS)
503
+ params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
459
504
  params = _get_add_params(params, add_params)
460
505
  estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
461
506
  elif target_type == ModelTaskType.REGRESSION:
462
507
  # params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
463
508
  # params = _get_add_params(params, add_params)
464
509
  # estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
465
- params = _get_add_params(params, LIGHTGBM_PARAMS)
510
+ params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
466
511
  params = _get_add_params(params, add_params)
467
512
  estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
468
513
  else:
@@ -474,6 +519,7 @@ class EstimatorWrapper:
474
519
  estimator_copy = deepcopy(estimator)
475
520
  kwargs["estimator"] = estimator_copy
476
521
  if is_catboost_estimator(estimator):
522
+ params["has_time"] = has_date
477
523
  if cat_features is not None:
478
524
  for cat_feature in cat_features:
479
525
  if cat_feature not in x.columns:
@@ -712,6 +758,8 @@ class LightGBMWrapper(EstimatorWrapper):
712
758
 
713
759
  def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
714
760
  x, y, groups, params = super()._prepare_to_fit(x, y)
761
+ if self.target_type == ModelTaskType.MULTICLASS:
762
+ params["num_class"] = y.nunique()
715
763
  self.cat_features = _get_cat_features(x)
716
764
  x = fill_na_cat_features(x, self.cat_features)
717
765
  for feature in self.cat_features:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.68a3832.dev4
3
+ Version: 1.2.68a3832.dev6
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,13 +1,13 @@
1
- upgini/__about__.py,sha256=tA-8e1UEPt8tajFxx1rKXEmQOOfuqGfXLGYhIFUctPM,33
1
+ upgini/__about__.py,sha256=8CoP2d6NQy3RuFamWKCHcwiF2GYkyj5rtk6FIpBm0rI,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
- upgini/dataset.py,sha256=OGjpeFHbj3lWiZTOHTpWEoMMDmFY1FlNC44FKktoZvU,34956
4
+ upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
6
  upgini/features_enricher.py,sha256=GXXx14jwf3F26_KrfJ6O40Vcu1hRx5iBjUB_jxy3Xvg,205476
7
7
  upgini/http.py,sha256=ud0Cp7h0jNeHuuZGpU_1dAAEiabGoJjGxc1X5oeBQr4,43496
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
10
- upgini/metrics.py,sha256=0WIe1IQx9vzUK0pVGv3hODBrOL3zaLDybXbs5S_ntvQ,36991
10
+ upgini/metrics.py,sha256=ZBAjInLCm15BBYWNi9kz6IJs8R0WrF2PkrLnLAodR1Y,38246
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.68a3832.dev4.dist-info/METADATA,sha256=N1U1IshnQeHVgYDyt_pa2G7SezelEjMUjcdNOQ1KxkQ,49149
74
- upgini-1.2.68a3832.dev4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.68a3832.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.68a3832.dev4.dist-info/RECORD,,
73
+ upgini-1.2.68a3832.dev6.dist-info/METADATA,sha256=UWgAnrn5D5mQT6Js-sXVBCA1wPW7YZU-JEEcnRdUCHU,49149
74
+ upgini-1.2.68a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.68a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.68a3832.dev6.dist-info/RECORD,,