upgini 1.2.68a3832.dev8__tar.gz → 1.2.68a3832.dev10__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (78) [hide/show]
  1. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/PKG-INFO +1 -1
  2. upgini-1.2.68a3832.dev10/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/metrics.py +9 -22
  4. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/feature_info.py +2 -1
  5. upgini-1.2.68a3832.dev8/src/upgini/__about__.py +0 -1
  6. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/.gitignore +0 -0
  7. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/LICENSE +0 -0
  8. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/README.md +0 -0
  9. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/pyproject.toml +0 -0
  10. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/__init__.py +0 -0
  11. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/ads.py +0 -0
  12. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/ads_management/__init__.py +0 -0
  13. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/ads_management/ads_manager.py +0 -0
  14. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/__init__.py +0 -0
  15. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/all_operators.py +0 -0
  16. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/binary.py +0 -0
  17. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/date.py +0 -0
  18. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/feature.py +0 -0
  19. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/groupby.py +0 -0
  20. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/operator.py +0 -0
  21. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/__init__.py +0 -0
  22. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/base.py +0 -0
  23. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/cross.py +0 -0
  24. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/delta.py +0 -0
  25. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/lag.py +0 -0
  26. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/roll.py +0 -0
  27. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/trend.py +0 -0
  28. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/timeseries/volatility.py +0 -0
  29. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/unary.py +0 -0
  30. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/autofe/vector.py +0 -0
  31. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/data_source/__init__.py +0 -0
  32. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/data_source/data_source_publisher.py +0 -0
  33. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/dataset.py +0 -0
  34. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/errors.py +0 -0
  35. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/features_enricher.py +0 -0
  36. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/http.py +0 -0
  37. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/lazy_import.py +0 -0
  38. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/mdc/__init__.py +0 -0
  39. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/mdc/context.py +0 -0
  40. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/metadata.py +0 -0
  41. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/normalizer/normalize_utils.py +0 -0
  43. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/__init__.py +0 -0
  44. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/exceptions.py +0 -0
  45. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/strings.properties +0 -0
  46. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  47. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/__init__.py +0 -0
  48. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/base.py +0 -0
  49. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/random_under_sampler.py +0 -0
  50. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/sampler/utils.py +0 -0
  51. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/search_task.py +0 -0
  52. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/spinner.py +0 -0
  53. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  54. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/__init__.py +0 -0
  55. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/base_search_key_detector.py +0 -0
  56. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/blocked_time_series.py +0 -0
  57. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/country_utils.py +0 -0
  58. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/custom_loss_utils.py +0 -0
  59. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/cv_utils.py +0 -0
  60. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/datetime_utils.py +0 -0
  61. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/deduplicate_utils.py +0 -0
  62. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/display_utils.py +0 -0
  63. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/features_validator.py +0 -0
  66. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/format.py +0 -0
  67. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/ip_utils.py +0 -0
  68. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/mstats.py +0 -0
  69. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/phone_utils.py +0 -0
  70. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/postal_code_utils.py +0 -0
  71. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/progress_bar.py +0 -0
  72. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/sklearn_ext.py +0 -0
  73. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/sort.py +0 -0
  74. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/target_utils.py +0 -0
  75. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/track_info.py +0 -0
  76. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/ts_utils.py +0 -0
  77. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/utils/warning_counter.py +0 -0
  78. {upgini-1.2.68a3832.dev8 → upgini-1.2.68a3832.dev10}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.68a3832.dev8
3
+ Version: 1.2.68a3832.dev10
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.68a3832.dev10"
@@ -127,8 +127,11 @@ LIGHTGBM_MULTICLASS_PARAMS = {
127
127
  "cat_smooth": 12,
128
128
  "learning_rate": 0.25, # CatBoost 0.25
129
129
  "min_sum_hessian_in_leaf": 0.01,
130
- "objective": "softmax",
131
130
  "class_weight": "balanced", # TODO pass dict with weights for each class
131
+ "objective": "multiclass",
132
+ "use_quantized_grad": "true",
133
+ "num_grad_quant_bins": "8",
134
+ "stochastic_rounding": "true",
132
135
  "verbosity": -1,
133
136
  }
134
137
 
@@ -491,30 +494,15 @@ class EstimatorWrapper:
491
494
  }
492
495
  if estimator is None:
493
496
  params = {}
494
- # emb_pattern = r"(.+)_emb\d+"
495
- # emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
496
- # max_bin_by_feature_type = {
497
- # feature: 63 if feature in emb_features else 255 for feature in x.columns
498
- # }
499
- # params["max_bin_by_feature_type"] = max_bin_by_feature_type
500
497
  if target_type == ModelTaskType.MULTICLASS:
501
- # params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
502
- # params = _get_add_params(params, add_params)
503
- # estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
504
498
  params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
505
499
  params = _get_add_params(params, add_params)
506
500
  estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
507
501
  elif target_type == ModelTaskType.BINARY:
508
- # params = _get_add_params(params, CATBOOST_BINARY_PARAMS)
509
- # params = _get_add_params(params, add_params)
510
- # estimator = CatBoostWrapper(CatBoostClassifier(**params), **kwargs)
511
502
  params = _get_add_params(params, LIGHTGBM_BINARY_PARAMS)
512
503
  params = _get_add_params(params, add_params)
513
504
  estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
514
505
  elif target_type == ModelTaskType.REGRESSION:
515
- # params = _get_add_params(params, CATBOOST_REGRESSION_PARAMS)
516
- # params = _get_add_params(params, add_params)
517
- # estimator = CatBoostWrapper(CatBoostRegressor(**params), **kwargs)
518
506
  params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
519
507
  params = _get_add_params(params, add_params)
520
508
  estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
@@ -527,18 +515,19 @@ class EstimatorWrapper:
527
515
  estimator_copy = deepcopy(estimator)
528
516
  kwargs["estimator"] = estimator_copy
529
517
  if is_catboost_estimator(estimator):
530
- params["has_time"] = has_date
531
518
  if cat_features is not None:
532
519
  for cat_feature in cat_features:
533
520
  if cat_feature not in x.columns:
534
521
  logger.error(
535
522
  f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
536
523
  )
537
- estimator_copy.set_params(cat_features=cat_features)
524
+ estimator_copy.set_params(cat_features=cat_features, has_time=has_date)
538
525
  estimator = CatBoostWrapper(**kwargs)
539
526
  else:
540
527
  if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
541
528
  estimator = LightGBMWrapper(**kwargs)
529
+ elif is_catboost_estimator(estimator):
530
+ estimator = CatBoostWrapper(**kwargs)
542
531
  else:
543
532
  logger.warning(
544
533
  f"Unexpected estimator is used for metrics: {estimator}. "
@@ -765,14 +754,12 @@ class LightGBMWrapper(EstimatorWrapper):
765
754
  self.cat_features = None
766
755
 
767
756
  def _prepare_to_fit(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, pd.Series, np.ndarray, dict]:
768
- x, y, groups, params = super()._prepare_to_fit(x, y)
769
- if self.target_type == ModelTaskType.MULTICLASS:
770
- params["num_class"] = y.nunique()
757
+ x, y_numpy, groups, params = super()._prepare_to_fit(x, y)
771
758
  self.cat_features = _get_cat_features(x)
772
759
  x = fill_na_cat_features(x, self.cat_features)
773
760
  for feature in self.cat_features:
774
761
  x[feature] = x[feature].astype("category").cat.codes
775
- if not is_numeric_dtype(y):
762
+ if not is_numeric_dtype(y_numpy):
776
763
  y = correct_string_target(y)
777
764
 
778
765
  return x, y, groups, params
@@ -90,7 +90,8 @@ class FeatureInfo:
90
90
  def _get_feature_sample(feature_meta: FeaturesMetadataV2, data: Optional[pd.DataFrame]) -> str:
91
91
  if data is not None and len(data) > 0 and feature_meta.name in data.columns:
92
92
  if len(data) > 3:
93
- feature_sample = np.random.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
93
+ rand = np.random.RandomState(42)
94
+ feature_sample = rand.choice(data[feature_meta.name].dropna().unique(), 3).tolist()
94
95
  else:
95
96
  feature_sample = data[feature_meta.name].dropna().unique().tolist()
96
97
  if len(feature_sample) > 0 and isinstance(feature_sample[0], float):
@@ -1 +0,0 @@
1
- __version__ = "1.2.68a3832.dev8"