upgini 1.2.68a3832.dev6__tar.gz → 1.2.68a3832.dev8__tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

Files changed (78)
  1. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/PKG-INFO +1 -1
  2. upgini-1.2.68a3832.dev8/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/metrics.py +38 -30
  4. upgini-1.2.68a3832.dev6/src/upgini/__about__.py +0 -1
  5. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/.gitignore +0 -0
  6. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/LICENSE +0 -0
  7. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/README.md +0 -0
  8. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/pyproject.toml +0 -0
  9. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/__init__.py +0 -0
  10. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/ads.py +0 -0
  11. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/ads_management/__init__.py +0 -0
  12. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/ads_management/ads_manager.py +0 -0
  13. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/__init__.py +0 -0
  14. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/all_operators.py +0 -0
  15. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/binary.py +0 -0
  16. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/date.py +0 -0
  17. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/feature.py +0 -0
  18. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/groupby.py +0 -0
  19. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/operator.py +0 -0
  20. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/__init__.py +0 -0
  21. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/base.py +0 -0
  22. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/cross.py +0 -0
  23. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/delta.py +0 -0
  24. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/lag.py +0 -0
  25. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/roll.py +0 -0
  26. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/trend.py +0 -0
  27. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/timeseries/volatility.py +0 -0
  28. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/unary.py +0 -0
  29. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/autofe/vector.py +0 -0
  30. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/data_source/__init__.py +0 -0
  31. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/data_source/data_source_publisher.py +0 -0
  32. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/dataset.py +0 -0
  33. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/errors.py +0 -0
  34. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/features_enricher.py +0 -0
  35. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/http.py +0 -0
  36. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/lazy_import.py +0 -0
  37. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/mdc/__init__.py +0 -0
  38. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/mdc/context.py +0 -0
  39. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/metadata.py +0 -0
  40. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/normalizer/__init__.py +0 -0
  41. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/normalizer/normalize_utils.py +0 -0
  42. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/resource_bundle/__init__.py +0 -0
  43. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/resource_bundle/exceptions.py +0 -0
  44. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/resource_bundle/strings.properties +0 -0
  45. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  46. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/sampler/__init__.py +0 -0
  47. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/sampler/base.py +0 -0
  48. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/sampler/random_under_sampler.py +0 -0
  49. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/sampler/utils.py +0 -0
  50. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/search_task.py +0 -0
  51. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/spinner.py +0 -0
  52. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  53. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/__init__.py +0 -0
  54. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/base_search_key_detector.py +0 -0
  55. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/blocked_time_series.py +0 -0
  56. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/country_utils.py +0 -0
  57. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/custom_loss_utils.py +0 -0
  58. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/cv_utils.py +0 -0
  59. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/datetime_utils.py +0 -0
  60. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/deduplicate_utils.py +0 -0
  61. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/display_utils.py +0 -0
  62. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/email_utils.py +0 -0
  63. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/fallback_progress_bar.py +0 -0
  64. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/feature_info.py +0 -0
  65. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/features_validator.py +0 -0
  66. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/format.py +0 -0
  67. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/ip_utils.py +0 -0
  68. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/mstats.py +0 -0
  69. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/phone_utils.py +0 -0
  70. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/postal_code_utils.py +0 -0
  71. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/progress_bar.py +0 -0
  72. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/sklearn_ext.py +0 -0
  73. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/sort.py +0 -0
  74. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/target_utils.py +0 -0
  75. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/track_info.py +0 -0
  76. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/ts_utils.py +0 -0
  77. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/utils/warning_counter.py +0 -0
  78. {upgini-1.2.68a3832.dev6 → upgini-1.2.68a3832.dev8}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.68a3832.dev6
3
+ Version: 1.2.68a3832.dev8
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.68a3832.dev8"
@@ -1,11 +1,11 @@
1
1
  from __future__ import annotations
2
2
 
3
- from dataclasses import dataclass
4
3
  import inspect
5
4
  import logging
6
5
  import re
7
6
  from collections import defaultdict
8
7
  from copy import deepcopy
8
+ from dataclasses import dataclass
9
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
10
10
 
11
11
  import numpy as np
@@ -26,11 +26,8 @@ except ImportError:
26
26
  from sklearn.metrics._scorer import SCORERS
27
27
 
28
28
  available_scorers = SCORERS
29
- from sklearn.metrics._regression import (
30
- _check_reg_targets,
31
- check_consistent_length,
32
- )
33
29
  from sklearn.metrics import mean_squared_error
30
+ from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
34
31
  from sklearn.model_selection import BaseCrossValidator
35
32
 
36
33
  from upgini.errors import ValidationError
@@ -102,37 +99,43 @@ LIGHTGBM_PARAMS = {
102
99
  }
103
100
 
104
101
  LIGHTGBM_REGRESSION_PARAMS = {
105
- "random_state": DEFAULT_RANDOM_STATE,
106
- "n_estimators": 275,
107
- "max_depth": 5,
108
- "max_cat_threshold": 80,
109
- "min_data_per_group": 25,
110
- "cat_l2": 10,
111
- "cat_smooth": 12,
112
- "learning_rate": 0.05,
113
- "feature_fraction": 1.0,
114
- "min_sum_hessian_in_leaf": 0.01,
115
- "objective": "huber",
116
- "verbosity": 0,
102
+ "random_state": DEFAULT_RANDOM_STATE,
103
+ "deterministic": True,
104
+ "min_gain_to_split": 0.001,
105
+ "n_estimators": 275,
106
+ "max_depth": 5,
107
+ "max_cat_threshold": 80,
108
+ "min_data_per_group": 25,
109
+ "cat_l2": 10,
110
+ "cat_smooth": 12,
111
+ "learning_rate": 0.05,
112
+ "feature_fraction": 1.0,
113
+ "min_sum_hessian_in_leaf": 0.01,
114
+ "objective": "huber",
115
+ "verbosity": -1,
117
116
  }
118
117
 
119
118
  LIGHTGBM_MULTICLASS_PARAMS = {
120
119
  "random_state": DEFAULT_RANDOM_STATE,
120
+ "deterministic": True,
121
+ "min_gain_to_split": 0.001,
121
122
  "n_estimators": 275,
122
123
  "max_depth": 3,
123
124
  "max_cat_threshold": 80,
124
125
  "min_data_per_group": 25,
125
126
  "cat_l2": 10,
126
127
  "cat_smooth": 12,
127
- "learning_rate": 0.25, # CatBoost 0.25
128
+ "learning_rate": 0.25, # CatBoost 0.25
128
129
  "min_sum_hessian_in_leaf": 0.01,
129
- "objective": "multiclass",
130
- "class_weight": "balanced",
131
- "verbosity": 0,
130
+ "objective": "softmax",
131
+ "class_weight": "balanced", # TODO pass dict with weights for each class
132
+ "verbosity": -1,
132
133
  }
133
134
 
134
135
  LIGHTGBM_BINARY_PARAMS = {
135
136
  "random_state": DEFAULT_RANDOM_STATE,
137
+ "deterministic": True,
138
+ "min_gain_to_split": 0.001,
136
139
  "n_estimators": 275,
137
140
  "max_depth": 5,
138
141
  "max_cat_threshold": 80,
@@ -143,8 +146,8 @@ LIGHTGBM_BINARY_PARAMS = {
143
146
  "feature_fraction": 1.0,
144
147
  "min_sum_hessian_in_leaf": 0.01,
145
148
  "objective": "binary",
146
- "class_weight": "balanced",
147
- "verbosity": 0,
149
+ "class_weight": "balanced", # TODO pass dict with weights for each class
150
+ "verbosity": -1,
148
151
  }
149
152
 
150
153
  N_FOLDS = 5
@@ -266,6 +269,7 @@ SUPPORTED_CATBOOST_METRICS = {
266
269
  def is_catboost_estimator(estimator):
267
270
  try:
268
271
  from catboost import CatBoostClassifier, CatBoostRegressor
272
+
269
273
  return isinstance(estimator, (CatBoostClassifier, CatBoostRegressor))
270
274
  except ImportError:
271
275
  return False
@@ -487,8 +491,12 @@ class EstimatorWrapper:
487
491
  }
488
492
  if estimator is None:
489
493
  params = {}
490
- # if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
491
- # params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
494
+ # emb_pattern = r"(.+)_emb\d+"
495
+ # emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
496
+ # max_bin_by_feature_type = {
497
+ # feature: 63 if feature in emb_features else 255 for feature in x.columns
498
+ # }
499
+ # params["max_bin_by_feature_type"] = max_bin_by_feature_type
492
500
  if target_type == ModelTaskType.MULTICLASS:
493
501
  # params = _get_add_params(params, CATBOOST_MULTICLASS_PARAMS)
494
502
  # params = _get_add_params(params, add_params)
@@ -526,9 +534,7 @@ class EstimatorWrapper:
526
534
  logger.error(
527
535
  f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
528
536
  )
529
- estimator_copy.set_params(
530
- cat_features=cat_features
531
- )
537
+ estimator_copy.set_params(cat_features=cat_features)
532
538
  estimator = CatBoostWrapper(**kwargs)
533
539
  else:
534
540
  if isinstance(estimator, (LGBMClassifier, LGBMRegressor)):
@@ -576,8 +582,9 @@ class CatBoostWrapper(EstimatorWrapper):
576
582
  x, y, groups, params = super()._prepare_to_fit(x, y)
577
583
 
578
584
  # Find embeddings
579
- from catboost import CatBoostClassifier
580
585
  import catboost
586
+ from catboost import CatBoostClassifier
587
+
581
588
  if hasattr(CatBoostClassifier, "get_embedding_feature_indices"):
582
589
  emb_pattern = r"(.+)_emb\d+"
583
590
  self.emb_features = [c for c in x.columns if re.match(emb_pattern, c) and is_numeric_dtype(x[c])]
@@ -701,6 +708,7 @@ class CatBoostWrapper(EstimatorWrapper):
701
708
  def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
702
709
  try:
703
710
  from catboost import Pool
711
+
704
712
  # Create Pool for fold data, if need (for example, when categorical features are present)
705
713
  fold_pool = Pool(
706
714
  x,
@@ -781,8 +789,8 @@ class LightGBMWrapper(EstimatorWrapper):
781
789
 
782
790
  def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
783
791
  try:
784
- import shap
785
792
  import lightgbm as lgb
793
+ import shap
786
794
 
787
795
  if not isinstance(estimator, (lgb.LGBMRegressor, lgb.LGBMClassifier)):
788
796
  return None
@@ -1 +0,0 @@
1
- __version__ = "1.2.68a3832.dev6"