upgini 1.2.72a3659.dev1__tar.gz → 1.2.73__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (78)
  1. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/PKG-INFO +1 -1
  2. upgini-1.2.73/src/upgini/__about__.py +1 -0
  3. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/vector.py +1 -23
  4. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/metrics.py +7 -21
  5. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/target_utils.py +2 -2
  6. upgini-1.2.72a3659.dev1/src/upgini/__about__.py +0 -1
  7. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/.gitignore +0 -0
  8. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/LICENSE +0 -0
  9. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/README.md +0 -0
  10. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/pyproject.toml +0 -0
  11. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/__init__.py +0 -0
  12. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/ads.py +0 -0
  13. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/ads_management/__init__.py +0 -0
  14. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/ads_management/ads_manager.py +0 -0
  15. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/__init__.py +0 -0
  16. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/all_operators.py +0 -0
  17. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/binary.py +0 -0
  18. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/date.py +0 -0
  19. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/feature.py +0 -0
  20. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/groupby.py +0 -0
  21. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/operator.py +0 -0
  22. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/__init__.py +0 -0
  23. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/base.py +0 -0
  24. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/cross.py +0 -0
  25. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/delta.py +0 -0
  26. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/lag.py +0 -0
  27. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/roll.py +0 -0
  28. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/trend.py +0 -0
  29. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/timeseries/volatility.py +0 -0
  30. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/unary.py +0 -0
  31. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/autofe/utils.py +0 -0
  32. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/data_source/__init__.py +0 -0
  33. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/data_source/data_source_publisher.py +0 -0
  34. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/dataset.py +0 -0
  35. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/errors.py +0 -0
  36. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/features_enricher.py +0 -0
  37. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/http.py +0 -0
  38. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/mdc/__init__.py +0 -0
  39. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/mdc/context.py +0 -0
  40. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/metadata.py +0 -0
  41. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/normalizer/__init__.py +0 -0
  42. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/normalizer/normalize_utils.py +0 -0
  43. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/resource_bundle/__init__.py +0 -0
  44. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/resource_bundle/exceptions.py +0 -0
  45. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/resource_bundle/strings.properties +0 -0
  46. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/resource_bundle/strings_widget.properties +0 -0
  47. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/sampler/__init__.py +0 -0
  48. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/sampler/base.py +0 -0
  49. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/sampler/random_under_sampler.py +0 -0
  50. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/sampler/utils.py +0 -0
  51. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/search_task.py +0 -0
  52. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/spinner.py +0 -0
  53. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/Roboto-Regular.ttf +0 -0
  54. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/__init__.py +0 -0
  55. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/base_search_key_detector.py +0 -0
  56. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/blocked_time_series.py +0 -0
  57. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/country_utils.py +0 -0
  58. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/custom_loss_utils.py +0 -0
  59. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/cv_utils.py +0 -0
  60. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/datetime_utils.py +0 -0
  61. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/deduplicate_utils.py +0 -0
  62. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/display_utils.py +0 -0
  63. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/email_utils.py +0 -0
  64. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/fallback_progress_bar.py +0 -0
  65. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/feature_info.py +0 -0
  66. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/features_validator.py +0 -0
  67. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/format.py +0 -0
  68. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/ip_utils.py +0 -0
  69. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/mstats.py +0 -0
  70. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/phone_utils.py +0 -0
  71. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/postal_code_utils.py +0 -0
  72. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/progress_bar.py +0 -0
  73. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/sklearn_ext.py +0 -0
  74. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/sort.py +0 -0
  75. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/track_info.py +0 -0
  76. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/ts_utils.py +0 -0
  77. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/utils/warning_counter.py +0 -0
  78. {upgini-1.2.72a3659.dev1 → upgini-1.2.73}/src/upgini/version_validator.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.72a3659.dev1
3
+ Version: 1.2.73
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -0,0 +1 @@
1
+ __version__ = "1.2.73"
@@ -1,4 +1,4 @@
1
- from typing import Dict, List, Optional
1
+ from typing import List, Optional
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -22,25 +22,3 @@ class Sum(PandasOperator, VectorizableMixin):
22
22
 
23
23
  def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
24
24
  return pd.DataFrame(data).T.fillna(0).sum(axis=1)
25
-
26
-
27
- class OnnxModel(PandasOperator):
28
- name: str = "onnx"
29
- is_vector: bool = True
30
- output_type: Optional[str] = "float"
31
- model_name: str
32
-
33
- def get_params(self) -> Dict[str, Optional[str]]:
34
- res = super().get_params()
35
- res.update(
36
- {
37
- "model_name": self.model_name,
38
- }
39
- )
40
- return res
41
-
42
- # def load_model(self):
43
- # ...
44
-
45
- # def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
46
- # ...
@@ -19,6 +19,7 @@ from sklearn.preprocessing import OrdinalEncoder
19
19
 
20
20
  from upgini.utils.features_validator import FeaturesValidator
21
21
  from upgini.utils.sklearn_ext import cross_validate
22
+ from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
22
23
 
23
24
  try:
24
25
  from sklearn.metrics import get_scorer_names
@@ -30,7 +31,7 @@ except ImportError:
30
31
  available_scorers = SCORERS
31
32
  from sklearn.metrics import mean_squared_error
32
33
  from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
33
- from sklearn.model_selection import BaseCrossValidator
34
+ from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit
34
35
 
35
36
  from upgini.errors import ValidationError
36
37
  from upgini.metadata import ModelTaskType
@@ -84,22 +85,6 @@ CATBOOST_MULTICLASS_PARAMS = {
84
85
  "auto_class_weights": "Balanced",
85
86
  }
86
87
 
87
- LIGHTGBM_PARAMS = {
88
- "random_state": DEFAULT_RANDOM_STATE,
89
- # "num_leaves": 16,
90
- # "n_estimators": 150,
91
- # "min_child_weight": 1,
92
- "max_depth": 4,
93
- "max_cat_threshold": 80,
94
- "min_data_per_group": 25,
95
- "num_boost_round": 150,
96
- "cat_l2": 10,
97
- "cat_smooth": 12,
98
- "learning_rate": 0.05,
99
- "feature_fraction": 1.0,
100
- "min_sum_hessian_in_leaf": 0.01,
101
- }
102
-
103
88
  LIGHTGBM_REGRESSION_PARAMS = {
104
89
  "random_state": DEFAULT_RANDOM_STATE,
105
90
  "deterministic": True,
@@ -128,7 +113,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
128
113
  "cat_smooth": 18,
129
114
  "cat_l2": 8,
130
115
  "objective": "multiclass",
131
- "class_weight": "balanced",
116
+ # "class_weight": "balanced",
132
117
  "use_quantized_grad": "true",
133
118
  "num_grad_quant_bins": "8",
134
119
  "stochastic_rounding": "true",
@@ -142,7 +127,7 @@ LIGHTGBM_BINARY_PARAMS = {
142
127
  "max_depth": 5,
143
128
  "learning_rate": 0.05,
144
129
  "objective": "binary",
145
- "class_weight": "balanced",
130
+ # "class_weight": "balanced",
146
131
  "deterministic": True,
147
132
  "max_cat_threshold": 80,
148
133
  "min_data_per_group": 20,
@@ -496,7 +481,7 @@ class EstimatorWrapper:
496
481
  "logger": logger,
497
482
  }
498
483
  if estimator is None:
499
- params = {}
484
+ params = {"random_state": DEFAULT_RANDOM_STATE}
500
485
  if target_type == ModelTaskType.MULTICLASS:
501
486
  params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
502
487
  params = _get_add_params(params, add_params)
@@ -506,7 +491,8 @@ class EstimatorWrapper:
506
491
  params = _get_add_params(params, add_params)
507
492
  estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
508
493
  elif target_type == ModelTaskType.REGRESSION:
509
- params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
494
+ if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
495
+ params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
510
496
  params = _get_add_params(params, add_params)
511
497
  estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
512
498
  else:
@@ -297,9 +297,9 @@ def balance_undersample_time_series_trunc(
297
297
  time_unit_threshold: pd.Timedelta = DEFAULT_TIME_UNIT_THRESHOLD,
298
298
  **kwargs,
299
299
  ):
300
- # Convert date column to datetime
301
300
  if id_columns is None:
302
- id_columns = [date_column]
301
+ id_columns = []
302
+ # Convert date column to datetime
303
303
  dates_df = df[id_columns + [date_column]].copy()
304
304
  dates_df[date_column] = pd.to_datetime(dates_df[date_column], unit="ms")
305
305
 
@@ -1 +0,0 @@
1
- __version__ = "1.2.72a3659.dev1"
File without changes
File without changes
File without changes