upgini 1.2.72a3659.dev1__py3-none-any.whl → 1.2.73__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/vector.py +1 -23
- upgini/metrics.py +7 -21
- upgini/utils/target_utils.py +2 -2
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73.dist-info}/METADATA +1 -1
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73.dist-info}/RECORD +8 -8
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73.dist-info}/WHEEL +0 -0
- {upgini-1.2.72a3659.dev1.dist-info → upgini-1.2.73.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.72a3659.dev1"
|
1
|
+
__version__ = "1.2.73"
|
upgini/autofe/vector.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import Dict, List, Optional
|
1
|
+
from typing import List, Optional
|
2
2
|
|
3
3
|
import pandas as pd
|
4
4
|
|
@@ -22,25 +22,3 @@ class Sum(PandasOperator, VectorizableMixin):
|
|
22
22
|
|
23
23
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
24
24
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
25
|
-
|
26
|
-
|
27
|
-
class OnnxModel(PandasOperator):
|
28
|
-
name: str = "onnx"
|
29
|
-
is_vector: bool = True
|
30
|
-
output_type: Optional[str] = "float"
|
31
|
-
model_name: str
|
32
|
-
|
33
|
-
def get_params(self) -> Dict[str, Optional[str]]:
|
34
|
-
res = super().get_params()
|
35
|
-
res.update(
|
36
|
-
{
|
37
|
-
"model_name": self.model_name,
|
38
|
-
}
|
39
|
-
)
|
40
|
-
return res
|
41
|
-
|
42
|
-
# def load_model(self):
|
43
|
-
# ...
|
44
|
-
|
45
|
-
# def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
46
|
-
# ...
|
upgini/metrics.py
CHANGED
@@ -19,6 +19,7 @@ from sklearn.preprocessing import OrdinalEncoder
|
|
19
19
|
|
20
20
|
from upgini.utils.features_validator import FeaturesValidator
|
21
21
|
from upgini.utils.sklearn_ext import cross_validate
|
22
|
+
from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
|
22
23
|
|
23
24
|
try:
|
24
25
|
from sklearn.metrics import get_scorer_names
|
@@ -30,7 +31,7 @@ except ImportError:
|
|
30
31
|
available_scorers = SCORERS
|
31
32
|
from sklearn.metrics import mean_squared_error
|
32
33
|
from sklearn.metrics._regression import _check_reg_targets, check_consistent_length
|
33
|
-
from sklearn.model_selection import BaseCrossValidator
|
34
|
+
from sklearn.model_selection import BaseCrossValidator, TimeSeriesSplit
|
34
35
|
|
35
36
|
from upgini.errors import ValidationError
|
36
37
|
from upgini.metadata import ModelTaskType
|
@@ -84,22 +85,6 @@ CATBOOST_MULTICLASS_PARAMS = {
|
|
84
85
|
"auto_class_weights": "Balanced",
|
85
86
|
}
|
86
87
|
|
87
|
-
LIGHTGBM_PARAMS = {
|
88
|
-
"random_state": DEFAULT_RANDOM_STATE,
|
89
|
-
# "num_leaves": 16,
|
90
|
-
# "n_estimators": 150,
|
91
|
-
# "min_child_weight": 1,
|
92
|
-
"max_depth": 4,
|
93
|
-
"max_cat_threshold": 80,
|
94
|
-
"min_data_per_group": 25,
|
95
|
-
"num_boost_round": 150,
|
96
|
-
"cat_l2": 10,
|
97
|
-
"cat_smooth": 12,
|
98
|
-
"learning_rate": 0.05,
|
99
|
-
"feature_fraction": 1.0,
|
100
|
-
"min_sum_hessian_in_leaf": 0.01,
|
101
|
-
}
|
102
|
-
|
103
88
|
LIGHTGBM_REGRESSION_PARAMS = {
|
104
89
|
"random_state": DEFAULT_RANDOM_STATE,
|
105
90
|
"deterministic": True,
|
@@ -128,7 +113,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
|
|
128
113
|
"cat_smooth": 18,
|
129
114
|
"cat_l2": 8,
|
130
115
|
"objective": "multiclass",
|
131
|
-
"class_weight": "balanced",
|
116
|
+
# "class_weight": "balanced",
|
132
117
|
"use_quantized_grad": "true",
|
133
118
|
"num_grad_quant_bins": "8",
|
134
119
|
"stochastic_rounding": "true",
|
@@ -142,7 +127,7 @@ LIGHTGBM_BINARY_PARAMS = {
|
|
142
127
|
"max_depth": 5,
|
143
128
|
"learning_rate": 0.05,
|
144
129
|
"objective": "binary",
|
145
|
-
"class_weight": "balanced",
|
130
|
+
# "class_weight": "balanced",
|
146
131
|
"deterministic": True,
|
147
132
|
"max_cat_threshold": 80,
|
148
133
|
"min_data_per_group": 20,
|
@@ -496,7 +481,7 @@ class EstimatorWrapper:
|
|
496
481
|
"logger": logger,
|
497
482
|
}
|
498
483
|
if estimator is None:
|
499
|
-
params = {}
|
484
|
+
params = {"random_state": DEFAULT_RANDOM_STATE}
|
500
485
|
if target_type == ModelTaskType.MULTICLASS:
|
501
486
|
params = _get_add_params(params, LIGHTGBM_MULTICLASS_PARAMS)
|
502
487
|
params = _get_add_params(params, add_params)
|
@@ -506,7 +491,8 @@ class EstimatorWrapper:
|
|
506
491
|
params = _get_add_params(params, add_params)
|
507
492
|
estimator = LightGBMWrapper(LGBMClassifier(**params), **kwargs)
|
508
493
|
elif target_type == ModelTaskType.REGRESSION:
|
509
|
-
|
494
|
+
if not isinstance(cv, TimeSeriesSplit) and not isinstance(cv, BlockedTimeSeriesSplit):
|
495
|
+
params = _get_add_params(params, LIGHTGBM_REGRESSION_PARAMS)
|
510
496
|
params = _get_add_params(params, add_params)
|
511
497
|
estimator = LightGBMWrapper(LGBMRegressor(**params), **kwargs)
|
512
498
|
else:
|
upgini/utils/target_utils.py
CHANGED
@@ -297,9 +297,9 @@ def balance_undersample_time_series_trunc(
|
|
297
297
|
time_unit_threshold: pd.Timedelta = DEFAULT_TIME_UNIT_THRESHOLD,
|
298
298
|
**kwargs,
|
299
299
|
):
|
300
|
-
# Convert date column to datetime
|
301
300
|
if id_columns is None:
|
302
|
-
id_columns = [
|
301
|
+
id_columns = []
|
302
|
+
# Convert date column to datetime
|
303
303
|
dates_df = df[id_columns + [date_column]].copy()
|
304
304
|
dates_df[date_column] = pd.to_datetime(dates_df[date_column], unit="ms")
|
305
305
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=ysTXj773PyL7hI1RrRY3UUCt20W_VqXKP4K7JDjDZ9A,23
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
@@ -6,7 +6,7 @@ upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
6
|
upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
|
7
7
|
upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=UNNA3H7wWATq-lTb9BChDdFc14MOYH9FTWY2Te4OU2o,39024
|
10
10
|
upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -21,7 +21,7 @@ upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
|
|
21
21
|
upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
|
22
22
|
upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
|
23
23
|
upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
|
24
|
-
upgini/autofe/vector.py,sha256
|
24
|
+
upgini/autofe/vector.py,sha256=l0KdKg-txlZxDSE4hPPfCtfGQofYbl7oaABPr830sPI,667
|
25
25
|
upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
|
26
26
|
upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
|
27
27
|
upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
|
@@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml
|
|
66
66
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
67
67
|
upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
|
68
68
|
upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
|
69
|
-
upgini/utils/target_utils.py,sha256=
|
69
|
+
upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,16650
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.72a3659.dev1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
-
upgini-1.2.72a3659.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
-
upgini-1.2.72a3659.dev1.dist-info/RECORD,,
|
73
|
+
upgini-1.2.73.dist-info/METADATA,sha256=2_n-JWXpk6BuaGu1acn7jhZF7zmBTYR42EKErTjhWX8,49091
|
74
|
+
upgini-1.2.73.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.73.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.73.dist-info/RECORD,,
|
File without changes
|
File without changes
|