upgini 1.2.71a3832.dev13__py3-none-any.whl → 1.2.72a3659.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/vector.py +23 -1
- upgini/features_enricher.py +1 -2
- upgini/metrics.py +9 -29
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72a3659.dev1.dist-info}/METADATA +1 -1
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72a3659.dev1.dist-info}/RECORD +8 -8
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72a3659.dev1.dist-info}/WHEEL +0 -0
- {upgini-1.2.71a3832.dev13.dist-info → upgini-1.2.72a3659.dev1.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.71a3832.dev13"
|
1
|
+
__version__ = "1.2.72a3659.dev1"
|
upgini/autofe/vector.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
from typing import List, Optional
|
1
|
+
from typing import Dict, List, Optional
|
2
2
|
|
3
3
|
import pandas as pd
|
4
4
|
|
@@ -22,3 +22,25 @@ class Sum(PandasOperator, VectorizableMixin):
|
|
22
22
|
|
23
23
|
def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
24
24
|
return pd.DataFrame(data).T.fillna(0).sum(axis=1)
|
25
|
+
|
26
|
+
|
27
|
+
class OnnxModel(PandasOperator):
|
28
|
+
name: str = "onnx"
|
29
|
+
is_vector: bool = True
|
30
|
+
output_type: Optional[str] = "float"
|
31
|
+
model_name: str
|
32
|
+
|
33
|
+
def get_params(self) -> Dict[str, Optional[str]]:
|
34
|
+
res = super().get_params()
|
35
|
+
res.update(
|
36
|
+
{
|
37
|
+
"model_name": self.model_name,
|
38
|
+
}
|
39
|
+
)
|
40
|
+
return res
|
41
|
+
|
42
|
+
# def load_model(self):
|
43
|
+
# ...
|
44
|
+
|
45
|
+
# def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
|
46
|
+
# ...
|
upgini/features_enricher.py
CHANGED
@@ -3250,8 +3250,7 @@ if response.status_code == 200:
|
|
3250
3250
|
def _validate_eval_set_pair(self, X: pd.DataFrame, eval_pair: Tuple) -> Tuple[pd.DataFrame, pd.Series]:
|
3251
3251
|
if len(eval_pair) != 2:
|
3252
3252
|
raise ValidationError(self.bundle.get("eval_set_invalid_tuple_size").format(len(eval_pair)))
|
3253
|
-
eval_X = eval_pair
|
3254
|
-
eval_y = eval_pair[1]
|
3253
|
+
eval_X, eval_y = eval_pair
|
3255
3254
|
|
3256
3255
|
if _num_samples(eval_X) == 0:
|
3257
3256
|
raise ValidationError(self.bundle.get("eval_x_is_empty"))
|
upgini/metrics.py
CHANGED
@@ -8,13 +8,12 @@ from copy import deepcopy
|
|
8
8
|
from dataclasses import dataclass
|
9
9
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
10
10
|
|
11
|
+
import lightgbm as lgb
|
11
12
|
import numpy as np
|
12
13
|
import pandas as pd
|
13
14
|
from lightgbm import LGBMClassifier, LGBMRegressor
|
14
|
-
import lightgbm as lgb
|
15
15
|
from numpy import log1p
|
16
16
|
from pandas.api.types import is_numeric_dtype
|
17
|
-
# from sklearn.calibration import LabelEncoder
|
18
17
|
from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
|
19
18
|
from sklearn.preprocessing import OrdinalEncoder
|
20
19
|
|
@@ -127,7 +126,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
|
|
127
126
|
"max_cat_threshold": 80,
|
128
127
|
"min_data_per_group": 20,
|
129
128
|
"cat_smooth": 18,
|
130
|
-
"cat_l2"
|
129
|
+
"cat_l2": 8,
|
131
130
|
"objective": "multiclass",
|
132
131
|
"class_weight": "balanced",
|
133
132
|
"use_quantized_grad": "true",
|
@@ -148,7 +147,7 @@ LIGHTGBM_BINARY_PARAMS = {
|
|
148
147
|
"max_cat_threshold": 80,
|
149
148
|
"min_data_per_group": 20,
|
150
149
|
"cat_smooth": 18,
|
151
|
-
"cat_l2"
|
150
|
+
"cat_l2": 8,
|
152
151
|
"verbosity": -1,
|
153
152
|
}
|
154
153
|
|
@@ -756,7 +755,6 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
756
755
|
logger=logger,
|
757
756
|
)
|
758
757
|
self.cat_features = None
|
759
|
-
# self.cat_features_encoders = dict()
|
760
758
|
self.cat_encoder = None
|
761
759
|
self.n_classes = None
|
762
760
|
|
@@ -768,23 +766,13 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
768
766
|
params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
|
769
767
|
self.cat_features = _get_cat_features(x)
|
770
768
|
if self.cat_features:
|
771
|
-
params["categorical_feature"] = self.cat_features
|
772
|
-
# params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features] Works
|
773
|
-
# params["categorical_feature"] = "notauto"
|
774
|
-
# params["categorical_feature"] = "name:" + ",".join(self.cat_features) # Doesn't work
|
775
|
-
# cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features] Doesn't work
|
776
|
-
# params["categorical_feature"] = ",".join(cat_indices)
|
777
|
-
pass
|
778
769
|
x = fill_na_cat_features(x, self.cat_features)
|
779
770
|
encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
|
780
|
-
encoded =
|
771
|
+
encoded = pd.DataFrame(
|
772
|
+
encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
|
773
|
+
)
|
781
774
|
x[self.cat_features] = encoded
|
782
775
|
self.cat_encoder = encoder
|
783
|
-
# for feature in self.cat_features:
|
784
|
-
# encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
|
785
|
-
# x[feature] = encoder.fit_transform(x[feature])
|
786
|
-
# self.cat_features_encoders[feature] = encoder
|
787
|
-
# x[feature] = x[feature].astype("category").cat.codes
|
788
776
|
if not is_numeric_dtype(y_numpy):
|
789
777
|
y_numpy = correct_string_target(y_numpy)
|
790
778
|
|
@@ -793,19 +781,11 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
793
781
|
def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
|
794
782
|
x, y_numpy, params = super()._prepare_to_calculate(x, y)
|
795
783
|
if self.cat_features is not None:
|
796
|
-
params["categorical_feature"] = self.cat_features
|
797
|
-
# params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features]
|
798
|
-
# params["categorical_feature"] = "notauto"
|
799
|
-
# params["categorical_feature"] = "name:" + ",".join(self.cat_features) # Doesn't work
|
800
|
-
# cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features]
|
801
|
-
# params["categorical_feature"] = ",".join(cat_indices)
|
802
784
|
x = fill_na_cat_features(x, self.cat_features)
|
803
785
|
if self.cat_encoder is not None:
|
804
|
-
x[self.cat_features] =
|
805
|
-
|
806
|
-
|
807
|
-
# x[feature] = encoder.transform(x[feature])
|
808
|
-
# x[feature] = x[feature].astype("category").cat.codes
|
786
|
+
x[self.cat_features] = pd.DataFrame(
|
787
|
+
self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
|
788
|
+
)
|
809
789
|
if not is_numeric_dtype(y):
|
810
790
|
y_numpy = correct_string_target(y_numpy)
|
811
791
|
return x, y_numpy, params
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.71a3832.dev13
|
3
|
+
Version: 1.2.72a3659.dev1
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=n3Di7UqdUYABUquK0tXIme5xiFjO7fpJ3AKGXnT-Jec,33
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
|
7
7
|
upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=jobZL_Hg7guufDYH2XdanxgbyJTuC9ZAMZodeptE3I4,39177
|
10
10
|
upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -21,7 +21,7 @@ upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
|
|
21
21
|
upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
|
22
22
|
upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
|
23
23
|
upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
|
24
|
-
upgini/autofe/vector.py,sha256
|
24
|
+
upgini/autofe/vector.py,sha256=-aLI4cA5HI2p42Skj4Sfb3XAPAFfbcu7FjukWsxVFdM,1161
|
25
25
|
upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
|
26
26
|
upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
|
27
27
|
upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=KNFzJta1SpGU4sp07dHKSeVJlDs_9qgD2wcw5YuJfOc,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.71a3832.dev13.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
-
upgini-1.2.71a3832.dev13.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
-
upgini-1.2.71a3832.dev13.dist-info/RECORD,,
|
73
|
+
upgini-1.2.72a3659.dev1.dist-info/METADATA,sha256=tuv9DtWEtwHVjoIMPK4LKOvrmaQ3suMZS43JeEcEDiY,49101
|
74
|
+
upgini-1.2.72a3659.dev1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.72a3659.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.72a3659.dev1.dist-info/RECORD,,
|
File without changes
|
File without changes
|