upgini 1.2.71a3832.dev13__py3-none-any.whl → 1.2.72a3659.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.71a3832.dev13"
1
+ __version__ = "1.2.72a3659.dev1"
upgini/autofe/vector.py CHANGED
@@ -1,4 +1,4 @@
1
- from typing import List, Optional
1
+ from typing import Dict, List, Optional
2
2
 
3
3
  import pandas as pd
4
4
 
@@ -22,3 +22,25 @@ class Sum(PandasOperator, VectorizableMixin):
22
22
 
23
23
  def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
24
24
  return pd.DataFrame(data).T.fillna(0).sum(axis=1)
25
+
26
+
27
+ class OnnxModel(PandasOperator):
28
+ name: str = "onnx"
29
+ is_vector: bool = True
30
+ output_type: Optional[str] = "float"
31
+ model_name: str
32
+
33
+ def get_params(self) -> Dict[str, Optional[str]]:
34
+ res = super().get_params()
35
+ res.update(
36
+ {
37
+ "model_name": self.model_name,
38
+ }
39
+ )
40
+ return res
41
+
42
+ # def load_model(self):
43
+ # ...
44
+
45
+ # def calculate_vector(self, data: List[pd.Series]) -> pd.Series:
46
+ # ...
@@ -3250,8 +3250,7 @@ if response.status_code == 200:
3250
3250
  def _validate_eval_set_pair(self, X: pd.DataFrame, eval_pair: Tuple) -> Tuple[pd.DataFrame, pd.Series]:
3251
3251
  if len(eval_pair) != 2:
3252
3252
  raise ValidationError(self.bundle.get("eval_set_invalid_tuple_size").format(len(eval_pair)))
3253
- eval_X = eval_pair[0]
3254
- eval_y = eval_pair[1]
3253
+ eval_X, eval_y = eval_pair
3255
3254
 
3256
3255
  if _num_samples(eval_X) == 0:
3257
3256
  raise ValidationError(self.bundle.get("eval_x_is_empty"))
upgini/metrics.py CHANGED
@@ -8,13 +8,12 @@ from copy import deepcopy
8
8
  from dataclasses import dataclass
9
9
  from typing import Any, Callable, Dict, List, Optional, Tuple, Union
10
10
 
11
+ import lightgbm as lgb
11
12
  import numpy as np
12
13
  import pandas as pd
13
14
  from lightgbm import LGBMClassifier, LGBMRegressor
14
- import lightgbm as lgb
15
15
  from numpy import log1p
16
16
  from pandas.api.types import is_numeric_dtype
17
- # from sklearn.calibration import LabelEncoder
18
17
  from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
19
18
  from sklearn.preprocessing import OrdinalEncoder
20
19
 
@@ -127,7 +126,7 @@ LIGHTGBM_MULTICLASS_PARAMS = {
127
126
  "max_cat_threshold": 80,
128
127
  "min_data_per_group": 20,
129
128
  "cat_smooth": 18,
130
- "cat_l2" : 8,
129
+ "cat_l2": 8,
131
130
  "objective": "multiclass",
132
131
  "class_weight": "balanced",
133
132
  "use_quantized_grad": "true",
@@ -148,7 +147,7 @@ LIGHTGBM_BINARY_PARAMS = {
148
147
  "max_cat_threshold": 80,
149
148
  "min_data_per_group": 20,
150
149
  "cat_smooth": 18,
151
- "cat_l2" : 8,
150
+ "cat_l2": 8,
152
151
  "verbosity": -1,
153
152
  }
154
153
 
@@ -756,7 +755,6 @@ class LightGBMWrapper(EstimatorWrapper):
756
755
  logger=logger,
757
756
  )
758
757
  self.cat_features = None
759
- # self.cat_features_encoders = dict()
760
758
  self.cat_encoder = None
761
759
  self.n_classes = None
762
760
 
@@ -768,23 +766,13 @@ class LightGBMWrapper(EstimatorWrapper):
768
766
  params["callbacks"] = [lgb.early_stopping(stopping_rounds=LIGHTGBM_EARLY_STOPPING_ROUNDS, verbose=False)]
769
767
  self.cat_features = _get_cat_features(x)
770
768
  if self.cat_features:
771
- params["categorical_feature"] = self.cat_features
772
- # params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features] Works
773
- # params["categorical_feature"] = "notauto"
774
- # params["categorical_feature"] = "name:" + ",".join(self.cat_features) # Doesn't work
775
- # cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features] Doesn't work
776
- # params["categorical_feature"] = ",".join(cat_indices)
777
- pass
778
769
  x = fill_na_cat_features(x, self.cat_features)
779
770
  encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
780
- encoded = encoder.fit_transform(x[self.cat_features], y_numpy)
771
+ encoded = pd.DataFrame(
772
+ encoder.fit_transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
773
+ )
781
774
  x[self.cat_features] = encoded
782
775
  self.cat_encoder = encoder
783
- # for feature in self.cat_features:
784
- # encoder = OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)
785
- # x[feature] = encoder.fit_transform(x[feature])
786
- # self.cat_features_encoders[feature] = encoder
787
- # x[feature] = x[feature].astype("category").cat.codes
788
776
  if not is_numeric_dtype(y_numpy):
789
777
  y_numpy = correct_string_target(y_numpy)
790
778
 
@@ -793,19 +781,11 @@ class LightGBMWrapper(EstimatorWrapper):
793
781
  def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
794
782
  x, y_numpy, params = super()._prepare_to_calculate(x, y)
795
783
  if self.cat_features is not None:
796
- params["categorical_feature"] = self.cat_features
797
- # params["categorical_feature"] = [x.columns.get_loc(c) for c in self.cat_features]
798
- # params["categorical_feature"] = "notauto"
799
- # params["categorical_feature"] = "name:" + ",".join(self.cat_features) # Doesn't work
800
- # cat_indices = [str(x.columns.get_loc(c)) for c in self.cat_features]
801
- # params["categorical_feature"] = ",".join(cat_indices)
802
784
  x = fill_na_cat_features(x, self.cat_features)
803
785
  if self.cat_encoder is not None:
804
- x[self.cat_features] = self.cat_encoder.transform(x[self.cat_features])
805
- # for feature in self.cat_features:
806
- # encoder = self.cat_features_encoders[feature]
807
- # x[feature] = encoder.transform(x[feature])
808
- # x[feature] = x[feature].astype("category").cat.codes
786
+ x[self.cat_features] = pd.DataFrame(
787
+ self.cat_encoder.transform(x[self.cat_features]), columns=self.cat_features, dtype="category"
788
+ )
809
789
  if not is_numeric_dtype(y):
810
790
  y_numpy = correct_string_target(y_numpy)
811
791
  return x, y_numpy, params
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.71a3832.dev13
3
+ Version: 1.2.72a3659.dev1
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=buorll9F2OX4EgV8VmlIrj09nqmsSmqAG8T8p6hRCls,34
1
+ upgini/__about__.py,sha256=n3Di7UqdUYABUquK0tXIme5xiFjO7fpJ3AKGXnT-Jec,33
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=lk80Bx9U36lva6T4lPHBFk88ivrpZ-2uwwMwQg0LglE,207023
6
+ upgini/features_enricher.py,sha256=Li1sPihWVkPUPcma8HRbPFwpCqd9V9d2p5zQUgkpdpU,206998
7
7
  upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
8
8
  upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
- upgini/metrics.py,sha256=ot6AhxfRRTzM-dNApWTvmteLBAmGjD9OyAuKmtUTprE,40630
9
+ upgini/metrics.py,sha256=jobZL_Hg7guufDYH2XdanxgbyJTuC9ZAMZodeptE3I4,39177
10
10
  upgini/search_task.py,sha256=EuCGp0iCWz2fpuJgN6M47aP_CtIi3Oq9zw78w0mkKiU,17595
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
@@ -21,7 +21,7 @@ upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
21
21
  upgini/autofe/operator.py,sha256=EOffJw6vKXpEh5yymqb1RFNJPxGxmnHdFRo9dB5SCFo,4969
22
22
  upgini/autofe/unary.py,sha256=yVgPvtfnPSOhrii0YgezddmgWPwyOBCR0JutaIkdTTc,4658
23
23
  upgini/autofe/utils.py,sha256=fK1am2_tQj3fL2vDslblye8lmyfWgGIUOX1beYVBz4k,2420
24
- upgini/autofe/vector.py,sha256=l0KdKg-txlZxDSE4hPPfCtfGQofYbl7oaABPr830sPI,667
24
+ upgini/autofe/vector.py,sha256=-aLI4cA5HI2p42Skj4Sfb3XAPAFfbcu7FjukWsxVFdM,1161
25
25
  upgini/autofe/timeseries/__init__.py,sha256=PGwwDAMwvkXl3el12tXVEmZUgDUvlmIPlXtROm6bD18,738
26
26
  upgini/autofe/timeseries/base.py,sha256=rWJqRuFAzTZEsUdWG5s1Vhif9zzRRmalASXvarufRxI,3610
27
27
  upgini/autofe/timeseries/cross.py,sha256=BTINVwuZSbm_4NKkVm0FGM68SrvZLENZKXN7-UyvhYI,5319
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=KNFzJta1SpGU4sp07dHKSeVJlDs_9qgD2wcw5YuJfOc,
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.71a3832.dev13.dist-info/METADATA,sha256=JdRugxJAMW4KLyRuz7yIX_PqSz_nObynmhkW5-g_lVs,49102
74
- upgini-1.2.71a3832.dev13.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.71a3832.dev13.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.71a3832.dev13.dist-info/RECORD,,
73
+ upgini-1.2.72a3659.dev1.dist-info/METADATA,sha256=tuv9DtWEtwHVjoIMPK4LKOvrmaQ3suMZS43JeEcEDiY,49101
74
+ upgini-1.2.72a3659.dev1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.72a3659.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.72a3659.dev1.dist-info/RECORD,,