upgini 1.2.93__py3-none-any.whl → 1.2.95__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +5 -8
- upgini/metrics.py +8 -1
- {upgini-1.2.93.dist-info → upgini-1.2.95.dist-info}/METADATA +3 -3
- {upgini-1.2.93.dist-info → upgini-1.2.95.dist-info}/RECORD +7 -7
- {upgini-1.2.93.dist-info → upgini-1.2.95.dist-info}/WHEEL +0 -0
- {upgini-1.2.93.dist-info → upgini-1.2.95.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.93"
|
1
|
+
__version__ = "1.2.95"
|
upgini/features_enricher.py
CHANGED
@@ -120,10 +120,7 @@ except Exception:
|
|
120
120
|
|
121
121
|
from upgini.utils.sample_utils import SampleColumns, SampleConfig, _num_samples, sample
|
122
122
|
from upgini.utils.sort import sort_columns
|
123
|
-
from upgini.utils.target_utils import (
|
124
|
-
calculate_psi,
|
125
|
-
define_task,
|
126
|
-
)
|
123
|
+
from upgini.utils.target_utils import calculate_psi, define_task
|
127
124
|
from upgini.utils.warning_counter import WarningCounter
|
128
125
|
from upgini.version_validator import validate_version
|
129
126
|
|
@@ -227,7 +224,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
227
224
|
random_state: int = 42,
|
228
225
|
cv: Optional[CVType] = None,
|
229
226
|
loss: Optional[str] = None,
|
230
|
-
|
227
|
+
autodetect_search_keys: bool = True,
|
231
228
|
generate_features: Optional[List[str]] = None,
|
232
229
|
columns_for_online_api: Optional[List[str]] = None,
|
233
230
|
round_embeddings: Optional[int] = None,
|
@@ -336,7 +333,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
336
333
|
self.runtime_parameters.properties["feature_generation_params.hash_index"] = True
|
337
334
|
self.date_format = date_format
|
338
335
|
self.random_state = random_state
|
339
|
-
self.
|
336
|
+
self.autodetect_search_keys = autodetect_search_keys
|
340
337
|
self.cv = cv
|
341
338
|
if cv is not None:
|
342
339
|
self.runtime_parameters.properties["cv_type"] = cv.name
|
@@ -3630,7 +3627,7 @@ if response.status_code == 200:
|
|
3630
3627
|
f"Random state: {self.random_state}\n"
|
3631
3628
|
f"Generate features: {self.generate_features}\n"
|
3632
3629
|
f"Round embeddings: {self.round_embeddings}\n"
|
3633
|
-
f"Detect missing search keys: {self.
|
3630
|
+
f"Detect missing search keys: {self.autodetect_search_keys}\n"
|
3634
3631
|
f"Exclude columns: {self.exclude_columns}\n"
|
3635
3632
|
f"Exclude features sources: {exclude_features_sources}\n"
|
3636
3633
|
f"Calculate metrics: {calculate_metrics}\n"
|
@@ -4342,7 +4339,7 @@ if response.status_code == 200:
|
|
4342
4339
|
):
|
4343
4340
|
raise ValidationError(self.bundle.get("empty_search_key").format(column_name))
|
4344
4341
|
|
4345
|
-
if self.
|
4342
|
+
if self.autodetect_search_keys and (
|
4346
4343
|
not is_transform or set(valid_search_keys.values()) != set(self.fit_search_keys.values())
|
4347
4344
|
):
|
4348
4345
|
valid_search_keys = self.__detect_missing_search_keys(
|
upgini/metrics.py
CHANGED
@@ -1037,14 +1037,21 @@ def _get_scorer_by_name(scoring: str) -> Tuple[Callable, str, int]:
|
|
1037
1037
|
metric_name = scoring
|
1038
1038
|
multiplier = 1
|
1039
1039
|
if metric_name == "mean_squared_log_error" or metric_name == "MSLE" or metric_name == "msle":
|
1040
|
+
metric_name = "MSLE"
|
1040
1041
|
scoring = make_scorer(_ext_mean_squared_log_error, greater_is_better=False)
|
1041
1042
|
multiplier = -1
|
1042
1043
|
elif "root_mean_squared_log_error" in metric_name or metric_name == "RMSLE" or metric_name == "rmsle":
|
1044
|
+
metric_name = "RMSLE"
|
1043
1045
|
scoring = make_scorer(_ext_root_mean_squared_log_error, greater_is_better=False)
|
1044
1046
|
multiplier = -1
|
1045
1047
|
elif metric_name == "root_mean_squared_error" or metric_name == "RMSE" or metric_name == "rmse":
|
1048
|
+
metric_name = "RMSE"
|
1046
1049
|
scoring = get_scorer("neg_root_mean_squared_error")
|
1047
1050
|
multiplier = -1
|
1051
|
+
elif metric_name == "mean_absolute_percentage_error" or metric_name == "MAPE" or metric_name == "mape":
|
1052
|
+
metric_name = "MAPE"
|
1053
|
+
scoring = get_scorer("neg_mean_absolute_percentage_error")
|
1054
|
+
multiplier = -1
|
1048
1055
|
elif scoring in available_scorers:
|
1049
1056
|
scoring = get_scorer(scoring)
|
1050
1057
|
elif ("neg_" + scoring) in available_scorers:
|
@@ -1078,7 +1085,7 @@ def define_scorer(target_type: ModelTaskType, scoring: Union[Callable, str, None
|
|
1078
1085
|
elif target_type == ModelTaskType.MULTICLASS:
|
1079
1086
|
scoring = "accuracy"
|
1080
1087
|
elif target_type == ModelTaskType.REGRESSION:
|
1081
|
-
scoring = "
|
1088
|
+
scoring = "MAPE"
|
1082
1089
|
else:
|
1083
1090
|
raise Exception(bundle.get("metrics_unsupported_target_type").format(target_type))
|
1084
1091
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.93
|
3
|
+
Version: 1.2.95
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -786,12 +786,12 @@ enricher.transform(X, exclude_features_sources=(trial_features + paid_features))
|
|
786
786
|
|
787
787
|
### Turn off autodetection for search key columns
|
788
788
|
Upgini has autodetection of search keys on by default.
|
789
|
-
To turn off use `
|
789
|
+
To turn off use `autodetect_search_keys=False`:
|
790
790
|
|
791
791
|
```python
|
792
792
|
enricher = FeaturesEnricher(
|
793
793
|
search_keys={"date": SearchKey.DATE},
|
794
|
-
|
794
|
+
autodetect_search_keys=False,
|
795
795
|
)
|
796
796
|
|
797
797
|
enricher.fit(X, y)
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=LCX0tFxJjMTxVwJ1UGPIrmKPhm54wP14HUlxerVcKdo,23
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=e6JDYTZ2AwC5aF-dqclKZKkiKrHo2f6cFmMQO2ZZmjM,32724
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=DFBA-3_yZSDcvJnfZjPCvNFFSC8OZwDl992-dlathm0,218432
|
7
7
|
upgini/http.py,sha256=4i7fQwrwU3WzDUOWzrgR-4C8eJwj_5dBwRAR-UjUtlc,44345
|
8
8
|
upgini/metadata.py,sha256=vsbbHyPCP3Rs8WkeDgQg99uAA_zmsbDStAT-NwDYhO4,12455
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=UbKEsHB7XDzoyGNqDx846zbh1t65GpqdnnhViccdoKU,45615
|
10
10
|
upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -71,7 +71,7 @@ upgini/utils/target_utils.py,sha256=i3Xt5l9ybB2_nF_ma5cfPuL3OeFTs2dY2xDI0p4Azpg,
|
|
71
71
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
72
72
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
73
73
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
74
|
-
upgini-1.2.
|
75
|
-
upgini-1.2.
|
76
|
-
upgini-1.2.
|
77
|
-
upgini-1.2.
|
74
|
+
upgini-1.2.95.dist-info/METADATA,sha256=vWKd6YcI1dhF0RgGXYVclvvWRPq34nAEkBjHhKfGoEg,49528
|
75
|
+
upgini-1.2.95.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
76
|
+
upgini-1.2.95.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
77
|
+
upgini-1.2.95.dist-info/RECORD,,
|
File without changes
|
File without changes
|