upgini 1.1.166a2__py3-none-any.whl → 1.1.168__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/dataset.py +3 -3
- upgini/features_enricher.py +4 -3
- upgini/metrics.py +9 -2
- upgini/resource_bundle/strings.properties +1 -1
- {upgini-1.1.166a2.dist-info → upgini-1.1.168.dist-info}/METADATA +1 -1
- {upgini-1.1.166a2.dist-info → upgini-1.1.168.dist-info}/RECORD +9 -9
- {upgini-1.1.166a2.dist-info → upgini-1.1.168.dist-info}/LICENSE +0 -0
- {upgini-1.1.166a2.dist-info → upgini-1.1.168.dist-info}/WHEEL +0 -0
- {upgini-1.1.166a2.dist-info → upgini-1.1.168.dist-info}/top_level.txt +0 -0
upgini/dataset.py
CHANGED
|
@@ -45,11 +45,11 @@ from upgini.utils.warning_counter import WarningCounter
|
|
|
45
45
|
|
|
46
46
|
class Dataset: # (pd.DataFrame):
|
|
47
47
|
MIN_ROWS_COUNT = 100
|
|
48
|
-
MAX_ROWS =
|
|
48
|
+
MAX_ROWS = 200_000
|
|
49
49
|
FIT_SAMPLE_ROWS = 200_000
|
|
50
50
|
FIT_SAMPLE_THRESHOLD = 200_000
|
|
51
|
-
FIT_SAMPLE_WITH_EVAL_SET_ROWS =
|
|
52
|
-
FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD =
|
|
51
|
+
FIT_SAMPLE_WITH_EVAL_SET_ROWS = 200_000
|
|
52
|
+
FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD = 200_000
|
|
53
53
|
MIN_SAMPLE_THRESHOLD = 20_000
|
|
54
54
|
IMBALANCE_THESHOLD = 0.4
|
|
55
55
|
MIN_TARGET_CLASS_ROWS = 100
|
upgini/features_enricher.py
CHANGED
|
@@ -111,6 +111,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
111
111
|
RANDOM_STATE = 42
|
|
112
112
|
CALCULATE_METRICS_THRESHOLD = 50_000_000
|
|
113
113
|
CALCULATE_METRICS_MIN_THRESHOLD = 500
|
|
114
|
+
GENERATE_FEATURES_LIMIT = 10
|
|
114
115
|
EMPTY_FEATURES_INFO = pd.DataFrame(
|
|
115
116
|
columns=[
|
|
116
117
|
bundle.get("features_info_provider"),
|
|
@@ -211,8 +212,8 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
211
212
|
self.generate_features = generate_features
|
|
212
213
|
self.round_embeddings = round_embeddings
|
|
213
214
|
if generate_features is not None:
|
|
214
|
-
if len(generate_features) >
|
|
215
|
-
msg = bundle.get("too_many_generate_features")
|
|
215
|
+
if len(generate_features) > self.GENERATE_FEATURES_LIMIT:
|
|
216
|
+
msg = bundle.get("too_many_generate_features").format(self.GENERATE_FEATURES_LIMIT)
|
|
216
217
|
self.logger.error(msg)
|
|
217
218
|
raise ValidationError(msg)
|
|
218
219
|
self.runtime_parameters.properties["generate_features"] = ",".join(generate_features)
|
|
@@ -1755,7 +1756,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1755
1756
|
and len(self._search_task.unused_features_for_generation) > 0
|
|
1756
1757
|
):
|
|
1757
1758
|
unused_features_for_generation = [
|
|
1758
|
-
dataset.columns_renaming.get(col) for col in self._search_task.unused_features_for_generation
|
|
1759
|
+
dataset.columns_renaming.get(col) or col for col in self._search_task.unused_features_for_generation
|
|
1759
1760
|
]
|
|
1760
1761
|
msg = bundle.get("features_not_generated").format(unused_features_for_generation)
|
|
1761
1762
|
self.logger.warning(msg)
|
upgini/metrics.py
CHANGED
|
@@ -8,7 +8,15 @@ from catboost import CatBoostClassifier, CatBoostRegressor
|
|
|
8
8
|
from lightgbm import LGBMClassifier, LGBMRegressor
|
|
9
9
|
from numpy import log1p
|
|
10
10
|
from pandas.api.types import is_numeric_dtype
|
|
11
|
-
from sklearn.metrics import
|
|
11
|
+
from sklearn.metrics import check_scoring, get_scorer, make_scorer
|
|
12
|
+
|
|
13
|
+
try:
|
|
14
|
+
from sklearn.metrics import get_scorer_names
|
|
15
|
+
available_scorers = get_scorer_names()
|
|
16
|
+
except ImportError:
|
|
17
|
+
from sklearn.metrics._scorer import SCORERS
|
|
18
|
+
available_scorers = SCORERS
|
|
19
|
+
|
|
12
20
|
from sklearn.metrics._regression import (
|
|
13
21
|
_check_reg_targets,
|
|
14
22
|
check_consistent_length,
|
|
@@ -385,7 +393,6 @@ def _get_scorer(target_type: ModelTaskType, scoring: Union[Callable, str, None])
|
|
|
385
393
|
|
|
386
394
|
multiplier = 1
|
|
387
395
|
if isinstance(scoring, str):
|
|
388
|
-
available_scorers = get_scorer_names()
|
|
389
396
|
metric_name = scoring
|
|
390
397
|
if "mean_squared_log_error" == metric_name or "MSLE" == metric_name or "msle" == metric_name:
|
|
391
398
|
scoring = make_scorer(_ext_mean_squared_log_error, greater_is_better=False)
|
|
upgini/resource_bundle/strings.properties
CHANGED
|
@@ -59,7 +59,7 @@ no_connection_to_upgini=No connection to Upgini server https://search.upgini.com
|
|
|
59
59
|
no_internet_connection=No internet connection from Jupyter server {} to initiate external data search with Upgini service, please try with Google Colab https://colab.research.google.com
|
|
60
60
|
access_denied=Access denied
|
|
61
61
|
unsupported_search_key=Search key {} not supported
|
|
62
|
-
too_many_generate_features=Too many columns passed in `generate_features` argument. Only
|
|
62
|
+
too_many_generate_features=Too many columns passed in `generate_features` argument. Only {} columns supported to generate features now
|
|
63
63
|
invalid_round_embeddings=Argument `round_embeddings` should be non negative integer
|
|
64
64
|
no_important_features_for_transform=There are no important features for transform. Return input as transformed
|
|
65
65
|
|
|
{upgini-1.1.166a2.dist-info → upgini-1.1.168.dist-info}/RECORD
CHANGED
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
|
|
2
2
|
upgini/ads.py,sha256=mre6xn44wcC_fg63iLT_kTh4mViZqR9AKRJZAtpQz8Y,2592
|
|
3
|
-
upgini/dataset.py,sha256=
|
|
3
|
+
upgini/dataset.py,sha256=GMIebWv9FNFnlLs2XwVzKnQQAaU5VqTKIptCzZUAgBg,45054
|
|
4
4
|
upgini/errors.py,sha256=BqpvfhW2jJW5fa5KXj0alhXatGl-WK4xTl309-QNLp8,959
|
|
5
|
-
upgini/features_enricher.py,sha256=
|
|
5
|
+
upgini/features_enricher.py,sha256=ISbksk_4HEGYMQPVbdXz4hujSCEFy4p2ofJZaCaldbU,128529
|
|
6
6
|
upgini/fingerprint.js,sha256=VygVIQlN1v4NGZfjHqtRogOw8zjTnnMNJg_f7M5iGQU,33442
|
|
7
7
|
upgini/http.py,sha256=3gVHdNEYmZfegpImHLN7u5wHoqftqcFEdXxdLonREGE,36326
|
|
8
8
|
upgini/metadata.py,sha256=Oefg-rkA4PsZUHIho_clZcnyZwdtVJ1gXPvEY6oBmpg,5969
|
|
9
|
-
upgini/metrics.py,sha256=
|
|
9
|
+
upgini/metrics.py,sha256=4h6gkfYUdOOJADhMzxGq_yfkF750MTwbcE1xcwEIXAs,19653
|
|
10
10
|
upgini/search_task.py,sha256=-csRukeVPH04f7RLbHHAtxHGOi9_z_2Cf5kIuyTATUY,16434
|
|
11
11
|
upgini/spinner.py,sha256=yhakBaydMNS8E8TRAwTdCMdnWrHeWT0cR1M8c9hP6jA,1157
|
|
12
12
|
upgini/version_validator.py,sha256=rDIncP6BEko4J2F2hUcMOtKm_vZbI4ICWcNcw8hrwM4,1400
|
|
@@ -20,7 +20,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
|
20
20
|
upgini/normalizer/phone_normalizer.py,sha256=VIgLXuDuzzjPEXiy_LyDVLZKGaS7-le6Fh6T4D-TQDU,9930
|
|
21
21
|
upgini/resource_bundle/__init__.py,sha256=M7GtS7KPQw9pinz8P2aQWXpSkD2YFwUPVGk1w92Pn84,7888
|
|
22
22
|
upgini/resource_bundle/exceptions.py,sha256=KT-OnqA2J4OTfLjhbEl3KFZM2ci7EOPjqJuY_rXp3vs,622
|
|
23
|
-
upgini/resource_bundle/strings.properties,sha256=
|
|
23
|
+
upgini/resource_bundle/strings.properties,sha256=K3EdTMYl-OlXI8LHA4wYi9SOufX9Pwj7XUBMEapA_fk,22853
|
|
24
24
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
25
|
upgini/sampler/base.py,sha256=X2PVsfZ3Rl7twpFDh5UWyxqY2K_jcMGxZ2NcHLwFRj4,6489
|
|
26
26
|
upgini/sampler/random_under_sampler.py,sha256=whX_f_TtalHH8Seyn_7n3sX_TSiDHeYfALmme9saqDg,4082
|
|
@@ -42,8 +42,8 @@ upgini/utils/postal_code_utils.py,sha256=_8CR9tBqsPptQsmMUvnrCAmBaMIQSWH3JfJ4ly3
|
|
|
42
42
|
upgini/utils/target_utils.py,sha256=cu52icjhDIPpEStHYMXrD2hIl9gzvfnxZr0Ra5osV0k,1616
|
|
43
43
|
upgini/utils/track_info.py,sha256=2IGGyHPXBLhWcLO8-Q-5qir52k_kD6DtdU-sv_Z2hHY,5325
|
|
44
44
|
upgini/utils/warning_counter.py,sha256=vnmdFo5-7GBkU2bK9h_uC0K0Y_wtfcYstxOdeRfacO0,228
|
|
45
|
-
upgini-1.1.
|
|
46
|
-
upgini-1.1.
|
|
47
|
-
upgini-1.1.
|
|
48
|
-
upgini-1.1.
|
|
49
|
-
upgini-1.1.
|
|
45
|
+
upgini-1.1.168.dist-info/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
46
|
+
upgini-1.1.168.dist-info/METADATA,sha256=ZLuxZdDftGNXmRwHJD-FuTq3D9wGmVP2mCoDEkqo9bE,47895
|
|
47
|
+
upgini-1.1.168.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
|
|
48
|
+
upgini-1.1.168.dist-info/top_level.txt,sha256=OFhTGiDIWKl5gFI49qvWq1R9IKflPaE2PekcbDXDtx4,7
|
|
49
|
+
upgini-1.1.168.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|