upgini 1.2.32__py3-none-any.whl → 1.2.33__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +47 -6
- upgini/metadata.py +1 -0
- {upgini-1.2.32.dist-info → upgini-1.2.33.dist-info}/METADATA +2 -2
- {upgini-1.2.32.dist-info → upgini-1.2.33.dist-info}/RECORD +7 -7
- {upgini-1.2.32.dist-info → upgini-1.2.33.dist-info}/WHEEL +0 -0
- {upgini-1.2.32.dist-info → upgini-1.2.33.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.32"
|
|
1
|
+
__version__ = "1.2.33"
|
upgini/features_enricher.py
CHANGED
|
@@ -111,7 +111,11 @@ try:
|
|
|
111
111
|
except Exception:
|
|
112
112
|
from upgini.utils.fallback_progress_bar import CustomFallbackProgressBar as ProgressBar
|
|
113
113
|
|
|
114
|
-
from upgini.utils.target_utils import
|
|
114
|
+
from upgini.utils.target_utils import (
|
|
115
|
+
balance_undersample_forced,
|
|
116
|
+
calculate_psi,
|
|
117
|
+
define_task,
|
|
118
|
+
)
|
|
115
119
|
from upgini.utils.warning_counter import WarningCounter
|
|
116
120
|
from upgini.version_validator import validate_version
|
|
117
121
|
|
|
@@ -967,6 +971,13 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
967
971
|
self.__log_warning(self.bundle.get("metrics_no_important_free_features"))
|
|
968
972
|
return None
|
|
969
973
|
|
|
974
|
+
maybe_phone_column = self._get_phone_column(self.search_keys)
|
|
975
|
+
text_features = (
|
|
976
|
+
[f for f in self.generate_features if f != maybe_phone_column]
|
|
977
|
+
if self.generate_features is not None
|
|
978
|
+
else None
|
|
979
|
+
)
|
|
980
|
+
|
|
970
981
|
print(self.bundle.get("metrics_start"))
|
|
971
982
|
with Spinner():
|
|
972
983
|
self._check_train_and_eval_target_distribution(y_sorted, fitting_eval_set_dict)
|
|
@@ -982,7 +993,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
982
993
|
fitting_enriched_X,
|
|
983
994
|
scoring,
|
|
984
995
|
groups=groups,
|
|
985
|
-
text_features=
|
|
996
|
+
text_features=text_features,
|
|
986
997
|
has_date=has_date,
|
|
987
998
|
)
|
|
988
999
|
metric = wrapper.metric_name
|
|
@@ -1009,7 +1020,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1009
1020
|
cat_features,
|
|
1010
1021
|
add_params=custom_loss_add_params,
|
|
1011
1022
|
groups=groups,
|
|
1012
|
-
text_features=
|
|
1023
|
+
text_features=text_features,
|
|
1013
1024
|
has_date=has_date,
|
|
1014
1025
|
)
|
|
1015
1026
|
etalon_cv_result = baseline_estimator.cross_val_predict(
|
|
@@ -1044,7 +1055,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1044
1055
|
cat_features,
|
|
1045
1056
|
add_params=custom_loss_add_params,
|
|
1046
1057
|
groups=groups,
|
|
1047
|
-
text_features=
|
|
1058
|
+
text_features=text_features,
|
|
1048
1059
|
has_date=has_date,
|
|
1049
1060
|
)
|
|
1050
1061
|
enriched_cv_result = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
|
|
@@ -1827,7 +1838,27 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
1827
1838
|
|
|
1828
1839
|
# downsample if need to eval_set threshold
|
|
1829
1840
|
num_samples = _num_samples(df)
|
|
1830
|
-
|
|
1841
|
+
phone_column = self._get_phone_column(self.search_keys)
|
|
1842
|
+
force_downsampling = (
|
|
1843
|
+
not self.disable_force_downsampling
|
|
1844
|
+
and self.generate_features is not None
|
|
1845
|
+
and phone_column is not None
|
|
1846
|
+
and self.fit_columns_renaming[phone_column] in self.generate_features
|
|
1847
|
+
and num_samples > Dataset.FORCE_SAMPLE_SIZE
|
|
1848
|
+
)
|
|
1849
|
+
if force_downsampling:
|
|
1850
|
+
self.logger.info(f"Force downsampling from {num_samples} to {Dataset.FORCE_SAMPLE_SIZE}")
|
|
1851
|
+
df = balance_undersample_forced(
|
|
1852
|
+
df=df,
|
|
1853
|
+
target_column=TARGET,
|
|
1854
|
+
task_type=self.model_task_type,
|
|
1855
|
+
random_state=self.random_state,
|
|
1856
|
+
sample_size=Dataset.FORCE_SAMPLE_SIZE,
|
|
1857
|
+
logger=self.logger,
|
|
1858
|
+
bundle=self.bundle,
|
|
1859
|
+
warning_callback=self.__log_warning,
|
|
1860
|
+
)
|
|
1861
|
+
elif num_samples > Dataset.FIT_SAMPLE_WITH_EVAL_SET_THRESHOLD:
|
|
1831
1862
|
self.logger.info(f"Downsampling from {num_samples} to {Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS}")
|
|
1832
1863
|
df = df.sample(n=Dataset.FIT_SAMPLE_WITH_EVAL_SET_ROWS, random_state=self.random_state)
|
|
1833
1864
|
|
|
@@ -2063,6 +2094,15 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2063
2094
|
self.__display_support_link(msg)
|
|
2064
2095
|
return None, {c: c for c in X.columns}, []
|
|
2065
2096
|
|
|
2097
|
+
features_meta = self._search_task.get_all_features_metadata_v2()
|
|
2098
|
+
online_api_features = [fm.name for fm in features_meta if fm.from_online_api]
|
|
2099
|
+
if len(online_api_features) > 0:
|
|
2100
|
+
self.logger.warning(
|
|
2101
|
+
f"There are important features for transform, that generated by online API: {online_api_features}"
|
|
2102
|
+
)
|
|
2103
|
+
# TODO
|
|
2104
|
+
raise Exception("There are features selected that are paid. Contact support (sales@upgini.com)")
|
|
2105
|
+
|
|
2066
2106
|
if not metrics_calculation:
|
|
2067
2107
|
transform_usage = self.rest_client.get_current_transform_usage(trace_id)
|
|
2068
2108
|
self.logger.info(f"Current transform usage: {transform_usage}. Transforming {len(X)} rows")
|
|
@@ -2708,8 +2748,9 @@ class FeaturesEnricher(TransformerMixin):
|
|
|
2708
2748
|
and self.generate_features is not None
|
|
2709
2749
|
and phone_column is not None
|
|
2710
2750
|
and self.fit_columns_renaming[phone_column] in self.generate_features
|
|
2751
|
+
and len(df) > Dataset.FORCE_SAMPLE_SIZE
|
|
2711
2752
|
)
|
|
2712
|
-
if force_downsampling
|
|
2753
|
+
if force_downsampling:
|
|
2713
2754
|
runtime_parameters.properties["fast_fit"] = True
|
|
2714
2755
|
|
|
2715
2756
|
dataset = Dataset(
|
upgini/metadata.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.32
|
|
3
|
+
Version: 1.2.33
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -110,7 +110,7 @@ Description-Content-Type: text/markdown
|
|
|
110
110
|
</tr>
|
|
111
111
|
</table>
|
|
112
112
|
|
|
113
|
-
⭐️ [Simple Drag & Drop Search UI](https://upgini.com/
|
|
113
|
+
⭐️ [Simple Drag & Drop Search UI](https://www.upgini.com/data-search-widget):
|
|
114
114
|
<a href="https://upgini.com/upgini-widget">
|
|
115
115
|
<img width="710" alt="Drag & Drop Search UI" src="https://github.com/upgini/upgini/assets/95645411/36b6460c-51f3-400e-9f04-445b938bf45e">
|
|
116
116
|
</a>
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=ozUAhuj1IzPzq1FQeoqbf-7laxntI-m4qA0LSTBVtrw,23
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=KnkqV7Nnx3kxfQ89giDao3bmCm4MFJWqJUrONy85E-k,32030
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
|
-
upgini/features_enricher.py,sha256=
|
|
6
|
+
upgini/features_enricher.py,sha256=q11aMFPlCJy1m4sOFfGZFfb4vdG3-hdd0wgm2BXgs9A,194748
|
|
7
7
|
upgini/http.py,sha256=plZGTGoi1h2edd8Cnjt4eYB8t4NbBGnZz7DtPTByiNc,42885
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
|
-
upgini/metadata.py,sha256=
|
|
9
|
+
upgini/metadata.py,sha256=ACzIQQwCHCFHlUqXqKpxd3IQ4bBAaVvy8UaCGTqLGQs,11278
|
|
10
10
|
upgini/metrics.py,sha256=hr7UwLphbZ_FEglLuO2lzr_pFgxOJ4c3WBeg7H-fNqY,35521
|
|
11
11
|
upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
@@ -59,7 +59,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
|
|
|
59
59
|
upgini/utils/target_utils.py,sha256=Ed5IXkPjV9AfAZQAwCYksAmKaPGQliplvDYS_yeWdfk,11330
|
|
60
60
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
61
61
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
62
|
-
upgini-1.2.
|
|
63
|
-
upgini-1.2.
|
|
64
|
-
upgini-1.2.
|
|
65
|
-
upgini-1.2.
|
|
62
|
+
upgini-1.2.33.dist-info/METADATA,sha256=EG9Nr1Z8cls4rBaqrPykCTWZhSSoSxPaICd1EylsiKE,48587
|
|
63
|
+
upgini-1.2.33.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
64
|
+
upgini-1.2.33.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
65
|
+
upgini-1.2.33.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|