upgini 1.2.81a3832.dev4__py3-none-any.whl → 1.2.81a3832.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +18 -17
- upgini/metrics.py +2 -44
- upgini/resource_bundle/strings.properties +1 -1
- upgini/utils/display_utils.py +12 -7
- {upgini-1.2.81a3832.dev4.dist-info → upgini-1.2.81a3832.dev6.dist-info}/METADATA +1 -1
- {upgini-1.2.81a3832.dev4.dist-info → upgini-1.2.81a3832.dev6.dist-info}/RECORD +9 -9
- {upgini-1.2.81a3832.dev4.dist-info → upgini-1.2.81a3832.dev6.dist-info}/WHEEL +0 -0
- {upgini-1.2.81a3832.dev4.dist-info → upgini-1.2.81a3832.dev6.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.81a3832.dev4"
|
1
|
+
__version__ = "1.2.81a3832.dev6"
|
upgini/features_enricher.py
CHANGED
@@ -1023,12 +1023,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
1023
1023
|
self.__log_warning(self.bundle.get("metrics_no_important_free_features"))
|
1024
1024
|
return None
|
1025
1025
|
|
1026
|
-
|
1027
|
-
text_features
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1026
|
+
text_features = self.generate_features.copy() if self.generate_features else None
|
1027
|
+
if text_features:
|
1028
|
+
for renamed, original in columns_renaming.items():
|
1029
|
+
if original in text_features:
|
1030
|
+
text_features.remove(original)
|
1031
|
+
text_features.append(renamed)
|
1032
1032
|
|
1033
1033
|
print(self.bundle.get("metrics_start"))
|
1034
1034
|
with Spinner():
|
@@ -1041,9 +1041,7 @@ class FeaturesEnricher(TransformerMixin):
|
|
1041
1041
|
enriched_cat_features = [f for f in cat_features if f in fitting_enriched_X.columns]
|
1042
1042
|
if len(enriched_cat_features) < len(cat_features):
|
1043
1043
|
missing_cat_features = [f for f in cat_features if f not in fitting_enriched_X.columns]
|
1044
|
-
self.logger.warning(
|
1045
|
-
f"Some cat_features were not found in enriched_X: {missing_cat_features}"
|
1046
|
-
)
|
1044
|
+
self.logger.warning(f"Some cat_features were not found in enriched_X: {missing_cat_features}")
|
1047
1045
|
|
1048
1046
|
_, metric, multiplier = define_scorer(model_task_type, scoring)
|
1049
1047
|
|
@@ -2750,7 +2748,9 @@ if response.status_code == 200:
|
|
2750
2748
|
if self.id_columns is not None:
|
2751
2749
|
for id_column in self.id_columns:
|
2752
2750
|
if id_column not in validated_X.columns:
|
2753
|
-
raise ValidationError(
|
2751
|
+
raise ValidationError(
|
2752
|
+
self.bundle.get("missing_id_column").format(id_column, list(validated_X.columns))
|
2753
|
+
)
|
2754
2754
|
|
2755
2755
|
validate_scoring_argument(scoring)
|
2756
2756
|
|
@@ -3092,7 +3092,7 @@ if response.status_code == 200:
|
|
3092
3092
|
self.__show_selected_features(self.fit_search_keys)
|
3093
3093
|
|
3094
3094
|
autofe_description = self.get_autofe_features_description()
|
3095
|
-
if autofe_description is not None:
|
3095
|
+
if autofe_description is not None and len(autofe_description) > 0:
|
3096
3096
|
self.logger.info(f"AutoFE descriptions: {autofe_description}")
|
3097
3097
|
self.autofe_features_display_handle = display_html_dataframe(
|
3098
3098
|
df=autofe_description,
|
@@ -4260,12 +4260,13 @@ if response.status_code == 200:
|
|
4260
4260
|
display_id=f"features_info_{uuid.uuid4()}",
|
4261
4261
|
)
|
4262
4262
|
|
4263
|
-
self.
|
4264
|
-
self.
|
4265
|
-
|
4266
|
-
|
4267
|
-
|
4268
|
-
|
4263
|
+
if len(self.relevant_data_sources) > 0:
|
4264
|
+
self.data_sources_display_handle = display_html_dataframe(
|
4265
|
+
self.relevant_data_sources,
|
4266
|
+
self._relevant_data_sources_wo_links,
|
4267
|
+
self.bundle.get("relevant_data_sources_header"),
|
4268
|
+
display_id=f"data_sources_{uuid.uuid4()}",
|
4269
|
+
)
|
4269
4270
|
else:
|
4270
4271
|
msg = self.bundle.get("features_info_zero_important_features")
|
4271
4272
|
self.__log_warning(msg, show_support_link=True)
|
upgini/metrics.py
CHANGED
@@ -18,7 +18,6 @@ from numpy import log1p
|
|
18
18
|
from pandas.api.types import is_numeric_dtype
|
19
19
|
from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
|
20
20
|
|
21
|
-
# from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
|
22
21
|
from upgini.utils.features_validator import FeaturesValidator
|
23
22
|
from upgini.utils.sklearn_ext import cross_validate
|
24
23
|
|
@@ -100,7 +99,6 @@ LIGHTGBM_REGRESSION_PARAMS = {
|
|
100
99
|
"min_sum_hessian_in_leaf": 0.01,
|
101
100
|
"objective": "huber",
|
102
101
|
"deterministic": "true",
|
103
|
-
# "force_col_wise": "true",
|
104
102
|
"verbosity": -1,
|
105
103
|
}
|
106
104
|
|
@@ -115,12 +113,10 @@ LIGHTGBM_MULTICLASS_PARAMS = {
|
|
115
113
|
"cat_smooth": 18,
|
116
114
|
"cat_l2": 8,
|
117
115
|
"objective": "multiclass",
|
118
|
-
# "class_weight": "balanced",
|
119
116
|
"use_quantized_grad": "true",
|
120
117
|
"num_grad_quant_bins": "8",
|
121
118
|
"stochastic_rounding": "true",
|
122
119
|
"deterministic": "true",
|
123
|
-
# "force_col_wise": "true",
|
124
120
|
"verbosity": -1,
|
125
121
|
}
|
126
122
|
|
@@ -131,13 +127,11 @@ LIGHTGBM_BINARY_PARAMS = {
|
|
131
127
|
"max_depth": 5,
|
132
128
|
"learning_rate": 0.05,
|
133
129
|
"objective": "binary",
|
134
|
-
# "class_weight": "balanced",
|
135
130
|
"max_cat_threshold": 80,
|
136
131
|
"min_data_per_group": 20,
|
137
132
|
"cat_smooth": 18,
|
138
133
|
"cat_l2": 8,
|
139
134
|
"deterministic": "true",
|
140
|
-
# "force_col_wise": "true",
|
141
135
|
"verbosity": -1,
|
142
136
|
}
|
143
137
|
|
@@ -146,34 +140,6 @@ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
|
|
146
140
|
N_FOLDS = 5
|
147
141
|
BLOCKED_TS_TEST_SIZE = 0.2
|
148
142
|
|
149
|
-
# NA_VALUES = [
|
150
|
-
# "",
|
151
|
-
# " ",
|
152
|
-
# " ",
|
153
|
-
# "#n/a",
|
154
|
-
# "#n/a n/a",
|
155
|
-
# "#na",
|
156
|
-
# "-1.#ind",
|
157
|
-
# "-1.#qnan",
|
158
|
-
# "-nan",
|
159
|
-
# "1.#ind",
|
160
|
-
# "1.#qnan",
|
161
|
-
# "n/a",
|
162
|
-
# "na",
|
163
|
-
# "null",
|
164
|
-
# "nan",
|
165
|
-
# "n/a",
|
166
|
-
# "nan",
|
167
|
-
# "none",
|
168
|
-
# "-",
|
169
|
-
# "undefined",
|
170
|
-
# "[[unknown]]",
|
171
|
-
# "[not provided]",
|
172
|
-
# "[unknown]",
|
173
|
-
# ]
|
174
|
-
|
175
|
-
# NA_REPLACEMENT = "NA"
|
176
|
-
|
177
143
|
SUPPORTED_CATBOOST_METRICS = {
|
178
144
|
s.upper(): s
|
179
145
|
for s in (
|
@@ -975,7 +941,8 @@ def _get_cat_features(
|
|
975
941
|
|
976
942
|
logger.info(f"Selected categorical features: {cat_features}")
|
977
943
|
|
978
|
-
|
944
|
+
non_encode_features = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
|
945
|
+
features_to_encode = [f for f in cat_features if f not in non_encode_features]
|
979
946
|
|
980
947
|
logger.info(f"Features to encode: {features_to_encode}")
|
981
948
|
|
@@ -1067,12 +1034,3 @@ def _ext_mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutp
|
|
1067
1034
|
multioutput=multioutput,
|
1068
1035
|
)
|
1069
1036
|
return mse if squared else np.sqrt(mse)
|
1070
|
-
|
1071
|
-
|
1072
|
-
# def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
|
1073
|
-
# for c in cat_features:
|
1074
|
-
# if c in df.columns:
|
1075
|
-
# df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
|
1076
|
-
# na_filter = df[c].str.lower().isin(NA_VALUES)
|
1077
|
-
# df.loc[na_filter, c] = NA_REPLACEMENT
|
1078
|
-
# return df
|
@@ -140,7 +140,7 @@ baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input
|
|
140
140
|
baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and and retry
|
141
141
|
missing_features_for_transform=Missing some features for transform that were presented on fit: {}
|
142
142
|
missing_target_for_transform=Search contains features on target. Please add y to the call and try again
|
143
|
-
missing_id_column=Id column {} not found in X
|
143
|
+
missing_id_column=Id column {} not found in X: {}
|
144
144
|
# target validation
|
145
145
|
empty_target=Target is empty in all rows
|
146
146
|
# non_numeric_target=Binary target should be numerical type
|
upgini/utils/display_utils.py
CHANGED
@@ -92,9 +92,9 @@ def display_html_dataframe(
|
|
92
92
|
if table_tsv is not None:
|
93
93
|
copy_and_share = f"""
|
94
94
|
<div style="text-align: right">
|
95
|
-
<button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\
|
95
|
+
<button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\U0001f4c2 Copy</button>
|
96
96
|
<a href='mailto:<Share with...>?subject={email_subject}&body={table_tsv}'>
|
97
|
-
<button>\
|
97
|
+
<button>\U0001f4e8 Share</button>
|
98
98
|
</a>
|
99
99
|
</div>"""
|
100
100
|
else:
|
@@ -112,6 +112,7 @@ def display_html_dataframe(
|
|
112
112
|
|
113
113
|
.upgini-df tbody td {{
|
114
114
|
padding: 0.5em;
|
115
|
+
color: black;
|
115
116
|
}}
|
116
117
|
|
117
118
|
.upgini-df tbody tr:nth-child(odd) {{
|
@@ -164,10 +165,12 @@ def make_html_report(
|
|
164
165
|
|
165
166
|
try:
|
166
167
|
from importlib.resources import files
|
167
|
-
|
168
|
+
|
169
|
+
font_path = files("upgini.utils").joinpath("Roboto-Regular.ttf")
|
168
170
|
except Exception:
|
169
171
|
from pkg_resources import resource_filename
|
170
|
-
|
172
|
+
|
173
|
+
font_path = resource_filename("upgini.utils", "Roboto-Regular.ttf")
|
171
174
|
|
172
175
|
return f"""<html>
|
173
176
|
<head>
|
@@ -274,8 +277,10 @@ def make_html_report(
|
|
274
277
|
if metrics_df is not None
|
275
278
|
else ""
|
276
279
|
}
|
277
|
-
<h3>Relevant data sources</h3>
|
278
|
-
|
280
|
+
{"<h3>Relevant data sources</h3>" + make_table(relevant_datasources_df)
|
281
|
+
if len(relevant_datasources_df) > 0
|
282
|
+
else ""
|
283
|
+
}
|
279
284
|
<h3>All relevant features. Listing ({len(relevant_features_df)} items)</h3>
|
280
285
|
{make_table(relevant_features_df, wrap_long_string=25)}
|
281
286
|
{"<h3>Description of AutoFE feature names</h3>" + make_table(autofe_descriptions_df, wrap_long_string=25)
|
@@ -311,7 +316,7 @@ def prepare_and_show_report(
|
|
311
316
|
|
312
317
|
|
313
318
|
def show_button_download_pdf(
|
314
|
-
source: str, title="\
|
319
|
+
source: str, title="\U0001f4ca Download PDF report", display_id: Optional[str] = None, display_handle=None
|
315
320
|
):
|
316
321
|
from IPython.display import HTML, display
|
317
322
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.81a3832.dev4
|
3
|
+
Version: 1.2.81a3832.dev6
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=yNrgPKOedmyNgT4TYavHML3irFQc9hNEAf0TxhtzLzA,33
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=ODCSzFw62y_8vUrfbcZtDu0dWMIDCGYKWD2F54QDFII,210787
|
7
7
|
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=lWFF_dQAWcgI7EOQlTXiLjsAEoPLxNv1PCp_egoKolc,38821
|
10
10
|
upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -38,7 +38,7 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
|
|
38
38
|
upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
|
39
39
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
40
40
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
41
|
-
upgini/resource_bundle/strings.properties,sha256=
|
41
|
+
upgini/resource_bundle/strings.properties,sha256=GmkTgxowpykuuviubVH5cMF_lNFQJEqfRoBJaj3c72E,27957
|
42
42
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
43
43
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
@@ -53,7 +53,7 @@ upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDc
|
|
53
53
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
54
54
|
upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
|
55
55
|
upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
|
56
|
-
upgini/utils/display_utils.py,sha256=
|
56
|
+
upgini/utils/display_utils.py,sha256=hAeWEcJtPDg8fAVcMNrNB-azFD2WJp1nvbPAhR7SeP4,12071
|
57
57
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
58
58
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
59
59
|
upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.81a3832.
|
74
|
-
upgini-1.2.81a3832.dev4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
-
upgini-1.2.81a3832.dev4.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
-
upgini-1.2.81a3832.dev4.dist-info/RECORD,,
|
73
|
+
upgini-1.2.81a3832.dev6.dist-info/METADATA,sha256=WjpXtnU3FUqspcRA2Zl-5iMqo5fqT2xIhHPJXFPcPN4,49172
|
74
|
+
upgini-1.2.81a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.81a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.81a3832.dev6.dist-info/RECORD,,
|
File without changes
|
File without changes
|