upgini 1.2.81a3832.dev5__py3-none-any.whl → 1.2.81a3832.dev6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/features_enricher.py +14 -13
- upgini/metrics.py +2 -44
- upgini/utils/display_utils.py +12 -7
- {upgini-1.2.81a3832.dev5.dist-info → upgini-1.2.81a3832.dev6.dist-info}/METADATA +1 -1
- {upgini-1.2.81a3832.dev5.dist-info → upgini-1.2.81a3832.dev6.dist-info}/RECORD +8 -8
- {upgini-1.2.81a3832.dev5.dist-info → upgini-1.2.81a3832.dev6.dist-info}/WHEEL +0 -0
- {upgini-1.2.81a3832.dev5.dist-info → upgini-1.2.81a3832.dev6.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
@@ -1 +1 @@
|
|
1
|
-
__version__ = "1.2.81a3832.dev5"
|
1
|
+
__version__ = "1.2.81a3832.dev6"
|
upgini/features_enricher.py
CHANGED
@@ -1023,12 +1023,12 @@ class FeaturesEnricher(TransformerMixin):
|
|
1023
1023
|
self.__log_warning(self.bundle.get("metrics_no_important_free_features"))
|
1024
1024
|
return None
|
1025
1025
|
|
1026
|
-
|
1027
|
-
text_features
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1026
|
+
text_features = self.generate_features.copy() if self.generate_features else None
|
1027
|
+
if text_features:
|
1028
|
+
for renamed, original in columns_renaming.items():
|
1029
|
+
if original in text_features:
|
1030
|
+
text_features.remove(original)
|
1031
|
+
text_features.append(renamed)
|
1032
1032
|
|
1033
1033
|
print(self.bundle.get("metrics_start"))
|
1034
1034
|
with Spinner():
|
@@ -3092,7 +3092,7 @@ if response.status_code == 200:
|
|
3092
3092
|
self.__show_selected_features(self.fit_search_keys)
|
3093
3093
|
|
3094
3094
|
autofe_description = self.get_autofe_features_description()
|
3095
|
-
if autofe_description is not None:
|
3095
|
+
if autofe_description is not None and len(autofe_description) > 0:
|
3096
3096
|
self.logger.info(f"AutoFE descriptions: {autofe_description}")
|
3097
3097
|
self.autofe_features_display_handle = display_html_dataframe(
|
3098
3098
|
df=autofe_description,
|
@@ -4260,12 +4260,13 @@ if response.status_code == 200:
|
|
4260
4260
|
display_id=f"features_info_{uuid.uuid4()}",
|
4261
4261
|
)
|
4262
4262
|
|
4263
|
-
self.
|
4264
|
-
self.
|
4265
|
-
|
4266
|
-
|
4267
|
-
|
4268
|
-
|
4263
|
+
if len(self.relevant_data_sources) > 0:
|
4264
|
+
self.data_sources_display_handle = display_html_dataframe(
|
4265
|
+
self.relevant_data_sources,
|
4266
|
+
self._relevant_data_sources_wo_links,
|
4267
|
+
self.bundle.get("relevant_data_sources_header"),
|
4268
|
+
display_id=f"data_sources_{uuid.uuid4()}",
|
4269
|
+
)
|
4269
4270
|
else:
|
4270
4271
|
msg = self.bundle.get("features_info_zero_important_features")
|
4271
4272
|
self.__log_warning(msg, show_support_link=True)
|
upgini/metrics.py
CHANGED
@@ -18,7 +18,6 @@ from numpy import log1p
|
|
18
18
|
from pandas.api.types import is_numeric_dtype
|
19
19
|
from sklearn.metrics import check_scoring, get_scorer, make_scorer, roc_auc_score
|
20
20
|
|
21
|
-
# from upgini.utils.blocked_time_series import BlockedTimeSeriesSplit
|
22
21
|
from upgini.utils.features_validator import FeaturesValidator
|
23
22
|
from upgini.utils.sklearn_ext import cross_validate
|
24
23
|
|
@@ -100,7 +99,6 @@ LIGHTGBM_REGRESSION_PARAMS = {
|
|
100
99
|
"min_sum_hessian_in_leaf": 0.01,
|
101
100
|
"objective": "huber",
|
102
101
|
"deterministic": "true",
|
103
|
-
# "force_col_wise": "true",
|
104
102
|
"verbosity": -1,
|
105
103
|
}
|
106
104
|
|
@@ -115,12 +113,10 @@ LIGHTGBM_MULTICLASS_PARAMS = {
|
|
115
113
|
"cat_smooth": 18,
|
116
114
|
"cat_l2": 8,
|
117
115
|
"objective": "multiclass",
|
118
|
-
# "class_weight": "balanced",
|
119
116
|
"use_quantized_grad": "true",
|
120
117
|
"num_grad_quant_bins": "8",
|
121
118
|
"stochastic_rounding": "true",
|
122
119
|
"deterministic": "true",
|
123
|
-
# "force_col_wise": "true",
|
124
120
|
"verbosity": -1,
|
125
121
|
}
|
126
122
|
|
@@ -131,13 +127,11 @@ LIGHTGBM_BINARY_PARAMS = {
|
|
131
127
|
"max_depth": 5,
|
132
128
|
"learning_rate": 0.05,
|
133
129
|
"objective": "binary",
|
134
|
-
# "class_weight": "balanced",
|
135
130
|
"max_cat_threshold": 80,
|
136
131
|
"min_data_per_group": 20,
|
137
132
|
"cat_smooth": 18,
|
138
133
|
"cat_l2": 8,
|
139
134
|
"deterministic": "true",
|
140
|
-
# "force_col_wise": "true",
|
141
135
|
"verbosity": -1,
|
142
136
|
}
|
143
137
|
|
@@ -146,34 +140,6 @@ LIGHTGBM_EARLY_STOPPING_ROUNDS = 20
|
|
146
140
|
N_FOLDS = 5
|
147
141
|
BLOCKED_TS_TEST_SIZE = 0.2
|
148
142
|
|
149
|
-
# NA_VALUES = [
|
150
|
-
# "",
|
151
|
-
# " ",
|
152
|
-
# " ",
|
153
|
-
# "#n/a",
|
154
|
-
# "#n/a n/a",
|
155
|
-
# "#na",
|
156
|
-
# "-1.#ind",
|
157
|
-
# "-1.#qnan",
|
158
|
-
# "-nan",
|
159
|
-
# "1.#ind",
|
160
|
-
# "1.#qnan",
|
161
|
-
# "n/a",
|
162
|
-
# "na",
|
163
|
-
# "null",
|
164
|
-
# "nan",
|
165
|
-
# "n/a",
|
166
|
-
# "nan",
|
167
|
-
# "none",
|
168
|
-
# "-",
|
169
|
-
# "undefined",
|
170
|
-
# "[[unknown]]",
|
171
|
-
# "[not provided]",
|
172
|
-
# "[unknown]",
|
173
|
-
# ]
|
174
|
-
|
175
|
-
# NA_REPLACEMENT = "NA"
|
176
|
-
|
177
143
|
SUPPORTED_CATBOOST_METRICS = {
|
178
144
|
s.upper(): s
|
179
145
|
for s in (
|
@@ -975,7 +941,8 @@ def _get_cat_features(
|
|
975
941
|
|
976
942
|
logger.info(f"Selected categorical features: {cat_features}")
|
977
943
|
|
978
|
-
|
944
|
+
non_encode_features = list(set(x.select_dtypes(exclude=[np.number, np.datetime64, pd.CategoricalDtype()]).columns))
|
945
|
+
features_to_encode = [f for f in cat_features if f not in non_encode_features]
|
979
946
|
|
980
947
|
logger.info(f"Features to encode: {features_to_encode}")
|
981
948
|
|
@@ -1067,12 +1034,3 @@ def _ext_mean_squared_log_error(y_true, y_pred, *, sample_weight=None, multioutp
|
|
1067
1034
|
multioutput=multioutput,
|
1068
1035
|
)
|
1069
1036
|
return mse if squared else np.sqrt(mse)
|
1070
|
-
|
1071
|
-
|
1072
|
-
# def fill_na_cat_features(df: pd.DataFrame, cat_features: List[str]) -> pd.DataFrame:
|
1073
|
-
# for c in cat_features:
|
1074
|
-
# if c in df.columns:
|
1075
|
-
# df[c] = df[c].astype("string").fillna(NA_REPLACEMENT).astype(str)
|
1076
|
-
# na_filter = df[c].str.lower().isin(NA_VALUES)
|
1077
|
-
# df.loc[na_filter, c] = NA_REPLACEMENT
|
1078
|
-
# return df
|
upgini/utils/display_utils.py
CHANGED
@@ -92,9 +92,9 @@ def display_html_dataframe(
|
|
92
92
|
if table_tsv is not None:
|
93
93
|
copy_and_share = f"""
|
94
94
|
<div style="text-align: right">
|
95
|
-
<button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\
|
95
|
+
<button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\U0001f4c2 Copy</button>
|
96
96
|
<a href='mailto:<Share with...>?subject={email_subject}&body={table_tsv}'>
|
97
|
-
<button>\
|
97
|
+
<button>\U0001f4e8 Share</button>
|
98
98
|
</a>
|
99
99
|
</div>"""
|
100
100
|
else:
|
@@ -112,6 +112,7 @@ def display_html_dataframe(
|
|
112
112
|
|
113
113
|
.upgini-df tbody td {{
|
114
114
|
padding: 0.5em;
|
115
|
+
color: black;
|
115
116
|
}}
|
116
117
|
|
117
118
|
.upgini-df tbody tr:nth-child(odd) {{
|
@@ -164,10 +165,12 @@ def make_html_report(
|
|
164
165
|
|
165
166
|
try:
|
166
167
|
from importlib.resources import files
|
167
|
-
|
168
|
+
|
169
|
+
font_path = files("upgini.utils").joinpath("Roboto-Regular.ttf")
|
168
170
|
except Exception:
|
169
171
|
from pkg_resources import resource_filename
|
170
|
-
|
172
|
+
|
173
|
+
font_path = resource_filename("upgini.utils", "Roboto-Regular.ttf")
|
171
174
|
|
172
175
|
return f"""<html>
|
173
176
|
<head>
|
@@ -274,8 +277,10 @@ def make_html_report(
|
|
274
277
|
if metrics_df is not None
|
275
278
|
else ""
|
276
279
|
}
|
277
|
-
<h3>Relevant data sources</h3>
|
278
|
-
|
280
|
+
{"<h3>Relevant data sources</h3>" + make_table(relevant_datasources_df)
|
281
|
+
if len(relevant_datasources_df) > 0
|
282
|
+
else ""
|
283
|
+
}
|
279
284
|
<h3>All relevant features. Listing ({len(relevant_features_df)} items)</h3>
|
280
285
|
{make_table(relevant_features_df, wrap_long_string=25)}
|
281
286
|
{"<h3>Description of AutoFE feature names</h3>" + make_table(autofe_descriptions_df, wrap_long_string=25)
|
@@ -311,7 +316,7 @@ def prepare_and_show_report(
|
|
311
316
|
|
312
317
|
|
313
318
|
def show_button_download_pdf(
|
314
|
-
source: str, title="\
|
319
|
+
source: str, title="\U0001f4ca Download PDF report", display_id: Optional[str] = None, display_handle=None
|
315
320
|
):
|
316
321
|
from IPython.display import HTML, display
|
317
322
|
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.81a3832.dev5
|
3
|
+
Version: 1.2.81a3832.dev6
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -1,12 +1,12 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=yNrgPKOedmyNgT4TYavHML3irFQc9hNEAf0TxhtzLzA,33
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=ODCSzFw62y_8vUrfbcZtDu0dWMIDCGYKWD2F54QDFII,210787
|
7
7
|
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
8
|
upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
|
9
|
-
upgini/metrics.py,sha256=
|
9
|
+
upgini/metrics.py,sha256=lWFF_dQAWcgI7EOQlTXiLjsAEoPLxNv1PCp_egoKolc,38821
|
10
10
|
upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
@@ -53,7 +53,7 @@ upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDc
|
|
53
53
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
54
54
|
upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
|
55
55
|
upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
|
56
|
-
upgini/utils/display_utils.py,sha256=
|
56
|
+
upgini/utils/display_utils.py,sha256=hAeWEcJtPDg8fAVcMNrNB-azFD2WJp1nvbPAhR7SeP4,12071
|
57
57
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
58
58
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
59
59
|
upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.81a3832.
|
74
|
-
upgini-1.2.81a3832.
|
75
|
-
upgini-1.2.81a3832.
|
76
|
-
upgini-1.2.81a3832.
|
73
|
+
upgini-1.2.81a3832.dev6.dist-info/METADATA,sha256=WjpXtnU3FUqspcRA2Zl-5iMqo5fqT2xIhHPJXFPcPN4,49172
|
74
|
+
upgini-1.2.81a3832.dev6.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.81a3832.dev6.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.81a3832.dev6.dist-info/RECORD,,
|
File without changes
|
File without changes
|