upgini 1.2.80__py3-none-any.whl → 1.2.81__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- upgini/__about__.py +1 -1
- upgini/autofe/binary.py +2 -2
- upgini/autofe/timeseries/volatility.py +6 -4
- upgini/features_enricher.py +155 -91
- upgini/http.py +21 -21
- upgini/mdc/__init__.py +1 -1
- upgini/metadata.py +1 -1
- upgini/metrics.py +289 -228
- upgini/resource_bundle/strings.properties +1 -1
- upgini/search_task.py +1 -0
- upgini/utils/display_utils.py +12 -7
- upgini/utils/target_utils.py +9 -6
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/METADATA +3 -1
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/RECORD +16 -16
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/WHEEL +0 -0
- {upgini-1.2.80.dist-info → upgini-1.2.81.dist-info}/licenses/LICENSE +0 -0
@@ -140,7 +140,7 @@ baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input
|
|
140
140
|
baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and and retry
|
141
141
|
missing_features_for_transform=Missing some features for transform that were presented on fit: {}
|
142
142
|
missing_target_for_transform=Search contains features on target. Please add y to the call and try again
|
143
|
-
missing_id_column=Id column {} not found in X
|
143
|
+
missing_id_column=Id column {} not found in X: {}
|
144
144
|
# target validation
|
145
145
|
empty_target=Target is empty in all rows
|
146
146
|
# non_numeric_target=Binary target should be numerical type
|
upgini/search_task.py
CHANGED
upgini/utils/display_utils.py
CHANGED
@@ -92,9 +92,9 @@ def display_html_dataframe(
|
|
92
92
|
if table_tsv is not None:
|
93
93
|
copy_and_share = f"""
|
94
94
|
<div style="text-align: right">
|
95
|
-
<button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\
|
95
|
+
<button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\U0001f4c2 Copy</button>
|
96
96
|
<a href='mailto:<Share with...>?subject={email_subject}&body={table_tsv}'>
|
97
|
-
<button>\
|
97
|
+
<button>\U0001f4e8 Share</button>
|
98
98
|
</a>
|
99
99
|
</div>"""
|
100
100
|
else:
|
@@ -112,6 +112,7 @@ def display_html_dataframe(
|
|
112
112
|
|
113
113
|
.upgini-df tbody td {{
|
114
114
|
padding: 0.5em;
|
115
|
+
color: black;
|
115
116
|
}}
|
116
117
|
|
117
118
|
.upgini-df tbody tr:nth-child(odd) {{
|
@@ -164,10 +165,12 @@ def make_html_report(
|
|
164
165
|
|
165
166
|
try:
|
166
167
|
from importlib.resources import files
|
167
|
-
|
168
|
+
|
169
|
+
font_path = files("upgini.utils").joinpath("Roboto-Regular.ttf")
|
168
170
|
except Exception:
|
169
171
|
from pkg_resources import resource_filename
|
170
|
-
|
172
|
+
|
173
|
+
font_path = resource_filename("upgini.utils", "Roboto-Regular.ttf")
|
171
174
|
|
172
175
|
return f"""<html>
|
173
176
|
<head>
|
@@ -274,8 +277,10 @@ def make_html_report(
|
|
274
277
|
if metrics_df is not None
|
275
278
|
else ""
|
276
279
|
}
|
277
|
-
<h3>Relevant data sources</h3>
|
278
|
-
|
280
|
+
{"<h3>Relevant data sources</h3>" + make_table(relevant_datasources_df)
|
281
|
+
if len(relevant_datasources_df) > 0
|
282
|
+
else ""
|
283
|
+
}
|
279
284
|
<h3>All relevant features. Listing ({len(relevant_features_df)} items)</h3>
|
280
285
|
{make_table(relevant_features_df, wrap_long_string=25)}
|
281
286
|
{"<h3>Description of AutoFE feature names</h3>" + make_table(autofe_descriptions_df, wrap_long_string=25)
|
@@ -311,7 +316,7 @@ def prepare_and_show_report(
|
|
311
316
|
|
312
317
|
|
313
318
|
def show_button_download_pdf(
|
314
|
-
source: str, title="\
|
319
|
+
source: str, title="\U0001f4ca Download PDF report", display_id: Optional[str] = None, display_handle=None
|
315
320
|
):
|
316
321
|
from IPython.display import HTML, display
|
317
322
|
|
upgini/utils/target_utils.py
CHANGED
@@ -3,7 +3,7 @@ from typing import Callable, List, Optional, Union
|
|
3
3
|
|
4
4
|
import numpy as np
|
5
5
|
import pandas as pd
|
6
|
-
from pandas.api.types import
|
6
|
+
from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype, is_numeric_dtype
|
7
7
|
|
8
8
|
from upgini.errors import ValidationError
|
9
9
|
from upgini.metadata import SYSTEM_RECORD_ID, CVType, ModelTaskType
|
@@ -14,11 +14,14 @@ from upgini.utils.ts_utils import get_most_frequent_time_unit, trunc_datetime
|
|
14
14
|
TS_MIN_DIFFERENT_IDS_RATIO = 0.2
|
15
15
|
|
16
16
|
|
17
|
-
def
|
18
|
-
if
|
19
|
-
|
20
|
-
|
21
|
-
|
17
|
+
def prepare_target(y: Union[pd.Series, np.ndarray], target_type: ModelTaskType) -> Union[pd.Series, np.ndarray]:
|
18
|
+
if target_type != ModelTaskType.REGRESSION or (not is_numeric_dtype(y) and not is_datetime64_any_dtype(y)):
|
19
|
+
if isinstance(y, pd.Series):
|
20
|
+
y = y.astype(str).astype("category").cat.codes
|
21
|
+
elif isinstance(y, np.ndarray):
|
22
|
+
y = pd.Series(y).astype(str).astype("category").cat.codes.values
|
23
|
+
|
24
|
+
return y
|
22
25
|
|
23
26
|
|
24
27
|
def define_task(
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.3
|
2
2
|
Name: upgini
|
3
|
-
Version: 1.2.80
|
3
|
+
Version: 1.2.81
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
@@ -22,6 +22,8 @@ Classifier: Programming Language :: Python :: 3.11
|
|
22
22
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
23
23
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
24
24
|
Requires-Python: <3.12,>=3.10
|
25
|
+
Requires-Dist: catboost>=1.0.3
|
26
|
+
Requires-Dist: category-encoders>=2.8.1
|
25
27
|
Requires-Dist: fastparquet>=0.8.1
|
26
28
|
Requires-Dist: ipywidgets>=8.1.0
|
27
29
|
Requires-Dist: jarowinkler>=2.0.0
|
@@ -1,20 +1,20 @@
|
|
1
|
-
upgini/__about__.py,sha256=
|
1
|
+
upgini/__about__.py,sha256=Yiy3WTZ3aZz8a-WWP0GOQrsX6GQJWYTfOAenVKHKVFA,23
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
4
4
|
upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
6
|
-
upgini/features_enricher.py,sha256=
|
7
|
-
upgini/http.py,sha256=
|
8
|
-
upgini/metadata.py,sha256=
|
9
|
-
upgini/metrics.py,sha256=
|
10
|
-
upgini/search_task.py,sha256=
|
6
|
+
upgini/features_enricher.py,sha256=cWbEA2lOt51x62NrLkyxu1G8I4KQo_2aOgqt3Ypyr1M,212819
|
7
|
+
upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
|
8
|
+
upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
|
9
|
+
upgini/metrics.py,sha256=3cip0_L6-OFew74KsRwzxJDU6UFq05h2v7IsyHLcMRc,43164
|
10
|
+
upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
|
11
11
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
12
12
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
13
13
|
upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
|
14
14
|
upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
|
15
15
|
upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
16
16
|
upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
|
17
|
-
upgini/autofe/binary.py,sha256=
|
17
|
+
upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
|
18
18
|
upgini/autofe/date.py,sha256=MM1S-6imNSzCDOhbNnmsc_bwSqUWBcS8vWAdHF8j1kY,11134
|
19
19
|
upgini/autofe/feature.py,sha256=G_YgnsauIoaMgByx9JXDPiKc4nqs0pwWZUfvoIGMKxY,15305
|
20
20
|
upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
|
@@ -29,16 +29,16 @@ upgini/autofe/timeseries/delta.py,sha256=h0YhmI1TlPJnjwFpN_GQxLb6r59DQuucnG5tQAX
|
|
29
29
|
upgini/autofe/timeseries/lag.py,sha256=LfQtg484vuqM0mgY4Wft1swHX_Srq7OKKgZswCXoiXI,1882
|
30
30
|
upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_aCHg,2794
|
31
31
|
upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
|
32
|
-
upgini/autofe/timeseries/volatility.py,sha256=
|
32
|
+
upgini/autofe/timeseries/volatility.py,sha256=SvZfhM_ZAWCNpTf87WjSnZsnlblARgruDlu4By4Zvhc,8078
|
33
33
|
upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
34
34
|
upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
|
35
|
-
upgini/mdc/__init__.py,sha256=
|
35
|
+
upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
|
36
36
|
upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
|
37
37
|
upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
38
38
|
upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
|
39
39
|
upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
|
40
40
|
upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
|
41
|
-
upgini/resource_bundle/strings.properties,sha256=
|
41
|
+
upgini/resource_bundle/strings.properties,sha256=GmkTgxowpykuuviubVH5cMF_lNFQJEqfRoBJaj3c72E,27957
|
42
42
|
upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
|
43
43
|
upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
44
44
|
upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
|
@@ -53,7 +53,7 @@ upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDc
|
|
53
53
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
54
54
|
upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
|
55
55
|
upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
|
56
|
-
upgini/utils/display_utils.py,sha256=
|
56
|
+
upgini/utils/display_utils.py,sha256=hAeWEcJtPDg8fAVcMNrNB-azFD2WJp1nvbPAhR7SeP4,12071
|
57
57
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
58
58
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
59
59
|
upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
|
@@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml
|
|
66
66
|
upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
|
67
67
|
upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
|
68
68
|
upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
|
69
|
-
upgini/utils/target_utils.py,sha256=
|
69
|
+
upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,16832
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
73
|
-
upgini-1.2.
|
74
|
-
upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
-
upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
-
upgini-1.2.80.dist-info/RECORD,,
|
73
|
+
upgini-1.2.81.dist-info/METADATA,sha256=hmVcLcbz9e9wFcV2LkMYnocAlnwAiy6VQKUMxn5IUyE,49162
|
74
|
+
upgini-1.2.81.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
75
|
+
upgini-1.2.81.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
76
|
+
upgini-1.2.81.dist-info/RECORD,,
|
File without changes
|
File without changes
|