upgini 1.2.80__py3-none-any.whl → 1.2.81__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -140,7 +140,7 @@ baseline_score_column_not_exists=baseline_score_column {} doesn't exist in input
140
140
  baseline_score_column_has_na=baseline_score_column contains NaN. Clear it and and retry
141
141
  missing_features_for_transform=Missing some features for transform that were presented on fit: {}
142
142
  missing_target_for_transform=Search contains features on target. Please add y to the call and try again
143
- missing_id_column=Id column {} not found in X
143
+ missing_id_column=Id column {} not found in X: {}
144
144
  # target validation
145
145
  empty_target=Target is empty in all rows
146
146
  # non_numeric_target=Binary target should be numerical type
upgini/search_task.py CHANGED
@@ -179,6 +179,7 @@ class SearchTask:
179
179
  for f in meta.generated_features
180
180
  for c in f.base_columns
181
181
  if c.ads_definition_id is None
182
+ and not c.original_name.endswith("_emb") # embeddings already added
182
183
  )
183
184
  return list(features_for_transform)
184
185
 
@@ -92,9 +92,9 @@ def display_html_dataframe(
92
92
  if table_tsv is not None:
93
93
  copy_and_share = f"""
94
94
  <div style="text-align: right">
95
- <button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\U0001F4C2 Copy</button>
95
+ <button onclick=navigator.clipboard.writeText(decodeURI('{table_tsv}'))>\U0001f4c2 Copy</button>
96
96
  <a href='mailto:<Share with...>?subject={email_subject}&body={table_tsv}'>
97
- <button>\U0001F4E8 Share</button>
97
+ <button>\U0001f4e8 Share</button>
98
98
  </a>
99
99
  </div>"""
100
100
  else:
@@ -112,6 +112,7 @@ def display_html_dataframe(
112
112
 
113
113
  .upgini-df tbody td {{
114
114
  padding: 0.5em;
115
+ color: black;
115
116
  }}
116
117
 
117
118
  .upgini-df tbody tr:nth-child(odd) {{
@@ -164,10 +165,12 @@ def make_html_report(
164
165
 
165
166
  try:
166
167
  from importlib.resources import files
167
- font_path = files('upgini.utils').joinpath('Roboto-Regular.ttf')
168
+
169
+ font_path = files("upgini.utils").joinpath("Roboto-Regular.ttf")
168
170
  except Exception:
169
171
  from pkg_resources import resource_filename
170
- font_path = resource_filename('upgini.utils', 'Roboto-Regular.ttf')
172
+
173
+ font_path = resource_filename("upgini.utils", "Roboto-Regular.ttf")
171
174
 
172
175
  return f"""<html>
173
176
  <head>
@@ -274,8 +277,10 @@ def make_html_report(
274
277
  if metrics_df is not None
275
278
  else ""
276
279
  }
277
- <h3>Relevant data sources</h3>
278
- {make_table(relevant_datasources_df)}
280
+ {"<h3>Relevant data sources</h3>" + make_table(relevant_datasources_df)
281
+ if len(relevant_datasources_df) > 0
282
+ else ""
283
+ }
279
284
  <h3>All relevant features. Listing ({len(relevant_features_df)} items)</h3>
280
285
  {make_table(relevant_features_df, wrap_long_string=25)}
281
286
  {"<h3>Description of AutoFE feature names</h3>" + make_table(autofe_descriptions_df, wrap_long_string=25)
@@ -311,7 +316,7 @@ def prepare_and_show_report(
311
316
 
312
317
 
313
318
  def show_button_download_pdf(
314
- source: str, title="\U0001F4CA Download PDF report", display_id: Optional[str] = None, display_handle=None
319
+ source: str, title="\U0001f4ca Download PDF report", display_id: Optional[str] = None, display_handle=None
315
320
  ):
316
321
  from IPython.display import HTML, display
317
322
 
@@ -3,7 +3,7 @@ from typing import Callable, List, Optional, Union
3
3
 
4
4
  import numpy as np
5
5
  import pandas as pd
6
- from pandas.api.types import is_numeric_dtype, is_bool_dtype
6
+ from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype, is_numeric_dtype
7
7
 
8
8
  from upgini.errors import ValidationError
9
9
  from upgini.metadata import SYSTEM_RECORD_ID, CVType, ModelTaskType
@@ -14,11 +14,14 @@ from upgini.utils.ts_utils import get_most_frequent_time_unit, trunc_datetime
14
14
  TS_MIN_DIFFERENT_IDS_RATIO = 0.2
15
15
 
16
16
 
17
- def correct_string_target(y: Union[pd.Series, np.ndarray]) -> Union[pd.Series, np.ndarray]:
18
- if isinstance(y, pd.Series):
19
- return y.astype(str).astype("category").cat.codes
20
- elif isinstance(y, np.ndarray):
21
- return pd.Series(y).astype(str).astype("category").cat.codes.values
17
+ def prepare_target(y: Union[pd.Series, np.ndarray], target_type: ModelTaskType) -> Union[pd.Series, np.ndarray]:
18
+ if target_type != ModelTaskType.REGRESSION or (not is_numeric_dtype(y) and not is_datetime64_any_dtype(y)):
19
+ if isinstance(y, pd.Series):
20
+ y = y.astype(str).astype("category").cat.codes
21
+ elif isinstance(y, np.ndarray):
22
+ y = pd.Series(y).astype(str).astype("category").cat.codes.values
23
+
24
+ return y
22
25
 
23
26
 
24
27
  def define_task(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.80
3
+ Version: 1.2.81
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -22,6 +22,8 @@ Classifier: Programming Language :: Python :: 3.11
22
22
  Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
23
23
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
24
24
  Requires-Python: <3.12,>=3.10
25
+ Requires-Dist: catboost>=1.0.3
26
+ Requires-Dist: category-encoders>=2.8.1
25
27
  Requires-Dist: fastparquet>=0.8.1
26
28
  Requires-Dist: ipywidgets>=8.1.0
27
29
  Requires-Dist: jarowinkler>=2.0.0
@@ -1,20 +1,20 @@
1
- upgini/__about__.py,sha256=CoguueQtsTfVbd91MeGXrmsF-vGq7K1xnwf9nFL4qz0,23
1
+ upgini/__about__.py,sha256=Yiy3WTZ3aZz8a-WWP0GOQrsX6GQJWYTfOAenVKHKVFA,23
2
2
  upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=aspri7ZAgwkNNUiIgQ1GRXvw8XQii3F4RfNXSrF4wrw,35365
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=MQZ01u-7jR8nSTlsyvMzUt-FvsbsBjds2TvQZG5F4vM,208296
7
- upgini/http.py,sha256=UH7nswcZ221un3O_VW9limCBO5oRsyg1eKUHiVslRPs,43737
8
- upgini/metadata.py,sha256=Yd6iW2f7Wz6vUkg5uvR4xylN16ANnCKVKqAsAkap7p8,12354
9
- upgini/metrics.py,sha256=pv3LELb8QObiaKcUco5YUfM_rP2c7hseK2qtjKmjBGk,39378
10
- upgini/search_task.py,sha256=RcvAE785yksWTsTNWuZFVNlk32jHElMoEna1T_C5N8Q,17823
6
+ upgini/features_enricher.py,sha256=cWbEA2lOt51x62NrLkyxu1G8I4KQo_2aOgqt3Ypyr1M,212819
7
+ upgini/http.py,sha256=AfaJ3c8z_tK2hZFEehNybDKE0mp1tYcyAP_l0_p8bLQ,43933
8
+ upgini/metadata.py,sha256=zt_9k0iQbWXuiRZcel4ORNPdQKt6Ou69ucZD_E1Q46o,12341
9
+ upgini/metrics.py,sha256=3cip0_L6-OFew74KsRwzxJDU6UFq05h2v7IsyHLcMRc,43164
10
+ upgini/search_task.py,sha256=Q5HjBpLIB3OCxAD1zNv5yQ3ZNJx696WCK_-H35_y7Rs,17912
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
13
13
  upgini/ads_management/__init__.py,sha256=qzyisOToVRP-tquAJD1PblZhNtMrOB8FiyF9JvfkvgE,50
14
14
  upgini/ads_management/ads_manager.py,sha256=igVbN2jz80Umb2BUJixmJVj-zx8unoKpecVo-R-nGdw,2648
15
15
  upgini/autofe/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  upgini/autofe/all_operators.py,sha256=rdjF5eaE4bC6Q4eu_el5Z7ekYt8DjOFermz2bePPbUc,333
17
- upgini/autofe/binary.py,sha256=MnQuFiERpocjCPQUjOljlsq5FE-04GPfwtNjzvfNMyU,7671
17
+ upgini/autofe/binary.py,sha256=oOEECc4nRzZN2tYaiqx8F2XHnfWpk1bVvb7ZkZJ0lO8,7709
18
18
  upgini/autofe/date.py,sha256=MM1S-6imNSzCDOhbNnmsc_bwSqUWBcS8vWAdHF8j1kY,11134
19
19
  upgini/autofe/feature.py,sha256=G_YgnsauIoaMgByx9JXDPiKc4nqs0pwWZUfvoIGMKxY,15305
20
20
  upgini/autofe/groupby.py,sha256=IYmQV9uoCdRcpkeWZj_kI3ObzoNCNx3ff3h8sTL01tk,3603
@@ -29,16 +29,16 @@ upgini/autofe/timeseries/delta.py,sha256=h0YhmI1TlPJnjwFpN_GQxLb6r59DQuucnG5tQAX
29
29
  upgini/autofe/timeseries/lag.py,sha256=LfQtg484vuqM0mgY4Wft1swHX_Srq7OKKgZswCXoiXI,1882
30
30
  upgini/autofe/timeseries/roll.py,sha256=zADKXU-eYWQnQ5R3am1yEal8uU6Tm0jLAixwPb_aCHg,2794
31
31
  upgini/autofe/timeseries/trend.py,sha256=K1_iw2ko_LIUU8YCUgrvN3n0MkHtsi7-63-8x9er1k4,2129
32
- upgini/autofe/timeseries/volatility.py,sha256=9shUmIKjpWTHVYjj80YBsk0XheBJ9uBuLv5NW9Mchnk,7953
32
+ upgini/autofe/timeseries/volatility.py,sha256=SvZfhM_ZAWCNpTf87WjSnZsnlblARgruDlu4By4Zvhc,8078
33
33
  upgini/data_source/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
34
  upgini/data_source/data_source_publisher.py,sha256=4S9qwlAklD8vg9tUU_c1pHE2_glUHAh15-wr5hMwKFw,22879
35
- upgini/mdc/__init__.py,sha256=aM08nIWFc2gWdWUa3_IuEnNND0cQPkBGnYpRMnfFN8k,1019
35
+ upgini/mdc/__init__.py,sha256=iHJlXQg6xRM1-ZOUtaPSJqw5SpQDszvxp4LyqviNLIQ,1027
36
36
  upgini/mdc/context.py,sha256=3u1B-jXt7tXEvNcV3qmR9SDCseudnY7KYsLclBdwVLk,1405
37
37
  upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
38
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
39
39
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
40
40
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
41
- upgini/resource_bundle/strings.properties,sha256=o9iLgAJSxXm6mkQgtNwBVQKIRVwLF1__Dn9gSXb1kLY,27953
41
+ upgini/resource_bundle/strings.properties,sha256=GmkTgxowpykuuviubVH5cMF_lNFQJEqfRoBJaj3c72E,27957
42
42
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
43
43
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
44
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
@@ -53,7 +53,7 @@ upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDc
53
53
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
54
54
  upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
55
55
  upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
56
- upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
56
+ upgini/utils/display_utils.py,sha256=hAeWEcJtPDg8fAVcMNrNB-azFD2WJp1nvbPAhR7SeP4,12071
57
57
  upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
58
58
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
59
59
  upgini/utils/feature_info.py,sha256=Q9HN6A-fvfVD-irFWrmOqqZG9RsUSvh5MTY_k0xu-tE,7287
@@ -66,11 +66,11 @@ upgini/utils/postal_code_utils.py,sha256=5M0sUqH2DAr33kARWCTXR-ACyzWbjDq_-0mmEml
66
66
  upgini/utils/progress_bar.py,sha256=N-Sfdah2Hg8lXP_fV9EfUTXz_PyRt4lo9fAHoUDOoLc,1550
67
67
  upgini/utils/sklearn_ext.py,sha256=HpaNQaKJisgNE7IZ71n7uswxTj7kbPglU2G3s1sORAc,45042
68
68
  upgini/utils/sort.py,sha256=8uuHs2nfSMVnz8GgvbOmgMB1PgEIZP1uhmeRFxcwnYw,7039
69
- upgini/utils/target_utils.py,sha256=P0cCVRaakWLydYwFjk3TEaQfr0p0hfsJCvKRD8qcxiE,16650
69
+ upgini/utils/target_utils.py,sha256=LRN840dzx78-wg7ftdxAkp2c1eu8-JDvkACiRThm4HE,16832
70
70
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
71
71
  upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
72
72
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
73
- upgini-1.2.80.dist-info/METADATA,sha256=szsz09LH3Kv4SMNG8Ogut33IDG0Tzqln2JsrLiEXPBc,49091
74
- upgini-1.2.80.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
- upgini-1.2.80.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
- upgini-1.2.80.dist-info/RECORD,,
73
+ upgini-1.2.81.dist-info/METADATA,sha256=hmVcLcbz9e9wFcV2LkMYnocAlnwAiy6VQKUMxn5IUyE,49162
74
+ upgini-1.2.81.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
75
+ upgini-1.2.81.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
76
+ upgini-1.2.81.dist-info/RECORD,,