upgini 1.2.24a1__py3-none-any.whl → 1.2.25__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.2.24a1"
1
+ __version__ = "1.2.25"
@@ -3194,9 +3194,8 @@ class FeaturesEnricher(TransformerMixin):
3194
3194
 
3195
3195
  return df
3196
3196
 
3197
- @staticmethod
3198
3197
  def _add_current_date_as_key(
3199
- df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
3198
+ self, df: pd.DataFrame, search_keys: Dict[str, SearchKey], logger: logging.Logger, bundle: ResourceBundle
3200
3199
  ) -> pd.DataFrame:
3201
3200
  if (
3202
3201
  set(search_keys.values()) == {SearchKey.PHONE}
@@ -3204,9 +3203,7 @@ class FeaturesEnricher(TransformerMixin):
3204
3203
  or set(search_keys.values()) == {SearchKey.HEM}
3205
3204
  or set(search_keys.values()) == {SearchKey.COUNTRY, SearchKey.POSTAL_CODE}
3206
3205
  ):
3207
- msg = bundle.get("current_date_added")
3208
- print(msg)
3209
- logger.warning(msg)
3206
+ self.__log_warning(bundle.get("current_date_added"))
3210
3207
  df[FeaturesEnricher.CURRENT_DATE] = datetime.date.today()
3211
3208
  search_keys[FeaturesEnricher.CURRENT_DATE] = SearchKey.DATE
3212
3209
  converter = DateTimeSearchKeyConverter(FeaturesEnricher.CURRENT_DATE)
upgini/metrics.py CHANGED
@@ -437,7 +437,8 @@ class EstimatorWrapper:
437
437
  f"Client cat_feature `{cat_feature}` not found in x columns: {x.columns.to_list()}"
438
438
  )
439
439
  estimator_copy.set_params(
440
- cat_features=[x.columns.get_loc(cat_feature) for cat_feature in cat_features]
440
+ # cat_features=[x.columns.get_loc(cat_feature) for cat_feature in cat_features]
441
+ cat_features=cat_features
441
442
  )
442
443
  estimator = CatBoostWrapper(**kwargs)
443
444
  else:
@@ -9,7 +9,7 @@ search_stopped=Search request stopped
9
9
  polling_search_task=\nRunning search request, search_id={}
10
10
  polling_unregister_information=We'll send email notification once it's completed, just use your personal api_key from profile.upgini.com
11
11
  ads_upload_finish=Thank you for your submission!\nWe'll check your data sharing proposal and get back to you
12
- demo_dataset_info=Demo training dataset detected. Registration for an API key is not required.
12
+ demo_dataset_info=Demo training dataset detected. Registration for an API key is not required.\n
13
13
  transform_usage_info=You use Trial access to Upgini data enrichment. Limit for Trial: {} rows. You have already enriched: {} rows.
14
14
  transform_usage_warning=You are trying to launch enrichment for {} rows, which will exceed the rest limit {}.
15
15
 
Binary file
@@ -24,7 +24,7 @@ def remove_fintech_duplicates(
24
24
  date_format: Optional[str] = None,
25
25
  logger: Optional[Logger] = None,
26
26
  bundle: ResourceBundle = None,
27
- ) -> tuple[pd.DataFrame, Optional[List[str]]]:
27
+ ) -> Tuple[pd.DataFrame, Optional[List[str]]]:
28
28
  # Initial checks for target type and date column
29
29
  bundle = bundle or get_custom_bundle()
30
30
  if logger is None:
@@ -60,7 +60,7 @@ def remove_fintech_duplicates(
60
60
 
61
61
  warning_messages = []
62
62
 
63
- def process_df(segment_df: pd.DataFrame, eval_index=0) -> tuple[pd.DataFrame, Optional[str]]:
63
+ def process_df(segment_df: pd.DataFrame, eval_index=0) -> Tuple[pd.DataFrame, Optional[str]]:
64
64
  """Process a subset of the dataset to remove duplicates based on personal keys."""
65
65
  # Fast check for duplicates based on personal keys
66
66
  if not segment_df[personal_cols].duplicated().any():
@@ -4,7 +4,7 @@ import textwrap
4
4
  import urllib.parse
5
5
  import uuid
6
6
  from datetime import datetime, timezone
7
- from io import BytesIO
7
+ from io import StringIO
8
8
  from typing import Callable, List, Optional
9
9
 
10
10
  import pandas as pd
@@ -150,7 +150,7 @@ def make_html_report(
150
150
  search_id: str,
151
151
  email: Optional[str] = None,
152
152
  search_keys: Optional[List[str]] = None,
153
- ):
153
+ ) -> str:
154
154
  # relevant_features_df = relevant_features_df.copy()
155
155
  # relevant_features_df["Feature name"] = relevant_features_df["Feature name"].apply(
156
156
  # lambda x: "*" + x if x.contains("_autofe_") else x
@@ -161,9 +161,18 @@ def make_html_report(
161
161
  """<button type="button">Request a quote</button></a>"""
162
162
  )
163
163
  relevant_datasources_df.rename(columns={"action": "&nbsp;"}, inplace=True)
164
+
165
+ try:
166
+ from importlib.resources import files
167
+ font_path = files('upgini.utils').joinpath('Roboto-Regular.ttf')
168
+ except Exception:
169
+ from pkg_resources import resource_filename
170
+ font_path = resource_filename('upgini.utils', 'Roboto-Regular.ttf')
171
+
164
172
  return f"""<html>
165
173
  <head>
166
174
  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
175
+ <meta charset="UTF-8">
167
176
  <style>
168
177
  @page {{
169
178
  size: a4 portrait;
@@ -184,12 +193,14 @@ def make_html_report(
184
193
  }}
185
194
 
186
195
  @font-face {{
187
- font-family: "Alice-Regular";
188
- src: url("/fonts/Alice-Regular.ttf") format("truetype");
196
+ font-family: "Roboto";
197
+ src: url("{font_path}") format("truetype");
189
198
  }}
190
199
 
191
200
  body {{
192
- font-family: "Alice-Regular", Arial, sans-serif;
201
+ font-family: "Roboto", sans-serif;
202
+ font-weight: 400;
203
+ font-style: normal;
193
204
  }}
194
205
 
195
206
  #header_content {{
@@ -305,8 +316,13 @@ def show_button_download_pdf(
305
316
  from IPython.display import HTML, display
306
317
 
307
318
  file_name = f"upgini-report-{uuid.uuid4()}.pdf"
319
+
320
+ # from weasyprint import HTML
321
+
322
+ # html = HTML(string=source)
323
+ # html.write_pdf(file_name)
308
324
  with open(file_name, "wb") as output:
309
- pisa.CreatePDF(src=BytesIO(source.encode("UTF-8")), dest=output)
325
+ pisa.CreatePDF(src=StringIO(source), dest=output, encoding="UTF-8")
310
326
 
311
327
  with open(file_name, "rb") as f:
312
328
  b64 = base64.b64encode(f.read())
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.2.24a1
3
+ Version: 1.2.25
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
@@ -1,13 +1,13 @@
1
- upgini/__about__.py,sha256=TCYncqb9tQ1P6IPCnFvo0uTWg2scIXJ42Ey9L3G22qo,25
1
+ upgini/__about__.py,sha256=9j001fhDZzLSWs6YCbGQFuK5ERX5EUWz3RKLN8l5JS8,23
2
2
  upgini/__init__.py,sha256=M64LwQTBa-5Jz24Zm2h8rWwlKQQ1J8nP7gGgIciS0WU,589
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=iPFiMJtk4HF1ytw9wCQr8H9RfoOKj_TIo8XYZKWgcMc,31331
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=rctS3kRWwTJmU5X203t7sUZ_B40XYVBPeXy_0hPw2Ec,193667
6
+ upgini/features_enricher.py,sha256=V1zzUHYzmhdouZVdoFBbMH1OIqUCqV1p0mIXfcTvj6Y,193614
7
7
  upgini/http.py,sha256=21asexflvavydzCOONJDGQBtQanCElrbnqLXakJ9Cu8,42880
8
8
  upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
9
9
  upgini/metadata.py,sha256=osmzdNESeh7yP3BZday6N9Q3eaIHfzhhRM1d6NSgcf0,11223
10
- upgini/metrics.py,sha256=SRTtQyrX_yIlOO1wSfq8R1FFTVABnl0FOYKqlmQEZGo,34536
10
+ upgini/metrics.py,sha256=PoY1fq6XYAHNzn-rmnwRQZjCoVYP5bJNmKhR0ST2Txk,34588
11
11
  upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
12
12
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
13
13
  upgini/version_validator.py,sha256=h1GViOWzULy5vf6M4dpTJuIk-4V38UCrTY1sb9yLa5I,1594
@@ -30,12 +30,13 @@ upgini/normalizer/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU
30
30
  upgini/normalizer/normalize_utils.py,sha256=Ft2MwSgVoBilXAORAOYAuwPD79GOLfwn4qQE3IUFzzg,7218
31
31
  upgini/resource_bundle/__init__.py,sha256=S5F2G47pnJd2LDpmFsjDqEwiKkP8Hm-hcseDbMka6Ko,8345
32
32
  upgini/resource_bundle/exceptions.py,sha256=5fRvx0_vWdE1-7HcSgF0tckB4A9AKyf5RiinZkInTsI,621
33
- upgini/resource_bundle/strings.properties,sha256=ikL5KvPcJz9fGyVK-xOvvo6LyRfeOey8xXjoq5nnWqU,26667
33
+ upgini/resource_bundle/strings.properties,sha256=l3yg9H17NwCwvfZQyOYTvXbPP6mwdXH_CGlqyxOQVFY,26669
34
34
  upgini/resource_bundle/strings_widget.properties,sha256=gOdqvZWntP2LCza_tyVk1_yRYcG4c04K9sQOAVhF_gw,1577
35
35
  upgini/sampler/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
36
  upgini/sampler/base.py,sha256=7GpjYqjOp58vYcJLiX__1R5wjUlyQbxvHJ2klFnup_M,6389
37
37
  upgini/sampler/random_under_sampler.py,sha256=TIbm7ATo-bCMF-IiS5sZeDC1ad1SYg0eY_rRmg84yIQ,4024
38
38
  upgini/sampler/utils.py,sha256=PYOk3kKSnFlyxcpdtDNLBEEhTB4lO_iP7pQHqeUcmAc,20211
39
+ upgini/utils/Roboto-Regular.ttf,sha256=kqYnZjMRQMpbyLulIChCLSdgYa1XF8GsUIoRi2Gcauw,168260
39
40
  upgini/utils/__init__.py,sha256=O_KgzKiJjW3g4NoqZ7lAxUpoHcBi_gze6r3ndEjCH74,842
40
41
  upgini/utils/base_search_key_detector.py,sha256=Inc6iGG-VXQdejWFfbekIkZk2ahC4k7CdGqzOkie6Bs,1021
41
42
  upgini/utils/blocked_time_series.py,sha256=Uqr3vp4YqNclj2-PzEYqVy763GSXHn86sbpIl1UOB4s,3382
@@ -43,8 +44,8 @@ upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk
43
44
  upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
44
45
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
45
46
  upgini/utils/datetime_utils.py,sha256=a8X4jX2y3-6E7ZNZIG5z61qfzCvsvaNEjR1Bi5KUqfM,11279
46
- upgini/utils/deduplicate_utils.py,sha256=kINO1KoH8kPRA3JSYogzv4jaUP1Ceguv5etBPtLcsSw,8855
47
- upgini/utils/display_utils.py,sha256=NGhki1aGMsS8OeI69eLXEpmS_s41k8ojKHQxacJaXiU,11493
47
+ upgini/utils/deduplicate_utils.py,sha256=SMZx9IKIhWI5HqXepfKiQb3uDJrogQZtG6jcWuMo5Z4,8855
48
+ upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
48
49
  upgini/utils/email_utils.py,sha256=j0Ug1R_0AnCg1Y92zIZ4XMwvKo3G5_pcOlBN1OH_gZs,5191
49
50
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
50
51
  upgini/utils/features_validator.py,sha256=1Xj2ir5LzzYiX3NH8o88c2J6RTTetaTwu0MhjLTyuvM,3378
@@ -57,7 +58,7 @@ upgini/utils/sklearn_ext.py,sha256=13jQS_k7v0aUtudXV6nGUEWjttPQzAW9AFYL5wgEz9k,4
57
58
  upgini/utils/target_utils.py,sha256=qHzZRmICFbLNCrmVqGkaBcjm91L2ERRZMppci36acV4,10085
58
59
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
59
60
  upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
60
- upgini-1.2.24a1.dist-info/METADATA,sha256=EUaoN3ViaM5Al2JbZCmR_pZAjslGQQy4Q5IngbvOD3Q,48580
61
- upgini-1.2.24a1.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
62
- upgini-1.2.24a1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
63
- upgini-1.2.24a1.dist-info/RECORD,,
61
+ upgini-1.2.25.dist-info/METADATA,sha256=EeYzVMje4sNfeZjoeYf5mF7Y-O8ig75asLBQFWnDI_E,48578
62
+ upgini-1.2.25.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
63
+ upgini-1.2.25.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
64
+ upgini-1.2.25.dist-info/RECORD,,