upgini 1.1.280.dev1__py3-none-any.whl → 1.1.281__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of upgini might be problematic. Click here for more details.

upgini/__about__.py CHANGED
@@ -1 +1 @@
1
- __version__ = "1.1.280.dev1"
1
+ __version__ = "1.1.281"
upgini/features_enricher.py CHANGED
@@ -930,6 +930,7 @@ class FeaturesEnricher(TransformerMixin):
930
930
  scoring,
931
931
  groups=groups,
932
932
  text_features=self.generate_features,
933
+ has_date=has_date,
933
934
  )
934
935
  metric = wrapper.metric_name
935
936
  multiplier = wrapper.multiplier
@@ -956,6 +957,7 @@ class FeaturesEnricher(TransformerMixin):
956
957
  add_params=custom_loss_add_params,
957
958
  groups=groups,
958
959
  text_features=self.generate_features,
960
+ has_date=has_date,
959
961
  )
960
962
  etalon_metric = baseline_estimator.cross_val_predict(
961
963
  fitting_X, y_sorted, self.baseline_score_column
@@ -981,6 +983,7 @@ class FeaturesEnricher(TransformerMixin):
981
983
  add_params=custom_loss_add_params,
982
984
  groups=groups,
983
985
  text_features=self.generate_features,
986
+ has_date=has_date,
984
987
  )
985
988
  enriched_metric = enriched_estimator.cross_val_predict(fitting_enriched_X, enriched_y_sorted)
986
989
  self.logger.info(f"Enriched {metric} on train combined features: {enriched_metric}")
@@ -1333,8 +1336,6 @@ class FeaturesEnricher(TransformerMixin):
1333
1336
  excluding_search_keys = list(search_keys.keys())
1334
1337
  if search_keys_for_metrics is not None and len(search_keys_for_metrics) > 0:
1335
1338
  excluding_search_keys = [sk for sk in excluding_search_keys if sk not in search_keys_for_metrics]
1336
- meta = self._search_task.get_all_features_metadata_v2()
1337
- zero_importance_client_features = [m.name for m in meta if m.source == "etalon" and m.shap_value == 0.0]
1338
1339
 
1339
1340
  client_features = [
1340
1341
  c
@@ -1344,7 +1345,6 @@ class FeaturesEnricher(TransformerMixin):
1344
1345
  excluding_search_keys
1345
1346
  + list(self.fit_dropped_features)
1346
1347
  + [DateTimeSearchKeyConverter.DATETIME_COL, SYSTEM_RECORD_ID]
1347
- + zero_importance_client_features
1348
1348
  )
1349
1349
  ]
1350
1350
 
@@ -3720,7 +3720,7 @@ class FeaturesEnricher(TransformerMixin):
3720
3720
  if y is not None:
3721
3721
  with open(f"{tmp_dir}/y.pickle", "wb") as y_file:
3722
3722
  pickle.dump(sample(y, xy_sample_index), y_file)
3723
- if eval_set:
3723
+ if eval_set and _num_samples(eval_set[0][0]) > 0:
3724
3724
  eval_xy_sample_index = rnd.randint(0, _num_samples(eval_set[0][0]), size=1000)
3725
3725
  with open(f"{tmp_dir}/eval_x.pickle", "wb") as eval_x_file:
3726
3726
  pickle.dump(sample(eval_set[0][0], eval_xy_sample_index), eval_x_file)
upgini/metrics.py CHANGED
@@ -314,9 +314,17 @@ class EstimatorWrapper:
314
314
  metrics_by_fold = cv_results["test_score"]
315
315
  self.cv_estimators = cv_results["estimator"]
316
316
 
317
+ self.check_fold_metrics(metrics_by_fold)
318
+
317
319
  metric = np.mean(metrics_by_fold) * self.multiplier
318
320
  return self.post_process_metric(metric)
319
321
 
322
+ def check_fold_metrics(self, metrics_by_fold: List[float]):
323
+ first_metric_sign = 1 if metrics_by_fold[0] >= 0 else -1
324
+ for metric in metrics_by_fold[1:]:
325
+ if first_metric_sign * metric < 0:
326
+ self.logger.warning(f"Sign of metrics differs between folds: {metrics_by_fold}")
327
+
320
328
  def post_process_metric(self, metric: float) -> float:
321
329
  if self.metric_name == "GINI":
322
330
  metric = 2 * metric - 1
@@ -346,6 +354,7 @@ class EstimatorWrapper:
346
354
  text_features: Optional[List[str]] = None,
347
355
  add_params: Optional[Dict[str, Any]] = None,
348
356
  groups: Optional[List[str]] = None,
357
+ has_date: Optional[bool] = None,
349
358
  ) -> EstimatorWrapper:
350
359
  scorer, metric_name, multiplier = _get_scorer(target_type, scoring)
351
360
  kwargs = {
@@ -360,6 +369,7 @@ class EstimatorWrapper:
360
369
  }
361
370
  if estimator is None:
362
371
  params = dict()
372
+ params["has_time"] = has_date
363
373
  # if metric_name.upper() in SUPPORTED_CATBOOST_METRICS:
364
374
  # params["eval_metric"] = SUPPORTED_CATBOOST_METRICS[metric_name.upper()]
365
375
  if target_type == ModelTaskType.MULTICLASS:
@@ -475,7 +485,7 @@ class CatBoostWrapper(EstimatorWrapper):
475
485
 
476
486
  # Find rest categorical features
477
487
  self.cat_features = _get_cat_features(x, self.text_features, embedding_features)
478
- x = fill_na_cat_features(x, self.cat_features)
488
+ # x = fill_na_cat_features(x, self.cat_features)
479
489
  unique_cat_features = []
480
490
  for name in self.cat_features:
481
491
  # Remove constant categorical features
@@ -525,7 +535,7 @@ class CatBoostWrapper(EstimatorWrapper):
525
535
  x, emb_columns = self.group_embeddings(x)
526
536
  params["embedding_features"] = emb_columns
527
537
  if self.cat_features:
528
- x = fill_na_cat_features(x, self.cat_features)
538
+ # x = fill_na_cat_features(x, self.cat_features)
529
539
  params["cat_features"] = self.cat_features
530
540
 
531
541
  return x, y, params
upgini/utils/display_utils.py CHANGED
@@ -9,6 +9,7 @@ from typing import Callable, List, Optional
9
9
 
10
10
  import pandas as pd
11
11
  from xhtml2pdf import pisa
12
+ from upgini.__about__ import __version__
12
13
 
13
14
 
14
15
  def ipython_available() -> bool:
@@ -166,12 +167,12 @@ def make_html_report(
166
167
  /*-pdf-frame-border: 1;*/
167
168
  }}
168
169
  @frame content_frame {{
169
- left: 10pt; width: 574pt; top: 50pt; height: 752pt;
170
+ left: 10pt; width: 574pt; top: 50pt; height: 742pt;
170
171
  /*-pdf-frame-border: 1;*/
171
172
  }}
172
173
  @frame footer_frame {{
173
174
  -pdf-frame-content: footer_content;
174
- left: 10pt; width: 574pt; top: 802pt; height: 30pt;
175
+ left: 10pt; width: 574pt; top: 802pt; height: 40pt;
175
176
  /*-pdf-frame-border: 1;*/
176
177
  }}
177
178
  }}
@@ -234,7 +235,8 @@ def make_html_report(
234
235
  <div id="header_content">UPGINI</div>
235
236
  <div id="footer_content">
236
237
  © Upgini</br>
237
- sales@upgini.com
238
+ sales@upgini.com</br>
239
+ Launched by version {__version__}
238
240
  </div>
239
241
 
240
242
  <h1>Data search report</h1>
@@ -257,7 +259,7 @@ def make_html_report(
257
259
  }
258
260
  <h3>Relevant data sources</h3>
259
261
  {make_table(relevant_datasources_df)}
260
- <h3>All relevant features. Listing</h3>
262
+ <h3>All relevant features. Listing ({len(relevant_features_df)} items)</h3>
261
263
  {make_table(relevant_features_df, wrap_long_string=25)}
262
264
  {"<h3>Description of AutoFE feature names</h3>" + make_table(autofe_descriptions_df, wrap_long_string=25)
263
265
  if autofe_descriptions_df is not None
upgini-1.1.280.dev1.dist-info/METADATA → upgini-1.1.281.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: upgini
3
- Version: 1.1.280.dev1
3
+ Version: 1.1.281
4
4
  Summary: Intelligent data search & enrichment for Machine Learning
5
5
  Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
6
6
  Project-URL: Homepage, https://upgini.com/
upgini-1.1.280.dev1.dist-info/RECORD → upgini-1.1.281.dist-info/RECORD CHANGED
@@ -1,12 +1,12 @@
1
- upgini/__about__.py,sha256=fqBvvVtpVdYqoxP5L8bSpUZgPbCyY2JajG5HQR_3yeI,29
1
+ upgini/__about__.py,sha256=QXQ0qAoJaXwuS49QdPQRJ572R1Vxd94TndilBoFpK_s,24
2
2
  upgini/__init__.py,sha256=asENHgEVHQBIkV-e_0IhE_ZWqkCG6398U3ZLrNzAH6k,407
3
3
  upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
4
4
  upgini/dataset.py,sha256=7TLVVhGtjgx_9yaiaIUK3kZSe_R9wg5dY0d4F5qCGM4,45636
5
5
  upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
6
- upgini/features_enricher.py,sha256=Nx5ZgUXvOh6HBuBWZt25kU0fqwG6955X8jSJkTxkq7A,176938
6
+ upgini/features_enricher.py,sha256=J1x1YMoJBWMFWhdvP-_h7X67qEoTcLqkRRMrz8I1XKQ,176885
7
7
  upgini/http.py,sha256=khrYSldpY-HbVLCcApfV1BjBFK6Uyuatb4colKybxgY,42301
8
8
  upgini/metadata.py,sha256=CFJekYGD7Ep7pRFH7wCEcsXS4bz83do33FNmtcCY9P4,9729
9
- upgini/metrics.py,sha256=L4LKSMOK9iKFLaJvTBTKk2tQauMgiJqtfrBclM3fBjs,29670
9
+ upgini/metrics.py,sha256=tTXAgjEuoo_vDe4n-R0AFK95IIx_7kugIJJJv2Hr_1o,30128
10
10
  upgini/search_task.py,sha256=LtRJ9bCPjMo1gJ-sUDKERhDwGcWKImrzwVFHjkMSQHQ,17071
11
11
  upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
12
12
  upgini/version_validator.py,sha256=RGg87VweujTNlibgsOuqPLIEiBgIOkuXNVTGuNCD234,1405
@@ -43,7 +43,7 @@ upgini/utils/custom_loss_utils.py,sha256=bLk3uygqkJBaGkyzfO032d72QASae-dDyEURfFe
43
43
  upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
44
44
  upgini/utils/datetime_utils.py,sha256=-LsDTThsGKsTZ57V1uNiHtLcoTtqktk5tui4WnqggJo,10673
45
45
  upgini/utils/deduplicate_utils.py,sha256=6AbARehUCghJZ4PppFtrej2s3gFRruh41MEm6mzakHs,8607
46
- upgini/utils/display_utils.py,sha256=LKoSwjrE0xgS5_cqVhc2og2CQ1UCZ1nTI2VKboIhoQA,10858
46
+ upgini/utils/display_utils.py,sha256=A2ouB5eiZ-Kyt9ykYxkLQwyoRPrdYeJymwNTiajtFXs,10990
47
47
  upgini/utils/email_utils.py,sha256=PLufTO97Pg9PPsNqB9agcM6M98MIxKUgIgNn2mVwSQ0,3520
48
48
  upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
49
49
  upgini/utils/features_validator.py,sha256=PgKNt5dyqfErTvjtRNNUS9g7GFqHBtAtnsfA-V5UO1A,3307
@@ -56,7 +56,7 @@ upgini/utils/sklearn_ext.py,sha256=c23MGSUVfxLnaDWKAxavHgnOtm5dGKkF3YswdWQcFzs,4
56
56
  upgini/utils/target_utils.py,sha256=Y96_PJ5cC-WsEbeqg20v9uqywDQobLoTb-xoP7S3o4E,7807
57
57
  upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
58
58
  upgini/utils/warning_counter.py,sha256=dIWBB4dI5XRRJZudvIlqlIYKEiwLLPcXarsZuYRt338,227
59
- upgini-1.1.280.dev1.dist-info/METADATA,sha256=PHXQUHQajqAckQJZe8k-vPW5Eq51hEzSfpUKOnctguc,48123
60
- upgini-1.1.280.dev1.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
61
- upgini-1.1.280.dev1.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
62
- upgini-1.1.280.dev1.dist-info/RECORD,,
59
+ upgini-1.1.281.dist-info/METADATA,sha256=l6PW4_vWrlqKTTbhXFb9Qsm9FIYMQH5BrU3Vca5WzKo,48118
60
+ upgini-1.1.281.dist-info/WHEEL,sha256=zEMcRr9Kr03x1ozGwg5v9NQBKn3kndp6LSoSlVg-jhU,87
61
+ upgini-1.1.281.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
62
+ upgini-1.1.281.dist-info/RECORD,,