upgini 1.2.68a3832.dev10__py3-none-any.whl → 1.2.68a3832.dev12__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of upgini might be problematic. Click here for more details.
- upgini/__about__.py +1 -1
- upgini/http.py +9 -4
- upgini/metrics.py +21 -11
- upgini/utils/deduplicate_utils.py +2 -0
- {upgini-1.2.68a3832.dev10.dist-info → upgini-1.2.68a3832.dev12.dist-info}/METADATA +1 -1
- {upgini-1.2.68a3832.dev10.dist-info → upgini-1.2.68a3832.dev12.dist-info}/RECORD +8 -8
- {upgini-1.2.68a3832.dev10.dist-info → upgini-1.2.68a3832.dev12.dist-info}/WHEEL +0 -0
- {upgini-1.2.68a3832.dev10.dist-info → upgini-1.2.68a3832.dev12.dist-info}/licenses/LICENSE +0 -0
upgini/__about__.py
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.2.68a3832.dev10"
|
|
1
|
+
__version__ = "1.2.68a3832.dev12"
|
upgini/http.py
CHANGED
|
@@ -16,6 +16,7 @@ from typing import Any, Dict, List, Optional, Tuple
|
|
|
16
16
|
from urllib.parse import urljoin
|
|
17
17
|
|
|
18
18
|
import jwt
|
|
19
|
+
|
|
19
20
|
# import pandas as pd
|
|
20
21
|
import requests
|
|
21
22
|
from pydantic import BaseModel
|
|
@@ -342,7 +343,9 @@ class _RestClient:
|
|
|
342
343
|
else:
|
|
343
344
|
return self._syncronized_refresh_access_token()
|
|
344
345
|
|
|
345
|
-
def _with_unauth_retry(
|
|
346
|
+
def _with_unauth_retry(
|
|
347
|
+
self, request, try_number: int = 0, need_connection_retry: bool = True, silent: bool = False
|
|
348
|
+
):
|
|
346
349
|
try:
|
|
347
350
|
return request()
|
|
348
351
|
except RequestException as e:
|
|
@@ -373,8 +376,9 @@ class _RestClient:
|
|
|
373
376
|
elif "more than one concurrent search request" in e.message.lower():
|
|
374
377
|
raise ValidationError(bundle.get("concurrent_request"))
|
|
375
378
|
else:
|
|
376
|
-
|
|
377
|
-
|
|
379
|
+
if not silent:
|
|
380
|
+
print(e)
|
|
381
|
+
show_status_error()
|
|
378
382
|
raise e
|
|
379
383
|
|
|
380
384
|
@staticmethod
|
|
@@ -706,6 +710,7 @@ class _RestClient:
|
|
|
706
710
|
silent=True,
|
|
707
711
|
),
|
|
708
712
|
need_connection_retry=False,
|
|
713
|
+
silent=True,
|
|
709
714
|
)
|
|
710
715
|
except Exception:
|
|
711
716
|
self.send_log_event_unauth(log_event)
|
|
@@ -716,7 +721,7 @@ class _RestClient:
|
|
|
716
721
|
try:
|
|
717
722
|
requests.post(
|
|
718
723
|
url=urljoin(_RestClient.PROD_BACKEND_URL, api_path),
|
|
719
|
-
json=log_event.
|
|
724
|
+
json=log_event.model_dump(exclude_none=True),
|
|
720
725
|
headers=_RestClient._get_base_headers(content_type="application/json"),
|
|
721
726
|
)
|
|
722
727
|
except Exception:
|
upgini/metrics.py
CHANGED
|
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|
|
3
3
|
import inspect
|
|
4
4
|
import logging
|
|
5
5
|
import re
|
|
6
|
+
import warnings
|
|
6
7
|
from collections import defaultdict
|
|
7
8
|
from copy import deepcopy
|
|
8
9
|
from dataclasses import dataclass
|
|
@@ -359,7 +360,7 @@ class EstimatorWrapper:
|
|
|
359
360
|
self.logger.info(f"After preparing data columns: {x.columns.to_list()}")
|
|
360
361
|
return x, y, groups
|
|
361
362
|
|
|
362
|
-
def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame,
|
|
363
|
+
def _remove_empty_target_rows(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray]:
|
|
363
364
|
joined = pd.concat([x, y], axis=1)
|
|
364
365
|
joined = joined[joined[y.name].notna()]
|
|
365
366
|
joined = joined.reset_index(drop=True)
|
|
@@ -413,7 +414,10 @@ class EstimatorWrapper:
|
|
|
413
414
|
for estimator, split in zip(self.cv_estimators, splits):
|
|
414
415
|
_, validation_idx = split
|
|
415
416
|
cv_x = x.iloc[validation_idx]
|
|
416
|
-
|
|
417
|
+
if isinstance(y, pd.Series):
|
|
418
|
+
cv_y = y.iloc[validation_idx]
|
|
419
|
+
else:
|
|
420
|
+
cv_y = y[validation_idx]
|
|
417
421
|
shaps = self.calculate_shap(cv_x, cv_y, estimator)
|
|
418
422
|
if shaps is not None:
|
|
419
423
|
for feature, shap_value in shaps.items():
|
|
@@ -760,29 +764,35 @@ class LightGBMWrapper(EstimatorWrapper):
|
|
|
760
764
|
for feature in self.cat_features:
|
|
761
765
|
x[feature] = x[feature].astype("category").cat.codes
|
|
762
766
|
if not is_numeric_dtype(y_numpy):
|
|
763
|
-
|
|
767
|
+
y_numpy = correct_string_target(y_numpy)
|
|
764
768
|
|
|
765
|
-
return x,
|
|
769
|
+
return x, y_numpy, groups, params
|
|
766
770
|
|
|
767
771
|
def _prepare_to_calculate(self, x: pd.DataFrame, y: pd.Series) -> Tuple[pd.DataFrame, np.ndarray, dict]:
|
|
768
|
-
x,
|
|
772
|
+
x, y_numpy, params = super()._prepare_to_calculate(x, y)
|
|
769
773
|
if self.cat_features is not None:
|
|
770
774
|
x = fill_na_cat_features(x, self.cat_features)
|
|
771
775
|
for feature in self.cat_features:
|
|
772
776
|
x[feature] = x[feature].astype("category").cat.codes
|
|
773
777
|
if not is_numeric_dtype(y):
|
|
774
|
-
|
|
775
|
-
return x,
|
|
778
|
+
y_numpy = correct_string_target(y_numpy)
|
|
779
|
+
return x, y_numpy, params
|
|
776
780
|
|
|
777
781
|
def calculate_shap(self, x: pd.DataFrame, y: pd.Series, estimator) -> Optional[Dict[str, float]]:
|
|
778
782
|
try:
|
|
779
|
-
|
|
780
|
-
|
|
783
|
+
# Suppress specific warning from SHAP for LightGBM binary classifier
|
|
784
|
+
warnings.filterwarnings(
|
|
785
|
+
"ignore",
|
|
786
|
+
message=(
|
|
787
|
+
"LightGBM binary classifier with TreeExplainer shap values output has changed to a list of ndarray"
|
|
788
|
+
),
|
|
789
|
+
)
|
|
790
|
+
from shap import TreeExplainer
|
|
781
791
|
|
|
782
|
-
if not isinstance(estimator, (
|
|
792
|
+
if not isinstance(estimator, (LGBMRegressor, LGBMClassifier)):
|
|
783
793
|
return None
|
|
784
794
|
|
|
785
|
-
explainer =
|
|
795
|
+
explainer = TreeExplainer(estimator)
|
|
786
796
|
|
|
787
797
|
shap_values = explainer.shap_values(x)
|
|
788
798
|
|
|
@@ -74,6 +74,8 @@ def remove_fintech_duplicates(
|
|
|
74
74
|
# Checking for different dates by the same personal keys
|
|
75
75
|
uniques = grouped_by_personal_cols[date_col].nunique()
|
|
76
76
|
total = len(uniques)
|
|
77
|
+
if total == 0:
|
|
78
|
+
return segment_df, None
|
|
77
79
|
diff_dates = len(uniques[uniques > 1])
|
|
78
80
|
if diff_dates / total >= 0.6:
|
|
79
81
|
return segment_df, None
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: upgini
|
|
3
|
-
Version: 1.2.68a3832.dev10
|
|
3
|
+
Version: 1.2.68a3832.dev12
|
|
4
4
|
Summary: Intelligent data search & enrichment for Machine Learning
|
|
5
5
|
Project-URL: Bug Reports, https://github.com/upgini/upgini/issues
|
|
6
6
|
Project-URL: Homepage, https://upgini.com/
|
|
@@ -1,13 +1,13 @@
|
|
|
1
|
-
upgini/__about__.py,sha256=
|
|
1
|
+
upgini/__about__.py,sha256=U3w9ipbCUQQonL603X2mBrHUqIttoTSqcgno2WwRvzk,34
|
|
2
2
|
upgini/__init__.py,sha256=LXSfTNU0HnlOkE69VCxkgIKDhWP-JFo_eBQ71OxTr5Y,261
|
|
3
3
|
upgini/ads.py,sha256=nvuRxRx5MHDMgPr9SiU-fsqRdFaBv8p4_v1oqiysKpc,2714
|
|
4
4
|
upgini/dataset.py,sha256=1rb6BzyuiQFGVCTDmKL2wox3UFRNjtNaIJOwQnZ801A,34956
|
|
5
5
|
upgini/errors.py,sha256=2b_Wbo0OYhLUbrZqdLIx5jBnAsiD1Mcenh-VjR4HCTw,950
|
|
6
6
|
upgini/features_enricher.py,sha256=GXXx14jwf3F26_KrfJ6O40Vcu1hRx5iBjUB_jxy3Xvg,205476
|
|
7
|
-
upgini/http.py,sha256=
|
|
7
|
+
upgini/http.py,sha256=RvzcShpDXssLs6ycGN8xilkKi8ZV9XGUrrk8bwdUzbw,43607
|
|
8
8
|
upgini/lazy_import.py,sha256=74gQ8JuA48BGRLxAo7lNHNKY2D2emMxrUxKGdxVGhuY,1012
|
|
9
9
|
upgini/metadata.py,sha256=Jh6YTaS00m_nbaOY_owvlSyn9zgkErkqu8iTr9ZjKI8,12279
|
|
10
|
-
upgini/metrics.py,sha256=
|
|
10
|
+
upgini/metrics.py,sha256=I0sVJLNp4fiIq7ZFcUdNTxJjFkzStdFuKbnf2niEGjc,38207
|
|
11
11
|
upgini/search_task.py,sha256=qxUxAD-bed-FpZYmTB_4orW7YJsW_O6a1TcgnZIRFr4,17307
|
|
12
12
|
upgini/spinner.py,sha256=4iMd-eIe_BnkqFEMIliULTbj6rNI2HkN_VJ4qYe0cUc,1118
|
|
13
13
|
upgini/version_validator.py,sha256=DvbaAvuYFoJqYt0fitpsk6Xcv-H1BYDJYHUMxaKSH_Y,1509
|
|
@@ -52,7 +52,7 @@ upgini/utils/country_utils.py,sha256=lY-eXWwFVegdVENFttbvLcgGDjFO17Sex8hd2PyJaRk
|
|
|
52
52
|
upgini/utils/custom_loss_utils.py,sha256=kieNZYBYZm5ZGBltF1F_jOSF4ea6C29rYuCyiDcqVNY,3857
|
|
53
53
|
upgini/utils/cv_utils.py,sha256=w6FQb9nO8BWDx88EF83NpjPLarK4eR4ia0Wg0kLBJC4,3525
|
|
54
54
|
upgini/utils/datetime_utils.py,sha256=_jq-kn_dGNFfs-DGXcWCGzy9bkplfAjrZ8SsmN28zXc,13535
|
|
55
|
-
upgini/utils/deduplicate_utils.py,sha256=
|
|
55
|
+
upgini/utils/deduplicate_utils.py,sha256=AcMLoObMjhOTQ_fMS1LWy0GKp6WXnZ-FNux_8V3nbZU,8914
|
|
56
56
|
upgini/utils/display_utils.py,sha256=DsBjJ8jEYAh8BPgfAbzq5imoGFV6IACP20PQ78BQCX0,11964
|
|
57
57
|
upgini/utils/email_utils.py,sha256=pZ2vCfNxLIPUhxr0-OlABNXm12jjU44isBk8kGmqQzA,5277
|
|
58
58
|
upgini/utils/fallback_progress_bar.py,sha256=PDaKb8dYpVZaWMroNcOHsTc3pSjgi9mOm0--cOFTwJ0,1074
|
|
@@ -70,7 +70,7 @@ upgini/utils/target_utils.py,sha256=b1GzO8_gMcwXSZ2v98CY50MJJBzKbWHId_BJGybXfkM,
|
|
|
70
70
|
upgini/utils/track_info.py,sha256=G5Lu1xxakg2_TQjKZk4b5SvrHsATTXNVV3NbvWtT8k8,5663
|
|
71
71
|
upgini/utils/ts_utils.py,sha256=26vhC0pN7vLXK6R09EEkMK3Lwb9IVPH7LRdqFIQ3kPs,1383
|
|
72
72
|
upgini/utils/warning_counter.py,sha256=-GRY8EUggEBKODPSuXAkHn9KnEQwAORC0mmz_tim-PM,254
|
|
73
|
-
upgini-1.2.68a3832.
|
|
74
|
-
upgini-1.2.68a3832.
|
|
75
|
-
upgini-1.2.68a3832.
|
|
76
|
-
upgini-1.2.68a3832.
|
|
73
|
+
upgini-1.2.68a3832.dev12.dist-info/METADATA,sha256=ElzWiZHc8K-GAYoV-4oqiAyyMHin2uzqhXXkZcHrvjE,49150
|
|
74
|
+
upgini-1.2.68a3832.dev12.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
|
|
75
|
+
upgini-1.2.68a3832.dev12.dist-info/licenses/LICENSE,sha256=5RRzgvdJUu3BUDfv4bzVU6FqKgwHlIay63pPCSmSgzw,1514
|
|
76
|
+
upgini-1.2.68a3832.dev12.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|